diff options
author | John van Groningen | 2011-04-04 12:26:23 +0000 |
---|---|---|
committer | John van Groningen | 2011-04-04 12:26:23 +0000 |
commit | efa180162c0186f3b1ce299c6ea3c821c71e40f0 (patch) | |
tree | 6a67db3981102c6080e6a9e0b3508b9d27fe572e | |
parent | add directory for 64 bit Intel Mac OS X version (diff) |
port to 64 bit Intel Mac OS X version
-rw-r--r-- | macho64/aap.s | 2418 | ||||
-rw-r--r-- | macho64/acompact.s | 1184 | ||||
-rw-r--r-- | macho64/acompact_rmark.s | 953 | ||||
-rw-r--r-- | macho64/acompact_rmark_prefetch.s | 1181 | ||||
-rw-r--r-- | macho64/acompact_rmarkr.s | 994 | ||||
-rw-r--r-- | macho64/acopy.s | 1267 | ||||
-rw-r--r-- | macho64/afileIO3.s | 915 | ||||
-rw-r--r-- | macho64/amark.s | 1933 | ||||
-rw-r--r-- | macho64/amark_prefetch.s | 1757 | ||||
-rw-r--r-- | macho64/areals.s | 8965 | ||||
-rw-r--r-- | macho64/astartup.s | 5119 | ||||
-rw-r--r-- | macho64/atrace.s | 474 | ||||
-rw-r--r-- | macho64/startup.s | 6 | ||||
-rw-r--r-- | macho64/startupTrace.s | 6 |
14 files changed, 27172 insertions, 0 deletions
diff --git a/macho64/aap.s b/macho64/aap.s new file mode 100644 index 0000000..0db8c1c --- /dev/null +++ b/macho64/aap.s @@ -0,0 +1,2418 @@ + + .intel_syntax noprefix + +/* d0 = rax */ +/* d1 = rbx */ +/* d2 = r10 */ +/* d3 = r11 */ +/* d7 = r15 */ +/* a0 = rcx */ +/* a1 = rdx */ +/* a2 = r8 */ +/* a3 = r9 */ +/* a4 = rsi */ +/* a6 = rdi */ + +/* d0l = eax */ +/* a2l = r8d */ +/* a3l = r9d */ + +/* d1w = bx */ + +/* qw = qword ptr */ + + .text + + .globl ap_2 + .globl ap_3 + .globl ap_4 + .globl ap_5 + .globl ap_6 + .globl ap_7 + .globl ap_8 + .globl ap_9 + .globl ap_10 + .globl ap_11 + .globl ap_12 + .globl ap_13 + .globl ap_14 + .globl ap_15 + .globl ap_16 + .globl ap_17 + .globl ap_18 + .globl ap_19 + .globl ap_20 + .globl ap_21 + .globl ap_22 + .globl ap_23 + .globl ap_24 + .globl ap_25 + .globl ap_26 + .globl ap_27 + .globl ap_28 + .globl ap_29 + .globl ap_30 + .globl ap_31 + .globl ap_32 + + .globl add_empty_node_2 + .globl add_empty_node_3 + .globl add_empty_node_4 + .globl add_empty_node_5 + .globl add_empty_node_6 + .globl add_empty_node_7 + .globl add_empty_node_8 + .globl add_empty_node_9 + .globl add_empty_node_10 + .globl add_empty_node_11 + .globl add_empty_node_12 + .globl add_empty_node_13 + .globl add_empty_node_14 + .globl add_empty_node_15 + .globl add_empty_node_16 + .globl add_empty_node_17 + .globl add_empty_node_18 + .globl add_empty_node_19 + .globl add_empty_node_20 + .globl add_empty_node_21 + .globl add_empty_node_22 + .globl add_empty_node_23 + .globl add_empty_node_24 + .globl add_empty_node_25 + .globl add_empty_node_26 + .globl add_empty_node_27 + .globl add_empty_node_28 + .globl add_empty_node_29 + .globl add_empty_node_30 + .globl add_empty_node_31 + .globl add_empty_node_32 + + .globl yet_args_needed_5 + .globl yet_args_needed_6 + .globl yet_args_needed_7 + .globl yet_args_needed_8 + .globl yet_args_needed_9 + .globl yet_args_needed_10 + .globl yet_args_needed_11 + .globl yet_args_needed_12 + .globl yet_args_needed_13 + .globl 
yet_args_needed_14 + .globl yet_args_needed_15 + .globl yet_args_needed_16 + .globl yet_args_needed_17 + .globl yet_args_needed_18 + .globl yet_args_needed_19 + .globl yet_args_needed_20 + .globl yet_args_needed_21 + .globl yet_args_needed_22 + .globl yet_args_needed_23 + .globl yet_args_needed_24 + .globl yet_args_needed_25 + .globl yet_args_needed_26 + .globl yet_args_needed_27 + .globl yet_args_needed_28 + .globl yet_args_needed_29 + .globl yet_args_needed_30 + .globl yet_args_needed_31 + +/* ap_N entries: r9 = [r8]; rbx = N*16. If the 16-bit word at [r9] equals bx, continue at fast_ap. */ +/* Otherwise store rcx at [rsi], move rdx to rcx and r8 to rdx, load r8 from 6[r9], advance rsi by 8 and continue at no_fast_apN. */ +ap_32: + mov r9,qword ptr [r8] + mov rbx,32*16 + cmp word ptr [r9],bx + je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap32 + +ap_31: + mov r9,qword ptr [r8] + mov rbx,31*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap31 + +ap_30: + mov r9,qword ptr [r8] + mov rbx,30*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap30 + +ap_29: + mov r9,qword ptr [r8] + mov rbx,29*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap29 + +ap_28: + mov r9,qword ptr [r8] + mov rbx,28*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap28 + +ap_27: + mov r9,qword ptr [r8] + mov rbx,27*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap27 + +ap_26: + mov r9,qword ptr [r8] + mov rbx,26*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap26 + +ap_25: + mov r9,qword ptr [r8] + mov rbx,25*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov 
qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap25 + +/* ap_24 and the entries below repeat one pattern: compare the word at [r9] with N*16, take fast_ap on a match, otherwise spill rcx to [rsi], shift rdx and r8 down one register and go to no_fast_apN. */ +ap_24: + mov r9,qword ptr [r8] + mov rbx,24*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap24 + +ap_23: + mov r9,qword ptr [r8] + mov rbx,23*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap23 + +ap_22: + mov r9,qword ptr [r8] + mov rbx,22*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap22 + +ap_21: + mov r9,qword ptr [r8] + mov rbx,21*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap21 + +ap_20: + mov r9,qword ptr [r8] + mov rbx,20*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap20 + +ap_19: + mov r9,qword ptr [r8] + mov rbx,19*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap19 + +ap_18: + mov r9,qword ptr [r8] + mov rbx,18*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap18 + +ap_17: + mov r9,qword ptr [r8] + mov rbx,17*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap17 + +ap_16: + mov r9,qword ptr [r8] + mov rbx,16*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap16 + +ap_15: + mov r9,qword ptr [r8] + mov rbx,15*16 + cmp word ptr [r9],bx + att_je 
fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap15 + +/* ap_14 down to ap_5: same shape as the larger ap_N entries; only the constant N*16 in rbx and the no_fast_apN target change. */ +ap_14: + mov r9,qword ptr [r8] + mov rbx,14*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap14 + +ap_13: + mov r9,qword ptr [r8] + mov rbx,13*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap13 + +ap_12: + mov r9,qword ptr [r8] + mov rbx,12*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap12 + +ap_11: + mov r9,qword ptr [r8] + mov rbx,11*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap11 + +ap_10: + mov r9,qword ptr [r8] + mov rbx,10*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap10 + +ap_9: + mov r9,qword ptr [r8] + mov rbx,9*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap9 + +ap_8: + mov r9,qword ptr [r8] + mov rbx,8*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap8 + +ap_7: + mov r9,qword ptr [r8] + mov rbx,7*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap7 + +ap_6: + mov r9,qword ptr [r8] + mov rbx,6*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap6 + +ap_5: + mov r9,qword ptr [r8] + mov rbx,5*16 + cmp word ptr [r9],bx + att_je 
fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap5 + +ap_4: + mov r9,qword ptr [r8] + mov rbx,4*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap4 + +ap_3: + mov r9,qword ptr [r8] + mov rbx,3*16 + cmp word ptr [r9],bx + att_je fast_ap + + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap3 + +/* ap_2: unlike the larger entries it has its own copy of the fast path (fast_ap_2_2_), which uses repl_args_0_2 for rax = 0 so the registers are not shifted again. */ +ap_2: + mov r9,qword ptr [r8] + mov rbx,2*16 + cmp word ptr [r9],bx + jne no_fast_ap2_ + +fast_ap_2_2_: + movzx rax,word ptr -2[r9] + add rbx,r9 + mov r9,qword ptr -10[rbx] + .if PROFILE + sub r9,24 + .else + sub r9,12 + .endif + cmp rax,1 + jb repl_args_0_2 + je repl_args_1 + + cmp rax,3 + jb repl_args_2 + + mov qword ptr [rsi],rcx + mov qword ptr 8[rsi],rdx + lea rsi,16[rsi] + mov rdx,qword ptr 16[r8] + + jmp fast_ap_ + +no_fast_ap2_: + mov qword ptr [rsi],rcx + mov rcx,rdx + mov rdx,r8 + mov r8,qword ptr 6[r9] + add rsi,8 + jmp no_fast_ap2 + +fast_ap_2_2: + mov r8,rdx + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + att_jmp fast_ap_2_2_ + +/* fast_ap_2: undo the register shift (rdx to r8, rcx to rdx, rcx popped from -8[rsi]) and fall into fast_ap. */ +fast_ap_2: + mov r8,rdx + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + +/* fast_ap: rax = zero-extended word at -2[r9]; rbx becomes r9 plus N*16 and the qword at -10[rbx], minus 12 (24 when PROFILE), is kept in r9 as the final jump target. */ +/* rax then selects which repl_args_N tail runs before jmp r9. */ +fast_ap: + movzx rax,word ptr -2[r9] + add rbx,r9 + mov r9,qword ptr -10[rbx] + .if PROFILE + sub r9,24 + .else + sub r9,12 + .endif + cmp rax,1 + jb repl_args_0 + att_je repl_args_1 + + cmp rax,3 + att_jb repl_args_2 + + mov qword ptr [rsi],rcx + mov qword ptr 8[rsi],rdx + lea rsi,16[rsi] + mov rdx,qword ptr 16[r8] + +/* fast_ap_: the je below still tests the flags of the earlier cmp rax,3; the mov and lea instructions in between do not modify flags. */ +fast_ap_: + mov r8,qword ptr 8[r8] + je repl_args_3 + + cmp rax,5 + jb repl_args_4 + je repl_args_5 + + cmp rax,7 + jb repl_args_6 + +/* repl_args_7_: copy qwords from -16[rdx+rax*8] to [rsi], counting rax down, until rax reaches 6. */ +repl_args_7_: + mov rbp,qword ptr -16[rdx+rax*8] + mov qword ptr [rsi],rbp + sub rax,1 + add rsi,8 + cmp rax,6 + att_jne repl_args_7_ + +repl_args_6: + mov rax,qword ptr 32[rdx] + mov qword ptr [rsi],rax + mov rax,qword ptr 24[rdx] + mov qword ptr 8[rsi],rax + mov rax,qword ptr 16[rdx] + 
mov qword ptr 16[rsi],rax + mov rcx,qword ptr 8[rdx] + mov rdx,qword ptr [rdx] + add rsi,24 + jmp r9 + +/* repl_args_0: shift rdx to r8 and rcx to rdx, pop rcx from -8[rsi], then jump to r9. repl_args_0_2 is the bare jmp r9 without the shift. */ +repl_args_0: + mov r8,rdx + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 +repl_args_0_2: + jmp r9 + +repl_args_1: + mov r8,qword ptr 8[r8] + jmp r9 + +repl_args_2: + mov qword ptr [rsi],rcx + mov rcx,rdx + add rsi,8 + mov rdx,qword ptr 16[r8] + mov r8,qword ptr 8[r8] + jmp r9 + +repl_args_3: + mov rcx,qword ptr 8[rdx] + mov rdx,qword ptr [rdx] + jmp r9 + +repl_args_4: + mov rax,qword ptr 16[rdx] + mov qword ptr [rsi],rax + mov rcx,qword ptr 8[rdx] + mov rdx,qword ptr [rdx] + add rsi,8 + jmp r9 + +repl_args_5: + mov rax,qword ptr 24[rdx] + mov qword ptr [rsi],rax + mov rax,qword ptr 16[rdx] + mov qword ptr 8[rsi],rax + mov rcx,qword ptr 8[rdx] + mov rdx,qword ptr [rdx] + add rsi,16 + jmp r9 + + +/* no_fast_apN chain: call r8, then r9 = [rcx], rdx = rcx and rcx is popped from -8[rsi]. */ +/* If the word at [r9] equals (N-1)*16 continue at fast_ap_2; otherwise load r8 from 6[r9] and fall through into the next lower no_fast_ap label. */ +no_fast_ap32: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,31*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap31: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,30*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap30: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,29*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap29: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,28*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap28: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,27*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap27: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,26*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap26: + call r8 + mov r9,qword 
ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,25*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +/* no_fast_ap25 down to no_fast_ap16: each step calls r8, reloads r9, rdx and rcx, tests the word at [r9] against the next lower multiple of 16 and either jumps to fast_ap_2 or falls through. */ +no_fast_ap25: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,24*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap24: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,23*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap23: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,22*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap22: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,21*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap21: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,20*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap20: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,19*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap19: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,18*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap18: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,17*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap17: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,16*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap16: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,15*16 + cmp 
word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +/* no_fast_ap15 down to no_fast_ap5: same step as the labels above it; rbx is loaded with the next lower multiple of 16 before each compare because fast_ap_2 falls into fast_ap, which adds rbx to r9. */ +no_fast_ap15: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,14*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap14: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,13*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap13: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,12*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap12: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,11*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap11: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,10*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap10: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,9*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap9: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,8*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap8: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,7*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap7: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,6*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap6: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,5*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap5: + call r8 + mov 
r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,4*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap4: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,3*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +no_fast_ap3: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + + mov rbx,2*16 + cmp word ptr [r9],bx + att_je fast_ap_2 + + mov r8,qword ptr 6[r9] +/* no_fast_ap2: last step of the chain; after the call it jumps through 6[r9] instead of testing for a fast path. */ +no_fast_ap2: + call r8 + mov r9,qword ptr [rcx] + mov rdx,rcx + mov rcx,qword ptr -8[rsi] + sub rsi,8 + jmp qword ptr 6[r9] + + +/* add_empty_node_N: subtract 3 from r15 (calling collect_2 or collect_3 on borrow), store the address of __cycle__in__spine at [rdi] and advance rdi by 24. */ +/* add_empty_node_2 rotates the old rdi into rcx (rcx to rdx, rdx to r8). The larger variants move the slots below rsi up by 8 bytes, write the old rdi into the freed slot and add 8 to rsi. */ +add_empty_node_2: + sub r15,3 + jb add_empty_node_2_gc +add_empty_node_2_gc_: + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + mov r8,rdx + mov rdx,rcx + mov rcx,rdi + add rdi,24 + ret +add_empty_node_2_gc: + att_call collect_2 + att_jmp add_empty_node_2_gc_ + +add_empty_node_3: + sub r15,3 + jb add_empty_node_3_gc +add_empty_node_3_gc_: + mov qword ptr [rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_3_gc: + att_call collect_3 + att_jmp add_empty_node_3_gc_ + +add_empty_node_4: + sub r15,3 + jb add_empty_node_4_gc +add_empty_node_4_gc_: + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov qword ptr -8[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_4_gc: + att_call collect_3 + att_jmp add_empty_node_4_gc_ + +add_empty_node_5: + sub r15,3 + jb add_empty_node_5_gc +add_empty_node_5_gc_: + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov qword ptr -16[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_5_gc: + att_call collect_3 + att_jmp add_empty_node_5_gc_ + +add_empty_node_6: + sub r15,3 + jb add_empty_node_6_gc +add_empty_node_6_gc_: 
+ mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov r9,qword ptr -24[rsi] + mov qword ptr -16[rsi],r9 + mov qword ptr -24[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_6_gc: + att_call collect_3 + att_jmp add_empty_node_6_gc_ + +/* add_empty_node_7: moves the four slots -8[rsi] .. -32[rsi] up by 8 bytes through r9 and stores rdi at -32[rsi]; add_empty_node_8 and add_empty_node_9 do the same with five and six slots. */ +add_empty_node_7: + sub r15,3 + jb add_empty_node_7_gc +add_empty_node_7_gc_: + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov r9,qword ptr -24[rsi] + mov qword ptr -16[rsi],r9 + mov r9,qword ptr -32[rsi] + mov qword ptr -24[rsi],r9 + mov qword ptr -32[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_7_gc: + att_call collect_3 + att_jmp add_empty_node_7_gc_ + +add_empty_node_8: + sub r15,3 + jb add_empty_node_8_gc +add_empty_node_8_gc_: + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov r9,qword ptr -24[rsi] + mov qword ptr -16[rsi],r9 + mov r9,qword ptr -32[rsi] + mov qword ptr -24[rsi],r9 + mov r9,qword ptr -40[rsi] + mov qword ptr -32[rsi],r9 + mov qword ptr -40[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_8_gc: + att_call collect_3 + att_jmp add_empty_node_8_gc_ + +add_empty_node_9: + sub r15,3 + jb add_empty_node_9_gc +add_empty_node_9_gc_: + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov r9,qword ptr -24[rsi] + mov qword ptr -16[rsi],r9 + mov r9,qword ptr -32[rsi] + mov qword ptr -24[rsi],r9 + mov r9,qword ptr -40[rsi] + mov qword ptr -32[rsi],r9 + mov r9,qword ptr -48[rsi] + mov qword ptr -40[rsi],r9 + mov qword ptr -48[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_9_gc: + att_call collect_3 + att_jmp 
add_empty_node_9_gc_ + +add_empty_node_10: + sub r15,3 + jb add_empty_node_10_gc +add_empty_node_10_gc_: + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov r9,qword ptr -24[rsi] + mov qword ptr -16[rsi],r9 + mov r9,qword ptr -32[rsi] + mov qword ptr -24[rsi],r9 + mov r9,qword ptr -40[rsi] + mov qword ptr -32[rsi],r9 + mov r9,qword ptr -48[rsi] + mov qword ptr -40[rsi],r9 + mov r9,qword ptr -56[rsi] + mov qword ptr -48[rsi],r9 + mov qword ptr -56[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_10_gc: + att_call collect_3 + att_jmp add_empty_node_10_gc_ + +add_empty_node_11: + sub r15,3 + jb add_empty_node_11_gc +add_empty_node_11_gc_: + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov r9,qword ptr -24[rsi] + mov qword ptr -16[rsi],r9 + mov r9,qword ptr -32[rsi] + mov qword ptr -24[rsi],r9 + mov r9,qword ptr -40[rsi] + mov qword ptr -32[rsi],r9 + mov r9,qword ptr -48[rsi] + mov qword ptr -40[rsi],r9 + mov r9,qword ptr -56[rsi] + mov qword ptr -48[rsi],r9 + mov r9,qword ptr -64[rsi] + mov qword ptr -56[rsi],r9 + mov qword ptr -64[rsi],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_11_gc: + att_call collect_3 + att_jmp add_empty_node_11_gc_ + +/* add_empty_node_12/16/20/24/28/32 share one loop: rbx is the pass count (2 .. 7), rax walks down from rsi, and every pass moves four slots up by 8 bytes before rax drops by 32. */ +/* After the loop the old rdi is stored at [rax]. */ +/* NOTE(review): with rbx = 2 add_empty_node_12 moves 8 slots and writes rdi at -64[rsi], which is exactly what add_empty_node_11 does; the smaller variants move N-3 slots. Confirm against the callers whether a ninth slot should be moved here. */ +add_empty_node_32: + mov rbx,7 + att_jmp add_empty_node_12_ +add_empty_node_28: + mov rbx,6 + att_jmp add_empty_node_12_ +add_empty_node_24: + mov rbx,5 + att_jmp add_empty_node_12_ +add_empty_node_20: + mov rbx,4 + att_jmp add_empty_node_12_ +add_empty_node_16: + mov rbx,3 + att_jmp add_empty_node_12_ +add_empty_node_12: + mov rbx,2 +add_empty_node_12_: + sub r15,3 + jb add_empty_node_12_gc +add_empty_node_12_gc_: + mov rax,rsi +add_empty_node_12_lp: + mov r9,qword ptr -8[rax] + mov qword ptr [rax],r9 + mov r9,qword ptr -16[rax] + mov qword ptr -8[rax],r9 + mov r9,qword ptr 
-24[rax] + mov qword ptr -16[rax],r9 + mov r9,qword ptr -32[rax] + mov qword ptr -24[rax],r9 + sub rax,32 + sub rbx,1 + att_jne add_empty_node_12_lp + mov qword ptr [rax],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_12_gc: + att_call collect_3 + att_jmp add_empty_node_12_gc_ + +/* add_empty_node_13/17/21/25/29: rbx is the pass count (2 .. 6). One slot is moved before the loop, then every pass moves four slots up by 8 bytes; the old rdi ends up at -8[rax]. */ +add_empty_node_29: + mov rbx,6 + jmp add_empty_node_13_ +add_empty_node_25: + mov rbx,5 + att_jmp add_empty_node_13_ +add_empty_node_21: + mov rbx,4 + att_jmp add_empty_node_13_ +add_empty_node_17: + mov rbx,3 + att_jmp add_empty_node_13_ +add_empty_node_13: + mov rbx,2 +add_empty_node_13_: + sub r15,3 + jb add_empty_node_13_gc +add_empty_node_13_gc_: + mov rax,rsi + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 +add_empty_node_13_lp: + mov r9,qword ptr -16[rax] + mov qword ptr -8[rax],r9 + mov r9,qword ptr -24[rax] + mov qword ptr -16[rax],r9 + mov r9,qword ptr -32[rax] + mov qword ptr -24[rax],r9 + mov r9,qword ptr -40[rax] + mov qword ptr -32[rax],r9 + sub rax,32 + sub rbx,1 + att_jne add_empty_node_13_lp + mov qword ptr -8[rax],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_13_gc: + att_call collect_3 + att_jmp add_empty_node_13_gc_ + +/* add_empty_node_14/18/22/26/30: rbx is the pass count (2 .. 6). Two slots are moved before the loop, then every pass moves four slots up by 8 bytes; the old rdi ends up at -16[rax]. */ +add_empty_node_30: + mov rbx,6 + att_jmp add_empty_node_14_ +add_empty_node_26: + mov rbx,5 + att_jmp add_empty_node_14_ +add_empty_node_22: + mov rbx,4 + att_jmp add_empty_node_14_ +add_empty_node_18: + mov rbx,3 + att_jmp add_empty_node_14_ +add_empty_node_14: + mov rbx,2 +add_empty_node_14_: + sub r15,3 + jb add_empty_node_14_gc +add_empty_node_14_gc_: + mov rax,rsi + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 +/* The second pre-loop move must go through the r9 scratch register: rsi is still the base of the store below and of the final add rsi,8, so it must not be overwritten. */ + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 +add_empty_node_14_lp: + mov r9,qword ptr -24[rax] + mov qword ptr -16[rax],r9 + mov r9,qword ptr -32[rax] + mov qword ptr -24[rax],r9 + mov r9,qword ptr -40[rax] + mov qword ptr -32[rax],r9 + mov r9,qword ptr -48[rax] + mov qword ptr -40[rax],r9 + sub 
rax,32 + sub rbx,1 + att_jne add_empty_node_14_lp + mov qword ptr -16[rax],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_14_gc: + att_call collect_3 + att_jmp add_empty_node_14_gc_ + +/* add_empty_node_15/19/23/27/31: rbx is the pass count (2 .. 6). Three slots are moved before the loop, then every pass moves four slots up by 8 bytes; the old rdi ends up at -24[rax]. */ +add_empty_node_31: + mov rbx,6 + att_jmp add_empty_node_15_ +add_empty_node_27: + mov rbx,5 + att_jmp add_empty_node_15_ +add_empty_node_23: + mov rbx,4 + att_jmp add_empty_node_15_ +add_empty_node_19: + mov rbx,3 + att_jmp add_empty_node_15_ +add_empty_node_15: + mov rbx,2 +add_empty_node_15_: + sub r15,3 + jb add_empty_node_15_gc +add_empty_node_15_gc_: +/* The loop cursor rax starts at rsi, as in add_empty_node_12_, 13_ and 14_; r9 holds no defined value at this point. */ + mov rax,rsi + mov r9,qword ptr -8[rsi] + mov qword ptr [rsi],r9 + mov r9,qword ptr -16[rsi] + mov qword ptr -8[rsi],r9 + mov r9,qword ptr -24[rsi] + mov qword ptr -16[rsi],r9 +add_empty_node_15_lp: + mov r9,qword ptr -32[rax] + mov qword ptr -24[rax],r9 + mov r9,qword ptr -40[rax] + mov qword ptr -32[rax],r9 + mov r9,qword ptr -48[rax] + mov qword ptr -40[rax],r9 + mov r9,qword ptr -56[rax] + mov qword ptr -48[rax],r9 + sub rax,32 + sub rbx,1 + att_jne add_empty_node_15_lp + mov qword ptr -24[rax],rdi + lea rbp,__cycle__in__spine[rip] + mov qword ptr [rdi],rbp + add rsi,8 + add rdi,24 + ret +add_empty_node_15_gc: + att_call collect_3 + att_jmp add_empty_node_15_gc_ + + +/* yet_args_needed_0: subtract 2 from r15 (collect_2 on borrow), write [rdx] increased by 16 at [rdi] and rcx at 8[rdi], return the old rdi in rcx and advance rdi by 16. */ +yet_args_needed_0: + sub r15,2 + jb yet_args_needed_0_gc +yet_args_needed_0_gc_r: + mov qword ptr 8[rdi],rcx + mov rax,qword ptr [rdx] + mov rcx,rdi + add rax,16 + mov qword ptr [rdi],rax + add rdi,16 + ret + +yet_args_needed_0_gc: + att_call collect_2 + att_jmp yet_args_needed_0_gc_r + + +/* Unlabeled stub placed directly in front of yet_args_needed_1: subtract 3 from r15 and continue at build_node_2_gc_r, or at build_node_2_gc on borrow. NOTE(review): presumably entered at a fixed offset before the label; confirm before changing instruction sizes or the padding. */ + .align 2 + sub r15,3 + jae short build_node_2_gc_r + jmp short build_node_2_gc + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +yet_args_needed_1: + sub r15,3 + jb yet_args_needed_1_gc +yet_args_needed_1_gc_r: + mov qword ptr 16[rdi],rcx + mov rax,qword ptr [rdx] + mov rcx,rdi + add rax,16 + mov qword ptr [rdi],rax + mov rbx,qword ptr 8[rdx] + mov qword 
ptr 8[rdi],rbx + add rdi,24 + ret + +yet_args_needed_1_gc: + att_call collect_2 + att_jmp yet_args_needed_1_gc_r + +/* build_node_2_gc_r: write rbx, rdx and rcx into the three words at [rdi], return the old rdi in rcx and advance rdi by 24. */ +build_node_2_gc_r: + mov qword ptr [rdi],rbx + mov qword ptr 8[rdi],rdx + mov qword ptr 16[rdi],rcx + mov rcx,rdi + add rdi,24 + ret + +build_node_2_gc: + att_call collect_2 + att_jmp build_node_2_gc_r + + + .align 2 + sub r15,5 + jae short build_node_3_gc_r + jmp short build_node_3_gc + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +/* yet_args_needed_2: subtract 5 from r15 (collect_2 on borrow) and fill five words at [rdi]: [rdi] = 16[rdx], 8[rdi] = rcx, 16[rdi] = [rdx] increased by 16, 24[rdi] = 8[rdx], 32[rdi] = old rdi. Returns rcx = address of 16[rdi] and advances rdi by 40. */ +yet_args_needed_2: + sub r15,5 + jb gc_22 +gc_r_22: + mov rax,qword ptr [rdx] + mov qword ptr 8[rdi],rcx + add rax,16 + mov r10,qword ptr 8[rdx] + mov qword ptr 16[rdi],rax + lea rcx,16[rdi] + mov rbp,qword ptr 16[rdx] + mov qword ptr 24[rdi],r10 + mov qword ptr [rdi],rbp + mov qword ptr 32[rdi],rdi + add rdi,40 + ret + +gc_22: att_call collect_2 + att_jmp gc_r_22 + +/* build_node_3_gc_r: [rdi] = rbx, 8[rdi] = r8, 16[rdi] = address of 24[rdi], 24[rdi] = rdx, 32[rdi] = rcx; returns the old rdi in rcx and advances rdi by 40. */ +build_node_3_gc_r: + mov qword ptr [rdi],rbx + lea rbp,24[rdi] + mov qword ptr 8[rdi],r8 + mov qword ptr 16[rdi],rbp + mov qword ptr 24[rdi],rdx + mov qword ptr 32[rdi],rcx + mov rcx,rdi + add rdi,40 + ret + +build_node_3_gc: + att_call collect_2 + att_jmp build_node_3_gc_r + + + .align 2 + sub r15,6 + jae short build_node_4_gc_r + jmp short build_node_4_gc + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +yet_args_needed_3: + sub r15,6 + jb gc_23 +gc_r_23: + mov rax,qword ptr [rdx] + mov qword ptr 16[rdi],rcx + add rax,16 + mov r10,qword ptr 8[rdx] + mov qword ptr 24[rdi],rax + mov rdx,qword ptr 16[rdx] + mov qword ptr 32[rdi],r10 + mov rbp,qword ptr [rdx] + mov qword ptr 40[rdi],rdi + mov qword ptr [rdi],rbp + mov rbp,qword ptr 8[rdx] + lea rcx,24[rdi] + mov qword ptr 8[rdi],rbp + add rdi,48 + ret + +gc_23: att_call collect_2 + att_jmp gc_r_23 + +build_node_4_gc_r: + mov qword ptr [rdi],rbx + lea rbp,24[rdi] + mov qword ptr 8[rdi],r8 + mov qword ptr 16[rdi],rbp + mov qword ptr 24[rdi],rdx + mov qword ptr 32[rdi],rcx + mov rcx,rdi + 
mov rbp,qword ptr -8[rsi] + mov qword ptr 40[rdi],rbp + sub rsi,8 + add rdi,48 + ret + +build_node_4_gc: + att_call collect_2 + att_jmp build_node_4_gc_r + + + .align 2 + sub r15,7 + jae short build_node_5_gc_r + jmp build_node_5_gc + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +/* yet_args_needed_4: subtract 7 from r15 (collect_2 on borrow) and fill seven words at [rdi]. The first three are copied from the block that 16[rdx] points to, 24[rdi] = rcx, 32[rdi] = [rdx] increased by 16, 40[rdi] = 8[rdx], 48[rdi] = old rdi. Returns rcx = address of 32[rdi] and advances rdi by 56. */ +yet_args_needed_4: + sub r15,7 + jb gc_24 +gc_r_24: + mov rax,qword ptr [rdx] + mov qword ptr 24[rdi],rcx + add rax,16 + mov r10,qword ptr 8[rdx] + mov qword ptr 32[rdi],rax + mov rdx,qword ptr 16[rdx] + mov qword ptr 40[rdi],r10 + mov rbp,qword ptr [rdx] + mov qword ptr 48[rdi],rdi + mov qword ptr [rdi],rbp + mov rbp,qword ptr 8[rdx] + lea rcx,32[rdi] + mov qword ptr 8[rdi],rbp + mov rbp,qword ptr 16[rdx] + mov qword ptr 16[rdi],rbp + add rdi,56 + ret + +gc_24: att_call collect_2 + att_jmp gc_r_24 + +/* build_node_5_gc_r: like the smaller build_node variants, plus two extra words popped from -8[rsi] and -16[rsi]; rsi drops by 16 and rdi advances by 56. */ +build_node_5_gc_r: + mov qword ptr [rdi],rbx + lea rbp,24[rdi] + mov qword ptr 8[rdi],r8 + mov qword ptr 16[rdi],rbp + mov qword ptr 24[rdi],rdx + mov qword ptr 32[rdi],rcx + mov rcx,rdi + mov rbp,qword ptr -8[rsi] + mov qword ptr 40[rdi],rbp + mov rbp,qword ptr -16[rsi] + mov qword ptr 48[rdi],rbp + sub rsi,16 + add rdi,56 + ret + +build_node_5_gc: + att_call collect_2 + att_jmp build_node_5_gc_r + + + .align 2 + mov eax,8 + jmp build_node_ + nop + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +/* yet_args_needed_5 and up: rbx = [rdx], rax = a per-entry constant (8 here, one more for each following entry), then continue at yet_args_needed_. The unlabeled stub in front of each label loads the same constant into eax and continues at build_node_. */ +yet_args_needed_5: + mov rbx,qword ptr [rdx] + mov rax,8 + jmp yet_args_needed_ + + + .align 2 + mov eax,9 + att_jmp build_node_ + nop + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +yet_args_needed_6: + mov rbx,qword ptr [rdx] + mov rax,9 + att_jmp yet_args_needed_ + + .align 2 + mov eax,10 + att_jmp build_node_ + nop + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +yet_args_needed_7: + mov rbx,qword ptr [rdx] + mov rax,10 + att_jmp yet_args_needed_ + + 
.align 2 + mov eax,11 + att_jmp build_node_ + nop + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +/* yet_args_needed_8: rbx = [rdx], rax = 11, then continue at yet_args_needed_. The unlabeled stub in front of the label sets eax = 11 and continues at build_node_; twelve extra nops are emitted before the label when PROFILE is set. The entries up to yet_args_needed_15 differ only in the constant. */ +yet_args_needed_8: + mov rbx,qword ptr [rdx] + mov rax,11 + att_jmp yet_args_needed_ + + .align 2 + mov eax,12 + att_jmp build_node_ + nop + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +yet_args_needed_9: + mov rbx,qword ptr [rdx] + mov rax,12 + att_jmp yet_args_needed_ + + .align 2 + mov eax,13 + att_jmp build_node_ + nop + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +yet_args_needed_10: + mov rbx,qword ptr [rdx] + mov rax,13 + att_jmp yet_args_needed_ + + .align 2 + mov eax,14 + att_jmp build_node_ + nop + nop + .align 2 + .if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + .endif +yet_args_needed_11: + mov rbx,qword ptr [rdx] + mov rax,14 + att_jmp yet_args_needed_ + + .align 2 + mov eax,15 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_12: + mov rbx,qword ptr [rdx] + mov rax,15 + att_jmp yet_args_needed_ + + .align 2 + mov eax,16 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_13: + mov rbx,qword ptr [rdx] + mov rax,16 + att_jmp yet_args_needed_ + + .align 2 + mov eax,17 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_14: + mov rbx,qword ptr [rdx] + mov rax,17 + att_jmp yet_args_needed_ + + .align 2 + mov eax,18 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_15: + mov rbx,qword ptr [rdx] + mov rax,18 + att_jmp 
yet_args_needed_ + + .align 2 + mov eax,19 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_16: + mov rbx,qword ptr [rdx] + mov rax,19 + att_jmp yet_args_needed_ + + .align 2 + mov eax,20 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_17: + mov rbx,qword ptr [rdx] + mov rax,20 + att_jmp yet_args_needed_ + + .align 2 + mov eax,21 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_18: + mov rbx,qword ptr [rdx] + mov rax,21 + att_jmp yet_args_needed_ + + .align 2 + mov eax,22 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_19: + mov rbx,qword ptr [rdx] + mov rax,22 + att_jmp yet_args_needed_ + + .align 2 + mov eax,23 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_20: + mov rbx,qword ptr [rdx] + mov rax,23 + att_jmp yet_args_needed_ + + .align 2 + mov eax,24 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_21: + mov rbx,qword ptr [rdx] + mov rax,24 + att_jmp yet_args_needed_ + + .align 2 + mov eax,25 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_22: + mov rbx,qword ptr [rdx] + mov rax,25 + att_jmp yet_args_needed_ + + .align 2 + mov eax,26 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_23: + mov rbx,qword ptr [rdx] + mov rax,26 + 
att_jmp yet_args_needed_ + + .align 2 + mov eax,27 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_24: + mov rbx,qword ptr [rdx] + mov rax,27 + att_jmp yet_args_needed_ + + .align 2 + mov eax,28 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_25: + mov rbx,qword ptr [rdx] + mov rax,28 + att_jmp yet_args_needed_ + + .align 2 + mov eax,29 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_26: + mov rbx,qword ptr [rdx] + mov rax,29 + att_jmp yet_args_needed_ + + .align 2 + mov eax,30 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_27: + mov rbx,qword ptr [rdx] + mov rax,30 + att_jmp yet_args_needed_ + + .align 2 + mov eax,31 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_28: + mov rbx,qword ptr [rdx] + mov rax,31 + att_jmp yet_args_needed_ + + .align 2 + mov eax,32 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_29: + mov rbx,qword ptr [rdx] + mov rax,32 + att_jmp yet_args_needed_ + + .align 2 + mov eax,33 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_30: + mov rbx,qword ptr [rdx] + mov rax,33 + att_jmp yet_args_needed_ + + .align 2 + mov eax,34 + att_jmp build_node_ + nop + nop + .align 2 +.if PROFILE + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop +.endif +yet_args_needed_31: + mov rbx,qword ptr [rdx] + mov 
rax,34 + att_jmp yet_args_needed_ + +yet_args_needed: + mov rbx,qword ptr [rdx] + movzx rax,word ptr -2[rbx] + add rax,3 +yet_args_needed_: + sub r15,rax + jb yet_args_needed_gc +yet_args_needed_gc_r: + mov r11,qword ptr 8[rdx] + sub rax,3+1+4 + mov rdx,qword ptr 16[rdx] + mov r10,rdi + mov rbp,qword ptr [rdx] + mov qword ptr [rdi],rbp + mov rbp,qword ptr 8[rdx] + mov qword ptr 8[rdi],rbp + mov rbp,qword ptr 16[rdx] + mov qword ptr 16[rdi],rbp + add rdx,24 + add rdi,24 + +yet_args_needed_cp_a: + mov rbp,qword ptr [rdx] + add rdx,8 + mov qword ptr [rdi],rbp + add rdi,8 + sub rax,1 + att_jge yet_args_needed_cp_a + + mov qword ptr [rdi],rcx + add rbx,16 + mov qword ptr 8[rdi],rbx + lea rcx,8[rdi] + mov qword ptr 16[rdi],r11 + mov qword ptr 24[rdi],r10 + add rdi,32 + ret + +yet_args_needed_gc: + att_call collect_2 + att_jmp yet_args_needed_gc_r + +build_node_: + sub r15,rax + jb build_node_gc +build_node_gc_r: + mov qword ptr [rdi],rbx + lea rbp,24[rdi] + mov qword ptr 8[rdi],r8 + mov qword ptr 16[rdi],rbp + mov qword ptr 24[rdi],rdx + mov qword ptr 32[rdi],rcx + mov rcx,rdi + mov r8,qword ptr -8[rsi] + mov qword ptr 40[rdi],r8 + mov r8,qword ptr -16[rsi] + sub rsi,16 + mov qword ptr 48[rdi],r8 + add rdi,56 + + sub rax,5+2 +build_node_cp_a: + mov r8,qword ptr -8[rsi] + sub rsi,8 + mov qword ptr [rdi],r8 + add rdi,8 + sub rax,1 + att_jne build_node_cp_a + + ret + +build_node_gc: + att_call collect_3 + att_jmp build_node_gc_r + diff --git a/macho64/acompact.s b/macho64/acompact.s new file mode 100644 index 0000000..44251d4 --- /dev/null +++ b/macho64/acompact.s @@ -0,0 +1,1184 @@ + +/* mark used nodes and pointers in argument parts and link backward pointers */ + + mov rax,qword ptr heap_size_65[rip] + shl rax,6 + mov qword ptr heap_size_64_65[rip],rax + + lea rax,(-16000)[rsp] + mov qword ptr end_stack[rip],rax + + mov rax,qword ptr caf_list[rip] + + test qword ptr _flags[rip],4096 + jne pmarkr + + test rax,rax + je end_mark_cafs + +mark_cafs_lp: + push (-8)[rax] + + 
lea rsi,8[rax] + mov rax,qword ptr [rax] + lea rcx,[rsi+rax*8] + + mov qword ptr end_vector[rip],rcx + + call rmark_stack_nodes + + pop rax + test rax,rax + att_jne mark_cafs_lp + +end_mark_cafs: + mov rsi,qword ptr stack_p[rip] + + mov rcx,qword ptr stack_top[rip] + mov qword ptr end_vector[rip],rcx + + att_call rmark_stack_nodes + + att_call add_mark_compact_garbage_collect_time + + jmp compact_heap + +pmarkr: + test rax,rax + je end_rmarkp_cafs + +rmarkp_cafs_lp: + push (-8)[rax] + + lea rsi,8[rax] + mov rax,qword ptr [rax] + lea rcx,[rsi+rax*8] + + mov qword ptr end_vector[rip],rcx + + call rmarkp_stack_nodes + + pop rax + test rax,rax + att_jne rmarkp_cafs_lp + +end_rmarkp_cafs: + mov rsi,qword ptr stack_p[rip] + + mov rcx,qword ptr stack_top[rip] + mov qword ptr end_vector[rip],rcx + + att_call rmarkp_stack_nodes + + att_call add_mark_compact_garbage_collect_time + + att_jmp compact_heap + + .include "acompact_rmark.s" + + .include "acompact_rmark_prefetch.s" + + .include "acompact_rmarkr.s" + +/* compact the heap */ + +compact_heap: + + lea rcx,finalizer_list[rip] + lea rdx,free_finalizer_list[rip] + + mov rbp,qword ptr [rcx] +determine_free_finalizers_after_compact1: + lea r9,__Nil-8[rip] + cmp rbp,r9 + je end_finalizers_after_compact1 + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rbp + mov rbx,rax +/* test the mark bit of the finalizer node: rax = offset into bit_set_table2, rbx = dword index into the vector at rdi; the mask is 31*8, the same as at every other bit_set_table2 lookup */ + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rax] + test esi,dword ptr [rdi+rbx*4] + je finalizer_not_used_after_compact1 + + mov rax,qword ptr [rbp] + mov rsi,rbp + jmp finalizer_find_descriptor + +finalizer_find_descriptor_lp: + and rax,-4 + mov rsi,rax + mov rax,qword ptr [rax] +finalizer_find_descriptor: + test rax,1 + att_jne finalizer_find_descriptor_lp + + lea r9,e____system__kFinalizerGCTemp+2[rip] + mov qword ptr [rsi],r9 + + cmp rbp,rcx + ja finalizer_no_reverse + + mov rax,qword ptr [rbp] + lea rsi,1[rcx] + mov qword ptr [rbp],rsi + mov qword ptr [rcx],rax + +finalizer_no_reverse: + lea rcx,8[rbp] + mov rbp,qword
ptr 8[rbp] + att_jmp determine_free_finalizers_after_compact1 + +finalizer_not_used_after_compact1: + lea r9,e____system__kFinalizerGCTemp+2[rip] + mov qword ptr [rbp],r9 + + mov qword ptr [rdx],rbp + lea rdx,8[rbp] + + mov rbp ,qword ptr 8[rbp] + mov qword ptr [rcx],rbp + + att_jmp determine_free_finalizers_after_compact1 + +end_finalizers_after_compact1: + mov qword ptr [rdx],rbp + + mov rcx,qword ptr finalizer_list[rip] + lea r9,__Nil-8[rip] + cmp rcx,r9 + je finalizer_list_empty + test rcx,3 + jne finalizer_list_already_reversed + mov rax ,qword ptr [rcx] + lea r9,finalizer_list+1[rip] + mov qword ptr [rcx],r9 + mov qword ptr finalizer_list[rip],rax +finalizer_list_already_reversed: +finalizer_list_empty: + + lea rbp,free_finalizer_list[rip] + lea r9,__Nil-8[rip] + cmp qword ptr [rbp],r9 + je free_finalizer_list_empty + + lea r9,free_finalizer_list+8[rip] + mov qword ptr end_vector[rip],r9 + + test qword ptr _flags[rip],4096 + je no_pmarkr + att_call rmarkp_stack_nodes + att_jmp free_finalizer_list_empty +no_pmarkr: + att_call rmark_stack_nodes + +free_finalizer_list_empty: + + mov rax,qword ptr heap_size_65[rip] + mov rbx,rax + shl rbx,6 + + add rbx,qword ptr heap_p3[rip] + + mov qword ptr end_heap_p3[rip],rbx + + add rax,3 + shr rax,2 + mov r12,rax + + mov r8,qword ptr heap_vector[rip] + + lea rbx,4[r8] + neg rbx + mov qword ptr neg_heap_vector_plus_4[rip],rbx + + mov rdi,qword ptr heap_p3[rip] + xor rsi,rsi + jmp skip_zeros + +/* %rax ,%rcx ,%rbp : free */ +find_non_zero_long: +skip_zeros: + sub r12,1 + jc end_move + mov esi,dword ptr [r8] + add r8,4 + test rsi,rsi + att_je skip_zeros +/* %rbp : free */ +end_skip_zeros: + mov rbp,qword ptr neg_heap_vector_plus_4[rip] + + add rbp,r8 + + shl rbp,6 + add rbp,qword ptr heap_p3[rip] + +bsf_and_copy_nodes: + movzx rax,sil + lea r9,first_one_bit_table[rip] + test rax,rax + jne found_bit1 + movzx rcx,si + shr rcx,8 + jne found_bit2 + mov rax,rsi + and rax,0x0ff0000 + jne found_bit3 + mov rcx,rsi + shr rcx,24 + movzx 
rcx,byte ptr [r9+rcx*1] + add rcx,24 + jmp copy_nodes + +found_bit3: + shr rax,16 + movzx rcx,byte ptr [r9+rax*1] + add rcx,16 + att_jmp copy_nodes + +found_bit2: + movzx rcx,byte ptr [r9+rcx*1] + add rcx,8 + att_jmp copy_nodes + +found_bit1: + movzx rcx,byte ptr [r9+rax*1] + +copy_nodes: + mov rax,qword ptr [rbp+rcx*8] + shr esi,1 + lea rbp,8[rbp+rcx*8] + shr esi,cl + mov rcx,rbp + + dec rax + + test rax,2 + je begin_update_list_2 + +move_argument_part: + mov rbx,qword ptr (-18)[rax] + sub rax,2 + + test rbx,1 + je end_list_2 +find_descriptor_2: + and rbx,-4 + mov rbx,qword ptr [rbx] + test rbx,1 + att_jne find_descriptor_2 + +end_list_2: + mov rdx,rbx + movzx rbx,word ptr (-2)[rbx] + cmp rbx,256 + jb no_record_arguments + + movzx rdx,word ptr (-2+2)[rdx] + sub rdx,2 + jae copy_record_arguments_aa + + sub rbx,256+3 + +copy_record_arguments_all_b: + push rbx + mov rbx,qword ptr heap_vector[rip] + +update_up_list_1r: + mov rdx,rax + add rax,qword ptr neg_heap_p3[rip] + + push rcx + + mov rcx,rax + + shr rax,8 + and rcx,31*8 + + lea r9,bit_set_table2[rip] + mov ecx,dword ptr [r9+rcx*1] + mov eax,dword ptr [rbx+rax*4] + + and rax,rcx + + pop rcx + je copy_argument_part_1r + + mov rax,qword ptr [rdx] + mov qword ptr [rdx],rdi + sub rax,3 + att_jmp update_up_list_1r + +copy_argument_part_1r: + mov rax,qword ptr [rdx] + mov qword ptr [rdx],rdi + mov qword ptr [rdi],rax + add rdi,8 + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + shr rax,3 + + mov rbx,rax + and rbx,31 + cmp rbx,1 + jae bit_in_this_word + + dec r12 + mov esi,dword ptr [r8] + add r8,4 + + mov rbp,qword ptr neg_heap_vector_plus_4[rip] + add rbp,r8 + shl rbp,6 + add rbp,qword ptr heap_p3[rip] + +bit_in_this_word: + shr esi,1 + add rbp,8 + + pop rbx + +copy_b_record_argument_part_arguments: + mov rax,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 + sub rbx,1 + att_jnc copy_b_record_argument_part_arguments + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + 
+copy_record_arguments_aa: + sub rbx,256+2 + sub rbx,rdx + + push rbx + push rdx + +update_up_list_2r: + mov rdx,rax + mov rax,qword ptr [rdx] + mov rbx,3 + and rbx,rax + sub rbx,3 + jne copy_argument_part_2r + + mov qword ptr [rdx],rdi + sub rax,3 + att_jmp update_up_list_2r + +copy_argument_part_2r: + mov qword ptr [rdx],rdi + cmp rax,rcx + jb copy_record_argument_2 + + cmp rax,qword ptr end_heap_p3[rip] + att_jae copy_record_argument_2 + + mov rdx,rax + mov rax,qword ptr [rdx] + lea rbx,1[rdi] + mov qword ptr [rdx],rbx +copy_record_argument_2: + mov qword ptr [rdi],rax + add rdi,8 + + pop rbx + sub rbx,1 + jc no_pointers_in_record + +copy_record_pointers: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jb copy_record_pointers_2 + + cmp rdx,qword ptr end_heap_p3[rip] + att_jae copy_record_pointers_2 + + mov rax,qword ptr [rdx] + inc rdi + mov qword ptr [rdx],rdi + dec rdi + mov rdx,rax +copy_record_pointers_2: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + att_jnc copy_record_pointers + +no_pointers_in_record: + pop rbx + + sub rbx,1 + jc no_non_pointers_in_record + +copy_non_pointers_in_record: + mov rax,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 + sub rbx,1 + att_jnc copy_non_pointers_in_record + +no_non_pointers_in_record: + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +no_record_arguments: + sub rbx,3 +update_up_list_2: + mov rdx,rax + mov rax,qword ptr [rax] + inc rax + mov qword ptr [rdx],rdi + test al,3 + jne copy_argument_part_2 + + sub rax,4 + att_jmp update_up_list_2 + +copy_argument_part_2: + dec rax + cmp rax,rcx + jc copy_arguments_1 + + cmp rax,qword ptr end_heap_p3[rip] + att_jnc copy_arguments_1 + + mov rdx,rax + mov rax,qword ptr [rax] + inc rdi + mov qword ptr [rdx],rdi + dec rdi +copy_arguments_1: + mov qword ptr [rdi],rax + add rdi,8 + +copy_argument_part_arguments: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc copy_arguments_2 + + cmp rdx,qword ptr end_heap_p3[rip] + 
att_jnc copy_arguments_2 + + mov rax,qword ptr [rdx] + inc rdi + mov qword ptr [rdx],rdi + dec rdi + mov rdx,rax +copy_arguments_2: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + att_jnc copy_argument_part_arguments + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +update_list_2_: + dec rax + mov qword ptr [rdx],rdi +begin_update_list_2: + mov rdx,rax + mov rax,qword ptr [rax] +update_list__2: + test rax,1 + jz end_update_list_2 + test rax,2 + att_jz update_list_2_ + lea rdx,(-3)[rax] + mov rax,qword ptr (-3)[rax] + att_jmp update_list__2 + +end_update_list_2: + mov qword ptr [rdx],rdi + + mov qword ptr [rdi],rax + add rdi,8 + + test al,2 + je move_lazy_node + + movzx rbx,word ptr (-2)[rax] + test rbx,rbx + je move_hnf_0 + + cmp rbx,256 + jae move_record + + sub rbx,2 + jc move_hnf_1 + je move_hnf_2 + +move_hnf_3: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_3_1 + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_hnf_3_1 + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_3_1: + mov qword ptr [rdi],rdx + + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_3_2 + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_hnf_3_2 + + lea rax,(8+2+1)[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_3_2: + mov qword ptr 8[rdi],rdx + add rdi,16 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_hnf_2: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_2_1 + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_hnf_2_1 + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_2_1: + mov qword ptr [rdi],rdx + + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_2_2 + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_hnf_2_2 + + lea rax,(8+1)[rdi] + mov rbx ,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_2_2: + mov qword ptr 
8[rdi],rdx + add rdi,16 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_hnf_1: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_1_ + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_hnf_1_ + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_1_: + mov qword ptr [rdi],rdx + add rdi,8 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_record: + sub rbx,258 + jb move_record_1 + je move_record_2 + +move_record_3: + movzx rbx,word ptr (-2+2)[rax] + sub rbx,1 + att_ja move_hnf_3 + + mov rdx,qword ptr [rcx] + lea rcx,8[rcx] + jb move_record_3_1b + +move_record_3_1a: + cmp rdx,rcx + att_jb move_record_3_1b + + cmp rdx,qword ptr end_heap_p3[rip] + att_jae move_record_3_1b + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_record_3_1b: + mov qword ptr [rdi],rdx + add rdi,8 + + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jb move_record_3_2 + + cmp rdx,qword ptr end_heap_p3[rip] + att_jae move_record_3_2 + + mov rax,qword ptr neg_heap_p3[rip] + + push rbp + + add rax,rdx + + mov rbx,qword ptr heap_vector[rip] + add rax,8 + mov rbp,rax + and rbp,31*8 + shr rax,8 + lea r9,bit_set_table2[rip] + mov ebp,dword ptr [r9+rbp] + test ebp,dword ptr [rbx+rax*4] + je not_linked_record_argument_part_3_b + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdi + + mov rbp,rax + and rbp,31*8 + shr rax,8 + mov ebp,dword ptr [r9+rbp] + or dword ptr [rbx+rax*4],ebp + pop rbp + + jmp linked_record_argument_part_3_b + +not_linked_record_argument_part_3_b: + or dword ptr [rbx+rax*4],ebp + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdi + + mov rbp,rax + and rbp,31*8 + shr rax,8 + lea r9,bit_clear_table2[rip] + mov ebp,dword ptr [r9+rbp] + and dword ptr [rbx+rax*4],ebp + pop rbp + +linked_record_argument_part_3_b: + mov rbx,qword ptr [rdx] + lea rax,(2+1)[rdi] + mov qword ptr [rdx],rax + mov rdx,rbx +move_record_3_2: + mov 
qword ptr [rdi],rdx + add rdi,8 + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rcx + shr rbx,3 + dec rbx + and rbx,31 + cmp rbx,2 + jb bit_in_next_word + + shr esi,2 + add rbp,16 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +bit_in_next_word: + dec r12 + mov esi,dword ptr [r8] + add r8,4 + + lea r9,bit_clear_table[rip] + and esi,dword ptr [r9+rbx*4] + + test rsi,rsi + att_je skip_zeros + att_jmp end_skip_zeros + +move_record_2: + cmp word ptr (-2+2)[rax],1 + att_ja move_hnf_2 + jb move_real_or_file + +move_record_2_ab: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jb move_record_2_1 + + cmp rdx,qword ptr end_heap_p3[rip] + att_jae move_record_2_1 + + lea rax,1[rdi] + mov rbx ,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_record_2_1: + mov qword ptr [rdi],rdx + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr 8[rdi],rbx + add rdi,16 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_record_1: + movzx rbx,word ptr (-2+2)[rax] + test rbx,rbx + att_jne move_hnf_1 + jmp move_int_bool_or_char + +move_real_or_file: + mov rax ,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 +move_int_bool_or_char: + mov rax,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 +copy_normal_hnf_0: + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + jb move_real_file_string_or_array + lea r9,CHAR+2[rip] + cmp rax,r9 + att_jbe move_int_bool_or_char + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_real_file_string_or_array: + lea r9,__STRING__+2[rip] + cmp rax,r9 + att_ja move_real_or_file + jne move_array + + mov rax,qword ptr [rcx] + add rax,7 + shr rax,3 + +cp_s_arg_lp3: + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + att_jnc cp_s_arg_lp3 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + 
+move_array: + test rsi,rsi + push rcx + jne bsf_and_end_array_bit + +skip_zeros_a: + sub r12,1 + mov esi,dword ptr [r8] + add r8,4 + test rsi,rsi + att_je skip_zeros_a + + mov rbp,qword ptr neg_heap_vector_plus_4[rip] + add rbp,r8 + + shl rbp,6 + + add rbp,qword ptr heap_p3[rip] + +bsf_and_end_array_bit: + mov rax,rsi + lea r9,first_one_bit_table[rip] + mov rdx,rsi + and rax,0x0ff + jne a_found_bit1 + and rdx,0x0ff00 + jne a_found_bit2 + mov rax,rsi + mov rdx,rsi + and rax,0x0ff0000 + jne a_found_bit3 + shr rdx,24 + movzx rcx,byte ptr [r9+rdx*1] + add rcx,24 + jmp end_array_bit +a_found_bit3: + shr rax,16 + movzx rcx,byte ptr [r9+rax*1] + add rcx,16 + att_jmp end_array_bit +a_found_bit2: + shr rdx,8 + movzx rcx,byte ptr [r9+rdx*1] + add rcx,8 + att_jmp end_array_bit +a_found_bit1: + movzx rcx,byte ptr [r9+rax*1] + +end_array_bit: + lea rbx,[rbp+rcx*8] + shr esi,1 + lea rbp,8[rbp+rcx*8] + shr esi,cl + pop rcx + + cmp rcx,rbx + jne move_a_array + +move_b_array: + mov rdx,qword ptr [rcx] + mov qword ptr [rdi],rdx + mov rbx,qword ptr 8[rcx] + add rcx,8 + + movzx rax,word ptr (-2)[rbx] + add rdi,8 + test rax,rax + je move_strict_basic_array + + sub rax,256 + imul rdx,rax + mov rax,rdx + att_jmp cp_s_arg_lp3 + +move_strict_basic_array: + mov rax,rdx + lea r9,dINT+2[rip] + cmp rbx,r9 + att_jle cp_s_arg_lp3 + lea r9,BOOL+2[rip] + cmp rbx,r9 + je move_bool_array + +move_int32_or_real32_array: + add rax,1 + shr rax,1 + att_jmp cp_s_arg_lp3 + +move_bool_array: + add rax,7 + shr rax,3 + att_jmp cp_s_arg_lp3 + +move_a_array: + mov rdx,rbx + sub rbx,rcx + shr rbx,3 + + push rsi + sub rbx,1 + jb end_array + mov rsi,qword ptr [rcx] + + mov rax,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rsi + + mov qword ptr [rdi],rax + + mov rax,qword ptr [rdx] + + mov rsi,qword ptr 8[rcx] + add rcx,16 + + mov qword ptr [rdx],rsi + + mov qword ptr 8[rdi],rax + add rdi,16 + + test rax,rax + je st_move_array_lp + + movzx rsi,word ptr (-2+2)[rax] + movzx rax,word ptr (-2)[rax] + sub rax,256 + cmp 
rax,rsi + att_je st_move_array_lp + +move_array_ab: + push rcx + + mov rdx,qword ptr (-16)[rdi] + mov rbx,rsi + imul rdx,rax + shl rdx,3 + + sub rax,rbx + add rdx,rcx + att_call reorder + + pop rcx + sub rbx,1 + sub rax,1 + + push rbx + push rax + push (-16)[rdi] + jmp st_move_array_lp_ab + +move_array_ab_lp1: + mov rax,qword ptr 16[rsp] +move_array_ab_a_elements: + mov rbx,qword ptr [rcx] + add rcx,8 + cmp rbx,rcx + jb move_array_element_ab + + cmp rbx,qword ptr end_heap_p3[rip] + att_jnc move_array_element_ab + + mov rdx,rbx + mov rbx,qword ptr [rdx] + inc rdi + mov qword ptr [rdx],rdi + dec rdi +move_array_element_ab: + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + att_jnc move_array_ab_a_elements + + mov rax,qword ptr 8[rsp] +move_array_ab_b_elements: + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + att_jnc move_array_ab_b_elements + +st_move_array_lp_ab: + sub qword ptr [rsp],1 + att_jnc move_array_ab_lp1 + + add rsp,24 + att_jmp end_array + +move_array_lp1: + mov rax,qword ptr [rcx] + add rcx,8 + add rdi,8 + cmp rax,rcx + jb move_array_element + + cmp rax,qword ptr end_heap_p3[rip] + att_jnc move_array_element + + mov rsi,qword ptr [rax] + mov rdx,rax + mov qword ptr (-8)[rdi],rsi + lea rax,(-8+1)[rdi] + mov qword ptr [rdx],rax + + sub rbx,1 + att_jnc move_array_lp1 + + att_jmp end_array + +move_array_element: + mov qword ptr (-8)[rdi],rax +st_move_array_lp: + sub rbx,1 + att_jnc move_array_lp1 + +end_array: + pop rsi + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_lazy_node: + mov rdx,rax + movsxd rbx,dword ptr (-4)[rdx] + test rbx,rbx + je move_lazy_node_0 + + sub rbx,1 + jle move_lazy_node_1 + + cmp rbx,256 + jge move_closure_with_unboxed_arguments + +move_lazy_node_arguments: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_lazy_node_arguments_ + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_lazy_node_arguments_ + + mov rax,qword ptr [rdx] + mov qword ptr 
[rdi],rax + lea rax,1[rdi] + add rdi,8 + mov qword ptr [rdx],rax + sub rbx,1 + att_jnc move_lazy_node_arguments + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_lazy_node_arguments_: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + att_jnc move_lazy_node_arguments + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_lazy_node_1: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_lazy_node_1_ + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_lazy_node_1_ + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_lazy_node_1_: + mov qword ptr [rdi],rdx + add rdi,16 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_lazy_node_0: + add rdi,16 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_closure_with_unboxed_arguments: + je move_closure_with_unboxed_arguments_1 + add rbx,1 + mov rax,rbx + and rbx,255 + shr rax,8 + sub rbx,rax + je move_non_pointers_of_closure + + push rax + +move_closure_with_unboxed_arguments_lp: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_closure_with_unboxed_arguments_ + + cmp rdx,qword ptr end_heap_p3[rip] + att_jnc move_closure_with_unboxed_arguments_ + + mov rax,qword ptr [rdx] + mov qword ptr [rdi],rax + lea rax,1[rdi] + add rdi,8 + mov qword ptr [rdx],rax + sub rbx,1 + att_jne move_closure_with_unboxed_arguments_lp + + pop rax + att_jmp move_non_pointers_of_closure + +move_closure_with_unboxed_arguments_: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + att_jne move_closure_with_unboxed_arguments_lp + + pop rax + +move_non_pointers_of_closure: + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + att_jne move_non_pointers_of_closure + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +move_closure_with_unboxed_arguments_1: + mov rax,qword ptr [rcx] + mov qword ptr [rdi],rax + add 
rdi,16 + + test rsi,rsi + att_jne bsf_and_copy_nodes + att_jmp find_non_zero_long + +end_move: + + mov rcx,qword ptr finalizer_list[rip] + +restore_finalizer_descriptors: + lea r9,__Nil-8[rip] + cmp rcx,r9 + je end_restore_finalizer_descriptors + + lea r9,e____system__kFinalizer+2[rip] + mov qword ptr [rcx],r9 + mov rcx,qword ptr 8[rcx] + att_jmp restore_finalizer_descriptors + +end_restore_finalizer_descriptors: + diff --git a/macho64/acompact_rmark.s b/macho64/acompact_rmark.s new file mode 100644 index 0000000..359fe1b --- /dev/null +++ b/macho64/acompact_rmark.s @@ -0,0 +1,953 @@ + +rmark_stack_nodes1: + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr [rsi],rbx + mov qword ptr [rcx],rax + +rmark_next_stack_node: + add rsi,8 + cmp rsi,qword ptr end_vector[rip] + je end_rmark_nodes + +rmark_stack_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmark_next_stack_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + att_jne rmark_stack_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmark_stack_node + + add rsi,8 + cmp rsi,qword ptr end_vector[rip] + att_jne rmark_stack_nodes + ret + +rmark_stack_node: + sub rsp,16 + mov qword ptr [rsi],rax + lea rbp,1[rsi] + mov qword ptr 8[rsp],rsi + mov rbx,-1 + mov qword ptr [rsp],0 + mov qword ptr [rcx],rbp + jmp rmark_no_reverse + +rmark_node_d1: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + jnc rmark_next_node + + jmp rmark_node_ + +rmark_hnf_2: + lea rbx,8[rcx] + mov rax,qword ptr 8[rcx] + sub rsp,16 + + mov rsi,rcx + mov rcx,qword ptr [rcx] + + mov qword ptr 8[rsp],rbx + mov qword ptr [rsp],rax + + cmp rsp,qword ptr end_stack[rip] + jb rmark_using_reversal + +rmark_node: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp 
rax,qword ptr heap_size_64_65[rip] + att_jnc rmark_next_node + + mov rbx,rsi + +rmark_node_: + mov rdx,rax + and rax,31*8 + shr rdx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + jne rmark_reverse_and_mark_next_node + + or rbp,rax + mov dword ptr [rdi+rdx*4],ebp + + mov rax,qword ptr [rcx] +rmark_arguments: + cmp rcx,rbx + att_ja rmark_no_reverse + + lea rbp,1[rsi] + mov qword ptr [rsi],rax + mov qword ptr [rcx],rbp + +rmark_no_reverse: + test al,2 + je rmark_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je rmark_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae rmark_record + + sub rbp,2 + att_je rmark_hnf_2 + jc rmark_hnf_1 + +rmark_hnf_3: + mov rdx,qword ptr 8[rcx] +rmark_hnf_3_: + cmp rsp,qword ptr end_stack[rip] + jb rmark_using_reversal_ + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,[rdi+rbx*4] + jne rmark_shared_argument_part + + or dword ptr [rdi+rbx*4],eax + +rmark_no_shared_argument_part: + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + lea rdx,[rdx+rbp*8] + mov qword ptr [rsp],rcx + +rmark_push_hnf_args: + mov rbx,qword ptr [rdx] + sub rsp,16 + mov qword ptr 8[rsp],rdx + sub rdx,8 + mov qword ptr [rsp],rbx + + sub rbp,1 + att_jg rmark_push_hnf_args + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + ja rmark_no_reverse_argument_pointer + + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmark_next_node + + mov rbx,rdx + att_jmp rmark_node_ + +rmark_no_reverse_argument_pointer: + mov rsi,rdx + att_jmp rmark_node + +rmark_shared_argument_part: + cmp rdx,rcx + att_ja rmark_hnf_1 + + mov rbx,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr [rdx],rax + mov qword ptr 8[rcx],rbx + att_jmp rmark_hnf_1 + +rmark_record: + 
sub rbp,258 + je rmark_record_2 + jb rmark_record_1 + +rmark_record_3: + movzx rbp,word ptr (-2+2)[rax] + mov rdx,qword ptr (16-8)[rcx] + sub rbp,1 + jb rmark_record_3_bb + je rmark_record_3_ab + sub rbp,1 + je rmark_record_3_aab + att_jmp rmark_hnf_3_ + +rmark_record_3_bb: + sub rcx,8 + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + att_ja rmark_next_node + + add eax,eax + jne rmark_bit_in_same_word1 + inc rbp + mov rax,1 +rmark_bit_in_same_word1: + test eax,dword ptr [rdi+rbp*4] + je rmark_not_yet_linked_bb + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + add rax,16 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmark_next_node + +rmark_not_yet_linked_bb: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmark_next_node + +rmark_record_3_ab: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + att_ja rmark_hnf_1 + + add eax,eax + jne rmark_bit_in_same_word2 + inc rbp + mov rax,1 +rmark_bit_in_same_word2: + test eax,dword ptr [rdi+rbp*4] + je rmark_not_yet_linked_ab + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + add rax,8 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmark_hnf_1 + +rmark_not_yet_linked_ab: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + 
lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmark_hnf_1 + +rmark_record_3_aab: + cmp rsp,qword ptr end_stack[rip] + att_jb rmark_using_reversal_ + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,dword ptr [rdi+rbp*4] + att_jne rmark_shared_argument_part + or dword ptr [rdi+rbp*4],eax + + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + mov qword ptr [rsp],rcx + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + att_ja rmark_no_reverse_argument_pointer + + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmark_next_node + + mov rbx,rdx + att_jmp rmark_node_ + +rmark_record_2: + cmp word ptr (-2+2)[rax],1 + att_ja rmark_hnf_2 + att_je rmark_hnf_1 + att_jmp rmark_next_node + +rmark_record_1: + cmp word ptr (-2+2)[rax],0 + att_jne rmark_hnf_1 + att_jmp rmark_next_node + +rmark_lazy_node_1: +/* selectors: */ + jne rmark_selector_node_1 + +rmark_hnf_1: + mov rsi,rcx + mov rcx,qword ptr [rcx] + att_jmp rmark_node + +/* selectors */ +rmark_indirection_node: + mov rdx,qword ptr neg_heap_p3[rip] + sub rcx,8 + add rdx,rcx + + mov rbp,rdx + and rbp,31*8 + shr rdx,8 + lea r9,bit_clear_table2[rip] + mov ebp,dword ptr [r9+rbp] + and dword ptr [rdi+rdx*4],ebp + + mov rdx,rcx + cmp rcx,rbx + mov rcx,qword ptr 8[rcx] + mov qword ptr [rsi],rcx + att_ja rmark_node_d1 + mov qword ptr [rdx],rax + att_jmp rmark_node_d1 + +rmark_selector_node_1: + add rbp,3 + att_je rmark_indirection_node + + mov rdx,qword ptr [rcx] + mov qword ptr pointer_compare_address[rip],rbx + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rdx + shr rbx,3 + + add rbp,1 + jle rmark_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov 
ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmark_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmark_hnf_1 + + cmp word ptr (-2)[rbx],2 + jbe rmark_small_tuple_or_record + +rmark_large_tuple_or_record: + mov r10,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,r10 + shr rbx,3 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmark_hnf_1 + + mov rbx,qword ptr neg_heap_p3[rip] + lea rbx,(-8)[rcx+rbx] + + movsxd r11,dword ptr (-8)[rax] + add rax,r11 + + mov r11,rbx + and r11,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov r11d,dword ptr [r9+r11] + and dword ptr [rdi+rbx*4],r11d + + movzx eax,word ptr (4-8)[rax] + mov rbx,qword ptr pointer_compare_address[rip] + + lea r9,__indirection[rip] + mov qword ptr (-8)[rcx],r9 + + cmp rax,16 + jl rmark_tuple_or_record_selector_node_2 + + mov rdx,rcx + je rmark_tuple_selector_node_2 + + mov rcx,qword ptr (-24)[r10+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + att_jmp rmark_node_d1 + +rmark_tuple_selector_node_2: + mov rcx,qword ptr [r10] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + att_jmp rmark_node_d1 + +rmark_record_selector_node_1: + je rmark_strict_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmark_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmark_hnf_1 + + cmp word ptr (-2)[rbx],258 + att_jbe rmark_small_tuple_or_record + + mov r10,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,r10 + shr rbx,3 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmark_hnf_1 + +rmark_small_tuple_or_record: + mov rbx,qword ptr neg_heap_p3[rip] + lea rbx,(-8)[rcx+rbx] + + movsxd r11,dword ptr(-8)[rax] + add rax,r11 
+ + mov r11,rbx + and r11,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov r11d,dword ptr [r9+r11] + and dword ptr [rdi+rbx*4],r11d + + movzx eax,word ptr (4-8)[rax] + mov rbx,qword ptr pointer_compare_address[rip] + + lea r9,__indirection[rip] + mov qword ptr (-8)[rcx],r9 + + cmp rax,16 + att_jle rmark_tuple_or_record_selector_node_2 + mov rdx,r10 + sub rax,24 +rmark_tuple_or_record_selector_node_2: + mov rbp,rcx + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rbp],rcx + mov rdx,rbp + att_jmp rmark_node_d1 + +rmark_strict_record_selector_node_1: + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmark_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmark_hnf_1 + + cmp word ptr (-2)[rbx],258 + jbe rmark_select_from_small_record + + mov r10,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,r10 + mov rbp,rbx + + shr rbx,8 + and rbp,31*8 + lea r9,bit_set_table2[rip] + mov ebp,dword ptr [r9+rbp] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmark_hnf_1 + +rmark_select_from_small_record: + movsxd rbx,dword ptr(-8)[rax] + add rbx,rax + sub rcx,8 + + cmp rcx,qword ptr pointer_compare_address[rip] + ja rmark_selector_pointer_not_reversed + + movzx eax,word ptr (4-8)[rbx] + cmp rax,16 + jle rmark_strict_record_selector_node_2 + mov rax,qword ptr (-24)[r10+rax] + jmp rmark_strict_record_selector_node_3 +rmark_strict_record_selector_node_2: + mov rax,qword ptr [rdx+rax] +rmark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rax + + movzx eax,word ptr (6-8)[rbx] + test rax,rax + je rmark_strict_record_selector_node_5 + cmp rax,16 + jle rmark_strict_record_selector_node_4 + mov rdx,r10 + sub rax,24 +rmark_strict_record_selector_node_4: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmark_strict_record_selector_node_5: + + mov rax,qword ptr ((-8)-8)[rbx] + + add rsi,1 + mov qword ptr [rcx],rsi + 
mov qword ptr (-1)[rsi],rax + att_jmp rmark_next_node + +rmark_selector_pointer_not_reversed: + movzx eax,word ptr (4-8)[rbx] + cmp rax,16 + jle rmark_strict_record_selector_node_6 + mov rax,qword ptr (-24)[r10+rax] + jmp rmark_strict_record_selector_node_7 +rmark_strict_record_selector_node_6: + mov rax,qword ptr [rdx+rax] +rmark_strict_record_selector_node_7: + mov qword ptr 8[rcx],rax + + movzx eax,word ptr (6-8)[rbx] + test rax,rax + je rmark_strict_record_selector_node_9 + cmp rax,16 + jle rmark_strict_record_selector_node_8 + mov rdx,r10 + sub rax,24 +rmark_strict_record_selector_node_8: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmark_strict_record_selector_node_9: + + mov rax,qword ptr ((-8)-8)[rbx] + mov qword ptr [rcx],rax + att_jmp rmark_next_node + +rmark_reverse_and_mark_next_node: + cmp rcx,rbx + att_ja rmark_next_node + + mov rax,qword ptr [rcx] + mov qword ptr [rsi],rax + add rsi,1 + mov qword ptr [rcx],rsi + +/* %rbp ,%rbx : free */ + +rmark_next_node: + mov rcx,qword ptr [rsp] + mov rsi,qword ptr 8[rsp] + add rsp,16 + + cmp rcx,1 + att_ja rmark_node + +rmark_next_node_: +end_rmark_nodes: + ret + +rmark_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + att_je rmark_next_node + + add rcx,8 + + sub rbp,1 + att_jle rmark_lazy_node_1 + + cmp rbp,255 + jge rmark_closure_with_unboxed_arguments + +rmark_closure_with_unboxed_arguments_: + lea rcx,[rcx+rbp*8] + +rmark_push_lazy_args: + mov rbx,qword ptr [rcx] + sub rsp,16 + mov qword ptr 8[rsp],rcx + sub rcx,8 + mov qword ptr [rsp],rbx + sub rbp,1 + att_jg rmark_push_lazy_args + + mov rsi,rcx + mov rcx,qword ptr [rcx] + + cmp rsp,qword ptr end_stack[rip] + att_jae rmark_node + + att_jmp rmark_using_reversal + +rmark_closure_with_unboxed_arguments: +/* (a_size+b_size)+(b_size<<8) */ +/* addl $1,%rbp */ + mov rax,rbp + and rbp,255 + shr rax,8 + sub rbp,rax +/* subl $1,%rbp */ + att_jg rmark_closure_with_unboxed_arguments_ + att_je rmark_hnf_1 + att_jmp rmark_next_node + 
+rmark_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + je rmark_int_3 + + lea r9,CHAR+2[rip] + cmp rax,r9 + je rmark_char_3 + + jb rmark_no_normal_hnf_0 + + mov rbp,qword ptr neg_heap_p3[rip] + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + lea r9,bit_clear_table2[rip] + mov edx,dword ptr [r9+rdx] + and dword ptr [rdi+rbp*4],edx + + lea rdx,((-8)-2)[rax] + mov qword ptr [rsi],rdx + cmp rcx,rbx + att_ja rmark_next_node + mov qword ptr [rcx],rax + att_jmp rmark_next_node + +rmark_int_3: + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + att_jnc rmark_next_node + + shl rbp,4 + lea rdx,small_integers[rip] + add rdx,rbp + mov rbp,qword ptr neg_heap_p3[rip] + mov qword ptr [rsi],rdx + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + lea r9,bit_clear_table2[rip] + mov edx,dword ptr [r9+rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + att_ja rmark_next_node + mov qword ptr [rcx],rax + att_jmp rmark_next_node + +rmark_char_3: + movzx rdx,byte ptr 8[rcx] + mov rbp,qword ptr neg_heap_p3[rip] + + shl rdx,4 + add rbp,rcx + lea r9,static_characters[rip] + add rdx,r9 + mov qword ptr [rsi],rdx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + lea r9,bit_clear_table2[rip] + mov edx,dword ptr [r9+rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + att_ja rmark_next_node + mov qword ptr [rcx],rax + att_jmp rmark_next_node + +rmark_no_normal_hnf_0: + lea r9,__ARRAY__+2[rip] + cmp rax,r9 + att_jne rmark_next_node + + mov rax,qword ptr 16[rcx] + test rax,rax + je rmark_lazy_array + + movzx rdx,word ptr (-2+2)[rax] + test rdx,rdx + je rmark_b_array + + movzx rax,word ptr (-2)[rax] + test rax,rax + att_je rmark_b_array + + cmp rsp,qword ptr end_stack[rip] + jb rmark_array_using_reversal + + sub rax,256 + cmp rdx,rax + mov rbx,rdx + je rmark_a_record_array + +rmark_ab_record_array: + mov rdx,qword ptr 8[rcx] + add rcx,16 + push rcx + + imul rdx,rax + shl rdx,3 + + sub rax,rbx + add rcx,8 + add rdx,rcx + att_call reorder + + pop rcx + mov rax,rbx + imul rax,qword ptr (-8)[rcx] + jmp 
rmark_lr_array + +rmark_b_array: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + add rax,8 + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + att_jmp rmark_next_node + +rmark_a_record_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + cmp rbx,2 + att_jb rmark_lr_array + + imul rax,rbx + att_jmp rmark_lr_array + +rmark_lazy_array: + cmp rsp,qword ptr end_stack[rip] + att_jb rmark_array_using_reversal + + mov rax,qword ptr 8[rcx] + add rcx,16 + +rmark_lr_array: + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rcx + shr rbx,3 + add rbx,rax + + mov rdx,rbx + and rbx,31 + shr rdx,5 + lea r9,bit_set_table[rip] + mov ebx,dword ptr [r9+rbx*4] + or dword ptr [rdi+rdx*4],ebx + + cmp rax,1 + jbe rmark_array_length_0_1 + + mov rdx,rcx + lea rcx,[rcx+rax*8] + + mov rax,qword ptr [rcx] + + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + + mov qword ptr [rcx],rbx + + mov rax,qword ptr (-8)[rcx] + sub rcx,8 + + mov rbx,qword ptr (-8)[rdx] + + sub rdx,8 + mov qword ptr [rcx],rbx + + mov qword ptr [rdx],rax + + push rcx + mov rsi,rdx + jmp rmark_array_nodes + +rmark_array_nodes1: + cmp rcx,rsi + ja rmark_next_array_node + + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr [rsi],rbx + mov qword ptr [rcx],rax + +rmark_next_array_node: + add rsi,8 + cmp rsi,qword ptr [rsp] + je end_rmark_array_node + +rmark_array_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmark_next_array_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + att_jne rmark_array_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmark_array_node + + add rsi,8 + cmp rsi,qword ptr [rsp] + att_jne rmark_array_nodes + +end_rmark_array_node: + add rsp,8 + att_jmp rmark_next_node + +rmark_array_node: 
+ sub rsp,16 + mov qword ptr 8[rsp],rsi + mov rbx,rsi + mov qword ptr [rsp],1 + att_jmp rmark_arguments + +rmark_array_length_0_1: + lea rcx,-16[rcx] + att_jb rmark_next_node + + mov rbx,qword ptr 24[rcx] + mov rbp,qword ptr 16[rcx] + mov qword ptr 24[rcx],rbp + mov rbp,qword ptr 8[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr 8[rcx],rbx + add rcx,8 + att_jmp rmark_hnf_1 + + + .data +pointer_compare_address: + .quad 0 + + .text diff --git a/macho64/acompact_rmark_prefetch.s b/macho64/acompact_rmark_prefetch.s new file mode 100644 index 0000000..4ca3ec5 --- /dev/null +++ b/macho64/acompact_rmark_prefetch.s @@ -0,0 +1,1181 @@ + + + .data +rmarkp_n_queue_items_16: + .quad 0 +rmarkp_queue_first: + .quad 0 +rmarkp_queue: + .quad 0,0,0,0,0,0,0,0 + .quad 0,0,0,0,0,0,0,0 + .quad 0,0,0,0,0,0,0,0 + .quad 0,0,0,0,0,0,0,0 + + .text + +rmarkp_stack_nodes1: + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr [rsi],rbx + mov qword ptr [rcx],rax + +rmarkp_next_stack_node: + add rsi,8 + cmp rsi,qword ptr end_vector[rip] + je end_rmarkp_nodes + +rmarkp_stack_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_next_stack_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + att_jne rmarkp_stack_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmarkp_stack_node + + add rsi,8 + cmp rsi,qword ptr end_vector[rip] + att_jne rmarkp_stack_nodes + ret + +rmarkp_stack_node: + sub rsp,16 + mov qword ptr [rsi],rax + lea rbp,1[rsi] + mov qword ptr 8[rsp],rsi + mov rbx,-1 + mov qword ptr [rsp],0 + mov qword ptr [rcx],rbp + jmp rmarkp_no_reverse + +rmarkp_node_d1: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + jnc rmarkp_next_node + + jmp rmarkp_node_ + +rmarkp_hnf_2: + lea rbx,8[rcx] + mov rax,qword ptr 
8[rcx] + sub rsp,16 + + mov rsi,rcx + mov rcx,qword ptr [rcx] + + mov qword ptr 8[rsp],rbx + mov qword ptr [rsp],rax + + cmp rsp,qword ptr end_stack[rip] + att_jb rmark_using_reversal + +rmarkp_node: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_next_node + + mov rbx,rsi + +rmarkp_node_: + + + + mov rdx,rax + and rax,31*8 + shr rdx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,dword ptr [rdi+rdx*4] + jne rmarkp_reverse_and_mark_next_node + + mov rbp,qword ptr rmarkp_queue_first[rip] + mov rdx,qword ptr rmarkp_n_queue_items_16[rip] + + prefetch [rcx] + lea r9,rmarkp_queue[rip] + mov qword ptr [r9+rbp],rcx + mov qword ptr 8[r9+rbp],rsi + mov qword ptr 16[r9+rbp],rbx + lea rbx,[rbp+rdx] + add rbp,32 + + and rbp,7*32 + and rbx,7*32 + + mov qword ptr rmarkp_queue_first[rip],rbp + + cmp rdx,-4*32 + je rmarkp_last_item_in_queue + +rmarkp_add_items: + mov rcx,[rsp] + cmp rcx,1 + jbe rmarkp_add_stacked_item + + mov rsi,8[rsp] + add rsp,16 + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_add_items + + mov rdx,rax + and rax,31*8 + shr rdx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + je rmarkp_add_item + + cmp rcx,rsi + att_ja rmarkp_add_items + + mov rax,[rcx] + mov [rsi],rax + add rsi,1 + mov [rcx],rsi + att_jmp rmarkp_add_items + +rmarkp_add_stacked_item: + att_je rmarkp_last_item_in_queue +rmarkp_add_items2: + mov rsi,8[rsp] + add rsi,8 + cmp rsi,qword ptr end_vector[rip] + att_je rmarkp_last_item_in_queue + + mov rcx,[rsi] + mov 8[rsp],rsi + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_add_items2 + + mov rdx,rax + and rax,31*8 + shr rdx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + je rmarkp_add_item2 + + mov rax,[rcx] + mov [rsi],rax 
+ add rsi,1 + mov [rcx],rsi + att_jmp rmarkp_add_items2 + +/* rmarkp_add_item2: append the node in rcx to the circular prefetch queue. An entry is 32 bytes: word 0 = rcx, word 8 = rsi, word 16 = -1. The write index rmarkp_queue_first wraps with "and 7*32"; rmarkp_n_queue_items_16 is lowered by 32 per entry and scanning stops once it reaches -4*32. */ +rmarkp_add_item2: + prefetch [rcx] + mov rbp,qword ptr rmarkp_queue_first[rip] + mov rdx,qword ptr rmarkp_n_queue_items_16[rip] + + lea r9,rmarkp_queue[rip] + mov qword ptr [r9+rbp],rcx + mov qword ptr 8[r9+rbp],rsi + mov qword ptr 16[r9+rbp],-1 + add rbp,32 + and rbp,7*32 + + sub rdx,32 + + mov qword ptr rmarkp_queue_first[rip],rbp + mov qword ptr rmarkp_n_queue_items_16[rip],rdx + + cmp rdx,-4*32 + att_jne rmarkp_add_items2 + att_jmp rmarkp_last_item_in_queue + +/* rmarkp_add_items3: same scan as rmarkp_add_items2, but the end of the scanned slots is read from 24[rsp] instead of end_vector, and an already marked node is only reversed when its address is not above the slot (cmp rcx,rsi / ja). */ +rmarkp_add_items3: + mov rsi,8[rsp] + add rsi,8 + cmp rsi,24[rsp] + att_je rmarkp_last_item_in_queue + + mov rcx,[rsi] + mov 8[rsp],rsi + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_add_items3 + + mov rdx,rax + and rax,31*8 + shr rdx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,[rdi+rdx*4] + test rbp,rax + je rmarkp_add_item3 + + cmp rcx,rsi + att_ja rmarkp_add_items3 + + mov rax,[rcx] + mov [rsi],rax + add rsi,1 + mov [rcx],rsi + att_jmp rmarkp_add_items3 + +/* rmarkp_add_item3: enqueue the unmarked node found by rmarkp_add_items3 (word 0 = rcx, word 8 = rsi, word 16 = rsi). */ +rmarkp_add_item3: + prefetch [rcx] + mov rbp,qword ptr rmarkp_queue_first[rip] + mov rdx,qword ptr rmarkp_n_queue_items_16[rip] + + lea r9,rmarkp_queue[rip] +/* word 0 must hold the node pointer: rmarkp_last_item_in_queue reloads rcx from it, and rmarkp_add_item / rmarkp_add_item2 store it the same way. Without this store the entry keeps whatever pointer was left in the slot. */ + mov qword ptr [r9+rbp],rcx + mov qword ptr 8[r9+rbp],rsi + mov qword ptr 16[r9+rbp],rsi + add rbp,32 + and rbp,7*32 + + sub rdx,32 + + mov qword ptr rmarkp_queue_first[rip],rbp + mov qword ptr rmarkp_n_queue_items_16[rip],rdx + + cmp rdx,-4*32 + att_jne rmarkp_add_items3 + att_jmp rmarkp_last_item_in_queue + +/* rmarkp_add_item: same enqueue with word 16 = rsi; loops back to rmarkp_add_items until the counter reaches -4*32, then falls through to dequeue. */ +rmarkp_add_item: + prefetch [rcx] + mov rbp,qword ptr rmarkp_queue_first[rip] + mov rdx,qword ptr rmarkp_n_queue_items_16[rip] + + lea r9,rmarkp_queue[rip] + mov qword ptr [r9+rbp],rcx + mov qword ptr 8[r9+rbp],rsi + mov qword ptr 16[r9+rbp],rsi + add rbp,32 + and rbp,7*32 + + sub rdx,32 + + mov qword ptr rmarkp_queue_first[rip],rbp + mov qword ptr rmarkp_n_queue_items_16[rip],rdx + + cmp rdx,-4*32 + att_jne rmarkp_add_items + +rmarkp_last_item_in_queue: + lea
r9,rmarkp_queue[rip] + mov rcx,qword ptr [r9+rbx] + + mov rax,qword ptr neg_heap_p3[rip] + + mov rsi,qword ptr 8[r9+rbx] + mov rbx,qword ptr 16[r9+rbx] + + add rax,rcx + +rmarkp_node_no_prefetch: + + + + mov rdx,rax + and rax,31*8 + shr rdx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + att_jne rmarkp_reverse_and_mark_next_node + + or rbp,rax + mov dword ptr [rdi+rdx*4],ebp + + mov rax,qword ptr [rcx] +rmarkp_arguments: + cmp rcx,rbx + att_ja rmarkp_no_reverse + + lea rbp,1[rsi] + mov qword ptr [rsi],rax + mov qword ptr [rcx],rbp + +rmarkp_no_reverse: + test al,2 + je rmarkp_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je rmarkp_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae rmarkp_record + + sub rbp,2 + att_je rmarkp_hnf_2 + jc rmarkp_hnf_1 + +rmarkp_hnf_3: + mov rdx,qword ptr 8[rcx] +rmarkp_hnf_3_: + cmp rsp,qword ptr end_stack[rip] + att_jb rmark_using_reversal_ + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,[rdi+rbx*4] + jne rmarkp_shared_argument_part + + or dword ptr [rdi+rbx*4],eax + +rmarkp_no_shared_argument_part: + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + lea rdx,[rdx+rbp*8] + mov qword ptr [rsp],rcx + +rmarkp_push_hnf_args: + mov rbx,qword ptr [rdx] + sub rsp,16 + mov qword ptr 8[rsp],rdx + sub rdx,8 + mov qword ptr [rsp],rbx + + sub rbp,1 + att_jg rmarkp_push_hnf_args + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + ja rmarkp_no_reverse_argument_pointer + + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_next_node + + mov rbx,rdx + att_jmp rmarkp_node_ + +rmarkp_no_reverse_argument_pointer: + mov rsi,rdx + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + cmp rax,qword ptr heap_size_64_65[rip] + 
att_jnc rmarkp_next_node + mov rbx,rsi + att_jmp rmarkp_node_no_prefetch + +rmarkp_shared_argument_part: + cmp rdx,rcx + att_ja rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr [rdx],rax + mov qword ptr 8[rcx],rbx + att_jmp rmarkp_hnf_1 + +rmarkp_record: + sub rbp,258 + je rmarkp_record_2 + jb rmarkp_record_1 + +rmarkp_record_3: + movzx rbp,word ptr (-2+2)[rax] + mov rdx,qword ptr (16-8)[rcx] + sub rbp,1 + jb rmarkp_record_3_bb + je rmarkp_record_3_ab + sub rbp,1 + je rmarkp_record_3_aab + att_jmp rmarkp_hnf_3_ + +rmarkp_record_3_bb: + sub rcx,8 + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + att_ja rmarkp_next_node + + add eax,eax + jne rmarkp_bit_in_same_word1 + inc rbp + mov rax,1 +rmarkp_bit_in_same_word1: + test eax,dword ptr [rdi+rbp*4] + je rmarkp_not_yet_linked_bb + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + add rax,16 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkp_next_node + +rmarkp_not_yet_linked_bb: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkp_next_node + +rmarkp_record_3_ab: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + att_ja rmarkp_hnf_1 + + add eax,eax + jne rmarkp_bit_in_same_word2 + inc rbp + mov rax,1 +rmarkp_bit_in_same_word2: + test eax,dword ptr [rdi+rbp*4] + je rmarkp_not_yet_linked_ab + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + add rax,8 + + mov rbp,rax + and rax,31*8 + shr 
rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkp_hnf_1 + +rmarkp_not_yet_linked_ab: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkp_hnf_1 + +rmarkp_record_3_aab: + cmp rsp,qword ptr end_stack[rip] + att_jb rmark_using_reversal_ + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,dword ptr [rdi+rbp*4] + att_jne rmarkp_shared_argument_part + or dword ptr [rdi+rbp*4],eax + + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + mov qword ptr [rsp],rcx + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + att_ja rmarkp_no_reverse_argument_pointer + + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_next_node + + mov rbx,rdx + att_jmp rmarkp_node_ + +rmarkp_record_2: + cmp word ptr (-2+2)[rax],1 + att_ja rmarkp_hnf_2 + att_je rmarkp_hnf_1 + att_jmp rmarkp_next_node + +rmarkp_record_1: + cmp word ptr (-2+2)[rax],0 + att_jne rmarkp_hnf_1 + att_jmp rmarkp_next_node + +rmarkp_lazy_node_1: +/* selectors: */ + jne rmarkp_selector_node_1 + +rmarkp_hnf_1: + mov rsi,rcx + mov rcx,qword ptr [rcx] + att_jmp rmarkp_node + +/* selectors */ +rmarkp_indirection_node: + mov rdx,qword ptr neg_heap_p3[rip] + sub rcx,8 + add rdx,rcx + + mov rbp,rdx + and rbp,31*8 + shr rdx,8 + lea r9,bit_clear_table2[rip] + mov ebp,dword ptr [r9+rbp] + and dword ptr [rdi+rdx*4],ebp + + mov rdx,rcx + cmp rcx,rbx + mov rcx,qword ptr 8[rcx] + mov qword ptr [rsi],rcx + att_ja rmarkp_node_d1 + mov qword ptr [rdx],rax + att_jmp rmarkp_node_d1 + +rmarkp_selector_node_1: + add rbp,3 + 
att_je rmarkp_indirection_node + + mov rdx,qword ptr [rcx] + mov qword ptr pointer_compare_address[rip],rbx + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rdx + shr rbx,3 + + add rbp,1 + jle rmarkp_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmarkp_hnf_1 + + cmp word ptr (-2)[rbx],2 + jbe rmarkp_small_tuple_or_record + +rmarkp_large_tuple_or_record: + mov r10,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,r10 + shr rbx,3 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmarkp_hnf_1 + + mov rbx,qword ptr neg_heap_p3[rip] + lea rbx,(-8)[rcx+rbx] + + movsxd r11,dword ptr(-8)[rax] + add rax,r11 + + mov r11,rbx + and r11,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov r11d,dword ptr [r9+r11] + and dword ptr [rdi+rbx*4],r11d + + movzx eax,word ptr (4-8)[rax] + mov rbx,qword ptr pointer_compare_address[rip] + + lea r9,__indirection[rip] + mov qword ptr (-8)[rcx],r9 + + cmp rax,16 + jl rmarkp_tuple_or_record_selector_node_2 + + mov rdx,rcx + je rmarkp_tuple_selector_node_2 + + mov rcx,qword ptr (-24)[r10+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + att_jmp rmarkp_node_d1 + +rmarkp_tuple_selector_node_2: + mov rcx,qword ptr [r10] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + att_jmp rmarkp_node_d1 + +rmarkp_record_selector_node_1: + je rmarkp_strict_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmarkp_hnf_1 + + cmp word ptr (-2)[rbx],258 + att_jbe rmarkp_small_tuple_or_record + + mov r10,qword ptr 16[rdx] + + mov rbx,qword ptr 
neg_heap_p3[rip] + add rbx,r10 + shr rbx,3 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmarkp_hnf_1 + +rmarkp_small_tuple_or_record: + mov rbx,qword ptr neg_heap_p3[rip] + lea rbx,(-8)[rcx+rbx] + + movsxd r11,dword ptr(-8)[rax] + add rax,r11 + + mov r11,rbx + and r11,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov r11d,dword ptr [r9+r11] + and dword ptr [rdi+rbx*4],r11d + + movzx eax,word ptr (4-8)[rax] + mov rbx,qword ptr pointer_compare_address[rip] + + lea r9,__indirection[rip] + mov qword ptr (-8)[rcx],r9 + + cmp rax,16 + att_jle rmarkp_tuple_or_record_selector_node_2 + mov rdx,r10 + sub rax,24 +rmarkp_tuple_or_record_selector_node_2: + mov rbp,rcx + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rbp],rcx + mov rdx,rbp + att_jmp rmarkp_node_d1 + +rmarkp_strict_record_selector_node_1: + mov rbp,rbx + shr rbx,5 + and rbp,31 + lea r9,bit_set_table[rip] + mov ebp,dword ptr [r9+rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmarkp_hnf_1 + + cmp word ptr (-2)[rbx],258 + jbe rmarkp_select_from_small_record + + mov r10,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,r10 + mov rbp,rbx + + shr rbx,8 + and rbp,31*8 + lea r9,bit_set_table2[rip] + mov ebp,dword ptr [r9+rbp] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + att_jne rmarkp_hnf_1 + +rmarkp_select_from_small_record: + movsxd rbx,dword ptr(-8)[rax] + add rbx,rax + sub rcx,8 + + cmp rcx,qword ptr pointer_compare_address[rip] + ja rmarkp_selector_pointer_not_reversed + + movzx eax,word ptr (4-8)[rbx] + cmp rax,16 + jle rmarkp_strict_record_selector_node_2 + mov rax,qword ptr (-24)[r10+rax] + jmp rmarkp_strict_record_selector_node_3 +rmarkp_strict_record_selector_node_2: + mov rax,qword ptr [rdx+rax] +rmarkp_strict_record_selector_node_3: + mov qword ptr 8[rcx],rax + + movzx 
eax,word ptr (6-8)[rbx] + test rax,rax + je rmarkp_strict_record_selector_node_5 + cmp rax,16 + jle rmarkp_strict_record_selector_node_4 + mov rdx,r10 + sub rax,24 +rmarkp_strict_record_selector_node_4: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmarkp_strict_record_selector_node_5: + + mov rax,qword ptr ((-8)-8)[rbx] + add rsi,1 + mov qword ptr [rcx],rsi + mov qword ptr (-1)[rsi],rax + att_jmp rmarkp_next_node + +rmarkp_selector_pointer_not_reversed: + movzx eax,word ptr (4-8)[rbx] + cmp rax,16 + jle rmarkp_strict_record_selector_node_6 + mov rax,qword ptr (-24)[r10+rax] + jmp rmarkp_strict_record_selector_node_7 +rmarkp_strict_record_selector_node_6: + mov rax,qword ptr [rdx+rax] +rmarkp_strict_record_selector_node_7: + mov qword ptr 8[rcx],rax + + movzx eax,word ptr (6-8)[rbx] + test rax,rax + je rmarkp_strict_record_selector_node_9 + cmp rax,16 + jle rmarkp_strict_record_selector_node_8 + mov rdx,r10 + sub rax,24 +rmarkp_strict_record_selector_node_8: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmarkp_strict_record_selector_node_9: + + mov rax,qword ptr ((-8)-8)[rbx] + mov qword ptr [rcx],rax + att_jmp rmarkp_next_node + +rmarkp_reverse_and_mark_next_node: + cmp rcx,rbx + att_ja rmarkp_next_node + + mov rax,qword ptr [rcx] + mov qword ptr [rsi],rax + add rsi,1 + mov qword ptr [rcx],rsi + +/* %rbp ,%rbx : free */ + +rmarkp_next_node: + mov rcx,qword ptr [rsp] + mov rsi,qword ptr 8[rsp] + add rsp,16 + + cmp rcx,1 + att_ja rmarkp_node + +rmarkp_next_node_: + mov rdx,qword ptr rmarkp_n_queue_items_16[rip] + test rdx,rdx + att_je end_rmarkp_nodes + + sub rsp,16 + + mov rbp,qword ptr rmarkp_queue_first[rip] + + lea rbx,[rbp+rdx] + add rdx,32 + + and rbx,7*32 + + mov qword ptr rmarkp_n_queue_items_16[rip],rdx + att_jmp rmarkp_last_item_in_queue + +end_rmarkp_nodes: + ret + +rmarkp_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + att_je rmarkp_next_node + + add rcx,8 + + sub rbp,1 + att_jle rmarkp_lazy_node_1 + + cmp rbp,255 + 
jge rmarkp_closure_with_unboxed_arguments + +rmarkp_closure_with_unboxed_arguments_: + lea rcx,[rcx+rbp*8] + +rmarkp_push_lazy_args: + mov rbx,qword ptr [rcx] + sub rsp,16 + mov qword ptr 8[rsp],rcx + sub rcx,8 + mov qword ptr [rsp],rbx + sub rbp,1 + att_jg rmarkp_push_lazy_args + + mov rsi,rcx + mov rcx,qword ptr [rcx] + + cmp rsp,qword ptr end_stack[rip] + att_jae rmarkp_node + + att_jmp rmark_using_reversal + +rmarkp_closure_with_unboxed_arguments: +/* (a_size+b_size)+(b_size<<8) */ +/* addl $1,%rbp */ + mov rax,rbp + and rbp,255 + shr rax,8 + sub rbp,rax +/* subl $1,%rbp */ + att_jg rmarkp_closure_with_unboxed_arguments_ + att_je rmarkp_hnf_1 + att_jmp rmarkp_next_node + +rmarkp_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + je rmarkp_int_3 + + lea r9,CHAR+2[rip] + cmp rax,r9 + je rmarkp_char_3 + + jb rmarkp_no_normal_hnf_0 + + mov rbp,qword ptr neg_heap_p3[rip] + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + lea r9,bit_clear_table2[rip] + mov edx,dword ptr [r9+rdx] + and dword ptr [rdi+rbp*4],edx + + lea rdx,((-8)-2)[rax] + mov qword ptr [rsi],rdx + cmp rcx,rbx + att_ja rmarkp_next_node + mov qword ptr [rcx],rax + att_jmp rmarkp_next_node + +rmarkp_int_3: + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + att_jnc rmarkp_next_node + + shl rbp,4 + lea rdx,small_integers[rip] + add rdx,rbp + mov rbp,qword ptr neg_heap_p3[rip] + mov qword ptr [rsi],rdx + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + lea r9,bit_clear_table2[rip] + mov edx,dword ptr [r9+rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + att_ja rmarkp_next_node + mov qword ptr [rcx],rax + att_jmp rmarkp_next_node + +rmarkp_char_3: + movzx rdx,byte ptr 8[rcx] + mov rbp,qword ptr neg_heap_p3[rip] + + shl rdx,4 + add rbp,rcx + lea r9,static_characters[rip] + add rdx,r9 + mov qword ptr [rsi],rdx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + lea r9,bit_clear_table2[rip] + mov edx,dword ptr [r9+rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + att_ja rmarkp_next_node + mov qword ptr 
[rcx],rax + att_jmp rmarkp_next_node + +rmarkp_no_normal_hnf_0: + lea r9,__ARRAY__+2[rip] + cmp rax,r9 + att_jne rmarkp_next_node + + mov rax,qword ptr 16[rcx] + test rax,rax + je rmarkp_lazy_array + + movzx rdx,word ptr (-2+2)[rax] + test rdx,rdx + je rmarkp_b_array + + movzx rax,word ptr (-2)[rax] + test rax,rax + att_je rmarkp_b_array + + cmp rsp,qword ptr end_stack[rip] + att_jb rmark_array_using_reversal + + sub rax,256 + cmp rdx,rax + mov rbx,rdx + je rmarkp_a_record_array + +rmarkp_ab_record_array: + mov rdx,qword ptr 8[rcx] + add rcx,16 + push rcx + + imul rdx,rax + shl rdx,3 + + sub rax,rbx + add rcx,8 + add rdx,rcx + att_call reorder + + pop rcx + mov rax,rbx + imul rax,qword ptr (-8)[rcx] + jmp rmarkp_lr_array + +rmarkp_b_array: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + add rax,8 + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + att_jmp rmarkp_next_node + +rmarkp_a_record_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + cmp rbx,2 + att_jb rmarkp_lr_array + + imul rax,rbx + att_jmp rmarkp_lr_array + +rmarkp_lazy_array: + cmp rsp,qword ptr end_stack[rip] + att_jb rmark_array_using_reversal + + mov rax,qword ptr 8[rcx] + add rcx,16 + +rmarkp_lr_array: + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rcx + shr rbx,3 + add rbx,rax + + mov rdx,rbx + and rbx,31 + shr rdx,5 + lea r9,bit_set_table[rip] + mov ebx,dword ptr [r9+rbx*4] + or dword ptr [rdi+rdx*4],ebx + + cmp rax,1 + jbe rmarkp_array_length_0_1 + + mov rdx,rcx + lea rcx,[rcx+rax*8] + + mov rax,qword ptr [rcx] + + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + + mov qword ptr [rcx],rbx + + mov rax,qword ptr (-8)[rcx] + sub rcx,8 + + mov rbx,qword ptr (-8)[rdx] + + sub rdx,8 + mov qword ptr [rcx],rbx + + mov qword ptr [rdx],rax + + push rcx + mov rsi,rdx + jmp rmarkp_array_nodes + +rmarkp_array_nodes1: + cmp rcx,rsi + ja rmarkp_next_array_node + + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr 
[rsi],rbx + mov qword ptr [rcx],rax + +rmarkp_next_array_node: + add rsi,8 + cmp rsi,qword ptr [rsp] + je end_rmarkp_array_node + +rmarkp_array_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + att_jnc rmarkp_next_array_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + att_jne rmarkp_array_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmarkp_array_node + + add rsi,8 + cmp rsi,qword ptr [rsp] + att_jne rmarkp_array_nodes + +end_rmarkp_array_node: + add rsp,8 + att_jmp rmarkp_next_node + +rmarkp_array_node: + sub rsp,16 + mov qword ptr 8[rsp],rsi + mov rbx,rsi + mov qword ptr [rsp],1 + att_jmp rmarkp_arguments + +rmarkp_array_length_0_1: + lea rcx,-16[rcx] + att_jb rmarkp_next_node + + mov rbx,qword ptr 24[rcx] + mov rbp,qword ptr 16[rcx] + mov qword ptr 24[rcx],rbp + mov rbp,qword ptr 8[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr 8[rcx],rbx + add rcx,8 + att_jmp rmarkp_hnf_1 diff --git a/macho64/acompact_rmarkr.s b/macho64/acompact_rmarkr.s new file mode 100644 index 0000000..852978f --- /dev/null +++ b/macho64/acompact_rmarkr.s @@ -0,0 +1,994 @@ + +rmark_using_reversal: + push rsi + push rsi + mov rsi,1 + jmp rmarkr_node + +rmark_using_reversal_: + sub rcx,8 + push rbx + push rsi + cmp rcx,rbx + ja rmark_no_undo_reverse_1 + mov qword ptr [rsi],rcx + mov qword ptr [rcx],rax +rmark_no_undo_reverse_1: + mov rsi,1 + jmp rmarkr_arguments + +rmark_array_using_reversal: + push rbx + push rsi + cmp rcx,rbx + ja rmark_no_undo_reverse_2 + mov qword ptr [rsi],rcx + lea r9,__ARRAY__+2[rip] + mov qword ptr [rcx],r9 +rmark_no_undo_reverse_2: + mov rsi,1 + att_jmp rmarkr_arguments + +rmarkr_hnf_2: + or qword ptr [rcx],2 + mov rbp,qword ptr 8[rcx] + mov qword ptr 8[rcx],rsi + lea rsi,8[rcx] + mov rcx,rbp + +rmarkr_node: + mov rax,qword ptr 
neg_heap_p3[rip] + add rax,rcx + + cmp rax,qword ptr heap_size_64_65[rip] + jnc rmarkr_next_node_after_static + + mov rbx,rax + and rax,31*8 + shr rbx,8 + + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebp,dword ptr [rdi+rbx*4] + + test rbp,rax + jne rmarkr_next_node + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + +rmarkr_arguments: + mov rax,qword ptr [rcx] + test al,2 + je rmarkr_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je rmarkr_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae rmarkr_record + + sub rbp,2 + att_je rmarkr_hnf_2 + jc rmarkr_hnf_1 + +rmarkr_hnf_3: + mov rdx,qword ptr 8[rcx] + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,[rdi+rbx*4] + jne rmarkr_shared_argument_part + + or dword ptr [rdi+rbx*4],eax + +rmarkr_no_shared_argument_part: + or qword ptr [rcx],2 + mov qword ptr 8[rcx],rsi + add rcx,8 + + or qword ptr [rdx],1 + lea rdx,[rdx+rbp*8] + + mov rbp,qword ptr [rdx] + mov qword ptr [rdx],rcx + mov rsi,rdx + mov rcx,rbp + att_jmp rmarkr_node + +rmarkr_shared_argument_part: + cmp rdx,rcx + att_ja rmarkr_hnf_1 + + mov rbx,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr [rdx],rax + mov qword ptr 8[rcx],rbx + att_jmp rmarkr_hnf_1 + +rmarkr_record: + sub rbp,258 + je rmarkr_record_2 + jb rmarkr_record_1 + +rmarkr_record_3: + movzx rbp,word ptr (-2+2)[rax] + sub rbp,1 + jb rmarkr_record_3_bb + je rmarkr_record_3_ab + dec rbp + je rmarkr_record_3_aab + att_jmp rmarkr_hnf_3 + +rmarkr_record_3_bb: + mov rdx,qword ptr (16-8)[rcx] + sub rcx,8 + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + att_ja rmarkr_next_node + + add eax,eax + jne rmarkr_bit_in_same_word1 + inc rbp + mov rax,1 +rmarkr_bit_in_same_word1: + test eax,dword ptr [rdi+rbp*4] + je 
rmarkr_not_yet_linked_bb + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + add rax,2*8 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkr_next_node + +rmarkr_not_yet_linked_bb: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkr_next_node + +rmarkr_record_3_ab: + mov rdx,qword ptr 8[rcx] + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + att_ja rmarkr_hnf_1 + + add eax,eax + jne rmarkr_bit_in_same_word2 + inc rbp + mov rax,1 +rmarkr_bit_in_same_word2: + test eax,dword ptr [rdi+rbp*4] + je rmarkr_not_yet_linked_ab + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + + add rax,8 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkr_hnf_1 + +rmarkr_not_yet_linked_ab: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + att_jmp rmarkr_hnf_1 + +rmarkr_record_3_aab: + mov rdx,qword ptr 8[rcx] + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,dword ptr [rdi+rbp*4] + att_jne rmarkr_shared_argument_part + or dword ptr [rdi+rbp*4],eax + + add qword ptr [rcx],2 + mov qword ptr 8[rcx],rsi + add rcx,8 + + mov rsi,qword ptr [rdx] + mov qword ptr [rdx],rcx + mov rcx,rsi + lea rsi,1[rdx] + att_jmp rmarkr_node + +rmarkr_record_2: + cmp 
word ptr (-2+2)[rax],1 + att_ja rmarkr_hnf_2 + att_je rmarkr_hnf_1 + sub rcx,8 + att_jmp rmarkr_next_node + +rmarkr_record_1: + cmp word ptr (-2+2)[rax],0 + att_jne rmarkr_hnf_1 + sub rcx,8 + att_jmp rmarkr_next_node + +rmarkr_lazy_node_1: + jne rmarkr_selector_node_1 + +rmarkr_hnf_1: + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + + lea rsi,2[rcx] + mov rcx,rbp + att_jmp rmarkr_node + +rmarkr_indirection_node: + mov rbx,qword ptr neg_heap_p3[rip] + lea rbx,(-8)[rcx+rbx] + + mov rax,rbx + and rax,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov eax,dword ptr [r9+rax] + and dword ptr [rdi+rbx*4],eax + + mov rcx,qword ptr [rcx] + att_jmp rmarkr_node + +rmarkr_selector_node_1: + add rbp,3 + att_je rmarkr_indirection_node + + mov rdx,qword ptr [rcx] + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rdx + shr rbx,3 + + add rbp,1 + jle rmarkr_record_selector_node_1 + + push rax + mov rax,rbx + + shr rbx,5 + and rax,31 + + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rax + + pop rax + att_jne rmarkr_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmarkr_hnf_1 + + cmp word ptr (-2)[rbx],2 + jbe rmarkr_small_tuple_or_record + +rmarkr_large_tuple_or_record: + mov rbx,qword ptr 16[rdx] + add rbx,qword ptr neg_heap_p3[rip] + shr rbx,3 + + push rax + mov rax,rbx + + shr rbx,5 + and rax,31 + + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rax + + pop rax + att_jne rmarkr_hnf_1 + + mov rbx,qword ptr neg_heap_p3[rip] + lea rbx,(-8)[rcx+rbx] + + push rcx + + movsxd rcx,dword ptr (-8)[rax] + add rax,rcx + + mov rcx,rbx + and rcx,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov ecx,dword ptr [r9+rcx] + and dword ptr [rdi+rbx*4],ecx + + movzx eax,word ptr (4-8)[rax] + cmp rax,16 + jl rmarkr_tuple_or_record_selector_node_2 + mov rdx,qword ptr 16[rdx] + je rmarkr_tuple_selector_node_2 + mov rcx,qword ptr (-24)[rdx+rax] + pop rdx + lea r9,__indirection[rip] + 
mov qword ptr (-8)[rdx],r9 + mov qword ptr [rdx],rcx + att_jmp rmarkr_node + +rmarkr_tuple_selector_node_2: + mov rcx,qword ptr [rdx] + pop rdx + lea r9,__indirection[rip] + mov qword ptr (-8)[rdx],r9 + mov qword ptr [rdx],rcx + att_jmp rmarkr_node + +rmarkr_record_selector_node_1: + je rmarkr_strict_record_selector_node_1 + + push rax + mov rax,rbx + + shr rbx,5 + and rax,31 + + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rax + + pop rax + att_jne rmarkr_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmarkr_hnf_1 + + cmp word ptr (-2)[rbx],258 + att_jbe rmarkr_small_tuple_or_record + + mov rbx,qword ptr 16[rdx] + add rbx,qword ptr neg_heap_p3[rip] + shr rbx,3 + + push rax + mov rax,rbx + shr rbx,5 + and rax,31 + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rax + pop rax + att_jne rmarkr_hnf_1 + +rmarkr_small_tuple_or_record: + mov rbx,qword ptr neg_heap_p3[rip] + lea rbx,(-8)[rcx+rbx] + + push rcx + + movsxd rcx,dword ptr(-8)[rax] + add rax,rcx + + mov rcx,rbx + and rcx,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov ecx,dword ptr [r9+rcx] + and dword ptr [rdi+rbx*4],ecx + + movzx eax,word ptr (4-8)[rax] + cmp rax,16 + att_jle rmarkr_tuple_or_record_selector_node_2 + mov rdx,qword ptr 16[rdx] + sub rax,24 +rmarkr_tuple_or_record_selector_node_2: + mov rcx,qword ptr [rdx+rax] + pop rdx + lea r9,__indirection[rip] + mov qword ptr (-8)[rdx],r9 + mov qword ptr [rdx],rcx + att_jmp rmarkr_node + +rmarkr_strict_record_selector_node_1: + push rax + mov rax,rbx + + shr rbx,5 + and rax,31 + + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rax + + pop rax + att_jne rmarkr_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + att_je rmarkr_hnf_1 + + cmp word ptr (-2)[rbx],258 + jbe rmarkr_select_from_small_record + + mov rbx,qword ptr 16[rdx] + add rbx,qword ptr neg_heap_p3[rip] + + push rax + mov rax,rbx + + 
shr rbx,8 + and rax,31*8 + + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rax + + pop rax + att_jne rmarkr_hnf_1 + +rmarkr_select_from_small_record: + /* rax += signed 32-bit offset stored at -8[rax]; the field offsets (4-8)[rax], (6-8)[rax] and the word at ((-8)-8)[rax] are all read relative to this adjusted rax */ + movsxd rbx,dword ptr(-8)[rax] + add rax,rbx + sub rcx,8 + + movzx ebx,word ptr (4-8)[rax] + cmp rbx,16 + jle rmarkr_strict_record_selector_node_2 + add rbx,qword ptr 16[rdx] + mov rbx,qword ptr (-24)[rbx] + jmp rmarkr_strict_record_selector_node_3 +rmarkr_strict_record_selector_node_2: + mov rbx,qword ptr [rdx+rbx] +rmarkr_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr (6-8)[rax] + test rbx,rbx + je rmarkr_strict_record_selector_node_5 + cmp rbx,16 + jle rmarkr_strict_record_selector_node_4 + mov rdx,qword ptr 16[rdx] + sub rbx,24 +rmarkr_strict_record_selector_node_4: + mov rbx,qword ptr [rdx+rbx] + mov qword ptr 16[rcx],rbx +rmarkr_strict_record_selector_node_5: + /* rbx is 0 or the field value just stored at this point, so the word written to [rcx] must be read relative to rax (same as ((-8)-8)[r11] in copy_strict_record_selector_6) */ + mov rax,qword ptr ((-8)-8)[rax] + mov qword ptr [rcx],rax + att_jmp rmarkr_next_node + +/* a2,d1: free */ + +rmarkr_next_node: + test rsi,3 + jne rmarkr_parent + + mov rbp,qword ptr (-8)[rsi] + mov rbx,3 + + and rbx,rbp + sub rsi,8 + + cmp rbx,3 + je rmarkr_argument_part_cycle1 + + mov rdx,qword ptr 8[rsi] + mov qword ptr [rsi],rdx + +rmarkr_c_argument_part_cycle1: + cmp rcx,rsi + ja rmarkr_no_reverse_1 + + mov rdx,qword ptr [rcx] + lea rax,(8+1)[rsi] + mov qword ptr 8[rsi],rdx + mov qword ptr [rcx],rax + + or rsi,rbx + mov rcx,rbp + xor rcx,rbx + att_jmp rmarkr_node + +rmarkr_no_reverse_1: + mov qword ptr 8[rsi],rcx + mov rcx,rbp + or rsi,rbx + xor rcx,rbx + att_jmp rmarkr_node + +rmarkr_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + att_je rmarkr_next_node + + add rcx,8 + + sub rbp,1 + att_jle rmarkr_lazy_node_1 + + cmp rbp,255 + jge rmarkr_closure_with_unboxed_arguments + +rmarkr_closure_with_unboxed_arguments_: + or qword ptr [rcx],2 + lea rcx,[rcx+rbp*8] + + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + mov rsi,rcx + mov rcx,rbp + att_jmp
rmarkr_node + +rmarkr_closure_with_unboxed_arguments: +/* (a_size+b_size)+(b_size<<8) */ +/* add rbp,1 */ + mov rax,rbp + and rbp,255 + shr rax,8 + sub rbp,rax +/* sub rbp,1 */ + att_jg rmarkr_closure_with_unboxed_arguments_ + att_je rmarkr_hnf_1 + sub rcx,8 + att_jmp rmarkr_next_node + +rmarkr_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + je rmarkr_int_3 + + lea r9,CHAR+2[rip] + cmp rax,r9 + je rmarkr_char_3 + + jb rmarkr_no_normal_hnf_0 + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rcx + + mov rcx,rbx + and rcx,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov ecx,dword ptr [r9+rcx] + and dword ptr [rdi+rbx*4],ecx + + lea rcx,((-8)-2)[rax] + att_jmp rmarkr_next_node_after_static + +rmarkr_int_3: + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + att_jnc rmarkr_next_node + + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rcx + + mov rcx,rbx + and rcx,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov ecx,dword ptr [r9+rcx] + shl rbp,4 + and dword ptr [rdi+rbx*4],ecx + + lea rcx,small_integers[rip] + add rcx,rbp + att_jmp rmarkr_next_node_after_static + +rmarkr_char_3: + mov rbx,qword ptr neg_heap_p3[rip] + + movzx rax,byte ptr 8[rcx] + add rbx,rcx + + mov rbp,rbx + and rbp,31*8 + shr rbx,8 + lea r9,bit_clear_table2[rip] + mov ebp,dword ptr [r9+rbp] + and dword ptr [rdi+rbx*4],ebp + + shl rax,4 + lea rcx,static_characters[rip] + add rcx,rax + att_jmp rmarkr_next_node_after_static + +rmarkr_no_normal_hnf_0: + lea r9,__ARRAY__+2[rip] + cmp rax,r9 + att_jne rmarkr_next_node + + mov rax,qword ptr 16[rcx] + test rax,rax + je rmarkr_lazy_array + + movzx rbx,word ptr (-2+2)[rax] + test rbx,rbx + je rmarkr_b_array + + movzx rax,word ptr (-2)[rax] + test rax,rax + att_je rmarkr_b_array + + sub rax,256 + cmp rbx,rax + je rmarkr_a_record_array + +rmarkr_ab_record_array: + mov rdx,qword ptr 8[rcx] + add rcx,16 + push rcx + + imul rdx,rax + shl rdx,3 + + sub rax,rbx + add rcx,8 + add rdx,rcx + att_call reorder + + pop rcx + mov rax,rbx + imul rax,qword ptr (-8)[rcx] + jmp 
rmarkr_lr_array + +rmarkr_b_array: + mov rax,qword ptr neg_heap_p3[rip] + add rax,rcx + add rax,8 + mov rbp,rax + and rax,31*8 + shr rbp,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + or dword ptr [rdi+rbp*4],eax + att_jmp rmarkr_next_node + +rmarkr_a_record_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + cmp rbx,2 + att_jb rmarkr_lr_array + + imul rax,rbx + att_jmp rmarkr_lr_array + +rmarkr_lazy_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + +rmarkr_lr_array: + mov rbx,qword ptr neg_heap_p3[rip] + add rbx,rcx + shr rbx,3 + add rbx,rax + + mov rdx,rbx + and rbx,31 + shr rdx,5 + lea r9,bit_set_table[rip] + mov ebx,dword ptr [r9+rbx*4] + or dword ptr [rdi+rdx*4],ebx + + cmp rax,1 + jbe rmarkr_array_length_0_1 + + mov rdx,rcx + lea rcx,[rcx+rax*8] + + mov rax,qword ptr [rcx] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov qword ptr [rcx],rbx + + mov rax,qword ptr (-8)[rcx] + sub rcx,8 + add rax,2 + mov rbx,qword ptr (-8)[rdx] + sub rdx,8 + mov qword ptr [rcx],rbx + mov qword ptr [rdx],rax + + mov rax,qword ptr (-8)[rcx] + sub rcx,8 + mov qword ptr [rcx],rsi + mov rsi,rcx + mov rcx,rax + att_jmp rmarkr_node + +rmarkr_array_length_0_1: + lea rcx,-16[rcx] + att_jb rmarkr_next_node + + mov rbx,qword ptr 24[rcx] + mov rbp,qword ptr 16[rcx] + mov qword ptr 24[rcx],rbp + mov rbp,qword ptr 8[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr 8[rcx],rbx + add rcx,8 + att_jmp rmarkr_hnf_1 + +/* a2: free */ + +rmarkr_parent: + mov rbx,rsi + and rbx,3 + + and rsi,-4 + je end_rmarkr + + sub rbx,1 + je rmarkr_argument_part_parent + + mov rbp,qword ptr [rsi] + + cmp rcx,rsi + ja rmarkr_no_reverse_2 + + mov rdx,rcx + lea rax,1[rsi] + mov rcx,qword ptr [rdx] + mov qword ptr [rdx],rax + +rmarkr_no_reverse_2: + mov qword ptr [rsi],rcx + lea rcx,(-8)[rsi] + mov rsi,rbp + att_jmp rmarkr_next_node + +rmarkr_argument_part_parent: + mov rbp,qword ptr [rsi] + + mov rdx,rsi + mov rsi,rcx + mov rcx,rdx + +rmarkr_skip_upward_pointers: + mov rax,rbp + and rax,3 + cmp 
rax,3 + jne rmarkr_no_upward_pointer + + lea rdx,(-3)[rbp] + mov rbp,qword ptr (-3)[rbp] + att_jmp rmarkr_skip_upward_pointers + +rmarkr_no_upward_pointer: + cmp rsi,rcx + ja rmarkr_no_reverse_3 + + mov rbx,rsi + mov rsi,qword ptr [rsi] + lea rax,1[rcx] + mov qword ptr [rbx],rax + +rmarkr_no_reverse_3: + mov qword ptr [rdx],rsi + lea rsi,(-8)[rbp] + + and rsi,-4 + + mov rdx,rsi + mov rbx,3 + + mov rbp,qword ptr [rsi] + + and rbx,rbp + mov rax,qword ptr 8[rdx] + + or rsi,rbx + mov qword ptr [rdx],rax + + cmp rcx,rdx + ja rmarkr_no_reverse_4 + + mov rax,qword ptr [rcx] + mov qword ptr 8[rdx],rax + lea rax,(8+2+1)[rdx] + mov qword ptr [rcx],rax + mov rcx,rbp + and rcx,-4 + att_jmp rmarkr_node + +rmarkr_no_reverse_4: + mov qword ptr 8[rdx],rcx + mov rcx,rbp + and rcx,-4 + att_jmp rmarkr_node + +rmarkr_argument_part_cycle1: + mov rax,qword ptr 8[rsi] + push rdx + +rmarkr_skip_pointer_list1: + mov rdx,rbp + and rdx,-4 + mov rbp,qword ptr [rdx] + mov rbx,3 + and rbx,rbp + cmp rbx,3 + att_je rmarkr_skip_pointer_list1 + + mov qword ptr [rdx],rax + pop rdx + att_jmp rmarkr_c_argument_part_cycle1 + +rmarkr_next_node_after_static: + test rsi,3 + jne rmarkr_parent_after_static + + mov rbp,qword ptr (-8)[rsi] + mov rbx,3 + + and rbx,rbp + sub rsi,8 + + cmp rbx,3 + je rmarkr_argument_part_cycle2 + + mov rax,qword ptr 8[rsi] + mov qword ptr [rsi],rax + +rmarkr_c_argument_part_cycle2: + mov qword ptr 8[rsi],rcx + mov rcx,rbp + or rsi,rbx + xor rcx,rbx + att_jmp rmarkr_node + +rmarkr_parent_after_static: + mov rbx,rsi + and rbx,3 + + and rsi,-4 + je end_rmarkr_after_static + + sub rbx,1 + je rmarkr_argument_part_parent_after_static + + mov rbp,qword ptr [rsi] + mov qword ptr [rsi],rcx + lea rcx,(-8)[rsi] + mov rsi,rbp + att_jmp rmarkr_next_node + +rmarkr_argument_part_parent_after_static: + mov rbp,qword ptr [rsi] + + mov rdx,rsi + mov rsi,rcx + mov rcx,rdx + +/* movl rbp,qword ptr [rdx] */ +rmarkr_skip_upward_pointers_2: + mov rax,rbp + and rax,3 + cmp rax,3 + att_jne 
rmarkr_no_reverse_3 + + lea rdx,(-3)[rbp] + mov rbp,qword ptr (-3)[rbp] + att_jmp rmarkr_skip_upward_pointers_2 + +rmarkr_argument_part_cycle2: + mov rax,qword ptr 8[rsi] + push rdx + +rmarkr_skip_pointer_list2: + mov rdx,rbp + and rdx,-4 + mov rbp,qword ptr [rdx] + mov rbx,3 + and rbx,rbp + cmp rbx,3 + att_je rmarkr_skip_pointer_list2 + + mov qword ptr [rdx],rax + pop rdx + att_jmp rmarkr_c_argument_part_cycle2 + +end_rmarkr_after_static: + mov rsi,qword ptr [rsp] + add rsp,16 + mov qword ptr [rsi],rcx + jmp rmarkr_next_stack_node + +end_rmarkr: + pop rsi + pop rbx + + cmp rcx,rbx + ja rmarkr_no_reverse_5 + + mov rdx,rcx + lea rax,1[rsi] + mov rcx,qword ptr [rcx] + mov qword ptr [rdx],rax + +rmarkr_no_reverse_5: + mov qword ptr [rsi],rcx + +rmarkr_next_stack_node: + cmp rsp,qword ptr end_stack[rip] + jae rmarkr_end + + mov rcx,qword ptr [rsp] + mov rsi,qword ptr 8[rsp] + add rsp,16 + + cmp rcx,1 + att_ja rmark_using_reversal + + test qword ptr _flags[rip],4096 + att_je rmark_next_node_ + att_jmp rmarkp_next_node_ + +rmarkr_end: + test qword ptr _flags[rip],4096 + att_je rmark_next_node + att_jmp rmarkp_next_node diff --git a/macho64/acopy.s b/macho64/acopy.s new file mode 100644 index 0000000..b4b68bc --- /dev/null +++ b/macho64/acopy.s @@ -0,0 +1,1267 @@ + +COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP = 1 + + push rsi + + mov rdi,heap_p2[rip] + + mov rax,heap_size_257[rip] + shl rax,7 + mov semi_space_size[rip],rax + lea rsi,[rdi+rax] + + mov qword ptr (heap2_begin_and_end+8)[rip],rsi + + mov rax,qword ptr caf_list[rip] + test rax,rax + je end_copy_cafs + +copy_cafs_lp: + push (-8)[rax] + + lea rbp,8[rax] + mov rbx,qword ptr [rax] + sub rbx,1 + call copy_lp2 + + pop rax + test rax,rax + att_jne copy_cafs_lp + +end_copy_cafs: + mov rbx,qword ptr [rsp] + mov rbp,stack_p[rip] + sub rbx,rbp + shr rbx,3 + + sub rbx,1 + jb end_copy0 + att_call copy_lp2 +end_copy0: + mov rbp,heap_p2[rip] + + jmp copy_lp1 +/* */ +/* Copy all referenced nodes to the other semi space */ 
+/* */ + +in_hnf_1_2: + dec rbx +copy_lp2_lp1: + att_call copy_lp2 +copy_lp1: + cmp rbp,rdi + jae end_copy1 + + mov rax,[rbp] + add rbp,8 + test al,2 + je not_in_hnf_1 +in_hnf_1: + movzx rbx,word ptr (-2)[rax] + + test rbx,rbx + je copy_array_21 + + cmp rbx,2 + att_jbe in_hnf_1_2 + + cmp rbx,256 + jae copy_record_21 + + mov rax,8[rbp] + + test al,1 + jne node_without_arguments_part + + push rbx + xor rbx,rbx + + att_call copy_lp2 + + pop rbx + add rbp,8 + + sub rbx,2 + att_jmp copy_lp2_lp1 + +node_without_arguments_part: + dec rax + xor rbx,rbx + + mov 8[rbp],rax + att_call copy_lp2 + + add rbp,8 + att_jmp copy_lp1 + +copy_record_21: + sub rbx,258 + ja copy_record_arguments_3 + + movzx rbx,word ptr (-2+2)[rax] + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + att_jb in_hnf_1_2 + + sub rbx,1 + att_ja copy_lp2_lp1 + jmp copy_node_arity1 + .else + jb copy_record_arguments_1 + + sub rbx,1 + ja copy_lp2_lp1 + je copy_node_arity1 + add rbp,16 + jmp copy_lp1 + +copy_record_arguments_1: + dec rbx + jmp copy_lp2_lp1 + je copy_lp2_lp1 + add rbp,8 + jmp copy_lp1 + .endif + +copy_record_arguments_3: + test byte ptr 8[rbp],1 + jne record_node_without_arguments_part + + movzx rdx,word ptr (-2+2)[rax] + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + sub rdx,1 + .else + test rdx,rdx + je copy_record_arguments_3b + sub rdx,1 + je copy_record_arguments_3abb + .endif + + lea rcx,(3*8)[rbp+rbx*8] + push rcx + push rdx + + sub rbx,rbx + att_call copy_lp2 + + add rbp,8 + pop rbx + dec rbx + att_call copy_lp2 + + pop rbp + att_jmp copy_lp1 + + .if ! COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +copy_record_arguments_3abb: + push rbx + sub rbx,rbx + + call copy_lp2 + + pop rbx + + lea rbp,(2*8)[rbp+rbx*8] + jmp copy_lp1 + +copy_record_arguments_3b: + lea rbp,(3*8)[rbp+rbx*8] + jmp copy_lp1 + .endif + +record_node_without_arguments_part: + and qword ptr 8[rbp],-2 + + .if ! 
COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[rax],0 + je record_node_without_arguments_part_3b + .endif + + sub rbx,rbx + att_call copy_lp2 + + add rbp,8 + att_jmp copy_lp1 + + .if ! COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +record_node_without_arguments_part_3b: + add rbp,16 + jmp copy_lp1 + .endif + +not_in_hnf_1: + movsxd rbx,dword ptr (-4)[rax] + cmp rbx,257 + jge copy_unboxed_closure_arguments + sub rbx,1 + att_jg copy_lp2_lp1 + +copy_node_arity1: + xor rbx,rbx + att_call copy_lp2 + + add rbp,8 + att_jmp copy_lp1 + +copy_unboxed_closure_arguments: + je copy_unboxed_closure_arguments1 + + xor rax,rax + mov al,bh + and rbx,255 + sub rbx,rax + + sub rbx,1 + jl copy_unboxed_closure_arguments_without_pointers + + push rax + att_call copy_lp2 + pop rax + +copy_unboxed_closure_arguments_without_pointers: + lea rbp,[rbp+rax*8] + att_jmp copy_lp1 + +copy_unboxed_closure_arguments1: + add rbp,16 + att_jmp copy_lp1 + +copy_array_21: + mov rbx,qword ptr 8[rbp] + add rbp,16 + test rbx,rbx + je copy_array_21_a + + movzx rax,word ptr (-2)[rbx] + movzx rbx,word ptr (-2+2)[rbx] + sub rax,256 + test rbx,rbx + je copy_array_21_b + + cmp rbx,rax + je copy_array_21_r_a + +copy_array_21_ab: + cmp qword ptr (-16)[rbp],0 + att_je copy_lp1 + + sub rax,rbx + shl rax,3 + sub rbx,1 + + push rbx + push rax + mov rbx,qword ptr (-16)[rbp] + sub rbx,1 + push rbx + +copy_array_21_lp_ab: + mov rbx,qword ptr 16[rsp] + att_call copy_lp2 + + add rbp,qword ptr 8[rsp] + sub qword ptr [rsp],1 + att_jnc copy_array_21_lp_ab + + add rsp,24 + att_jmp copy_lp1 + +copy_array_21_b: + mov rbx,qword ptr (-16)[rbp] + imul rbx,rax + lea rbp,[rbp+rbx*8] + att_jmp copy_lp1 + +copy_array_21_r_a: + mov rbx,qword ptr (-16)[rbp] + imul rbx,rax + sub rbx,1 + att_jc copy_lp1 + att_jmp copy_lp2_lp1 + +copy_array_21_a: + mov rbx,qword ptr (-16)[rbp] + sub rbx,1 + att_jc copy_lp1 + att_jmp copy_lp2_lp1 + +/* */ +/* Copy nodes to the other semi-space */ +/* */ + +copy_lp2: + mov rdx,qword ptr 
[rbp] + +/* selectors: */ +continue_after_selector_2: + mov rcx,qword ptr [rdx] + test cl,2 + je not_in_hnf_2 + +in_hnf_2: + movzx rax,word ptr (-2)[rcx] + test rax,rax + je copy_arity_0_node2 + + cmp rax,256 + jae copy_record_2 + + sub rax,2 + mov [rbp],rdi + + lea rbp,8[rbp ] + ja copy_hnf_node2_3 + + mov [rdi],rcx + jb copy_hnf_node2_1 + + inc rdi + mov rcx,8[rdx] + + mov [rdx],rdi + mov rax,16[rdx] + + sub rbx,1 + mov (8-1)[rdi],rcx + + mov (16-1)[rdi],rax + lea rdi,(24-1)[rdi] + + att_jae copy_lp2 + ret + +copy_hnf_node2_1: + inc rdi + mov rax,8[rdx] + + sub rbx,1 + mov [rdx],rdi + + mov (8-1)[rdi],rax + lea rdi,(16-1)[rdi] + + att_jae copy_lp2 + ret + +copy_hnf_node2_3: + mov [rdi],rcx + inc rdi + + mov [rdx],rdi + mov rcx,8[rdx] + + mov (8-1)[rdi],rcx + mov rcx,16[rdx] + + add rdi,24-1 + mov rdx,[rcx] + + test dl,1 + jne arguments_already_copied_2 + + mov (-8)[rdi],rdi + add rcx,8 + + mov [rdi],rdx + inc rdi + + mov (-8)[rcx],rdi + add rdi,8-1 + +cp_hnf_arg_lp2: + mov rdx,[rcx] + add rcx,8 + + mov [rdi],rdx + add rdi,8 + + dec rax + att_jne cp_hnf_arg_lp2 + + sub rbx,1 + att_jae copy_lp2 + ret + +arguments_already_copied_2: + mov (-8)[rdi],rdx + + sub rbx,1 + att_jae copy_lp2 + ret + +copy_arity_0_node2: + lea r9,dINT+2[rip] + cmp rcx,r9 + jb copy_real_file_or_string_2 + + lea r9,CHAR+2[rip] + cmp rcx,r9 + ja copy_normal_hnf_0_2 + +copy_int_bool_or_char_2: + mov rax,8[rdx] + je copy_char_2 + + lea r9,dINT+2[rip] + cmp rcx,r9 + jne no_small_int_or_char_2 + +copy_int_2: + cmp rax,33 + att_jae no_small_int_or_char_2 + + shl rax,4 + add rbp,8 + + lea r9,small_integers[rip] + add rax,r9 + sub rbx,1 + + mov (-8)[rbp],rax + att_jae copy_lp2 + ret + +copy_char_2: + and rax,255 + + shl rax,4 + add rbp,8 + + lea r9,static_characters[rip] + add rax,r9 + sub rbx,1 + + mov (-8)[rbp],rax + att_jae copy_lp2 + ret + +no_small_int_or_char_2: + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +copy_record_node2_1_b: + .endif + mov (-16)[rsi],rcx + add rbp,8 + + mov 
(-8)[rsi],rax + sub rsi,15 + + mov [rdx],rsi + dec rsi + + mov (-8)[rbp],rsi + + sub rbx,1 + att_jae copy_lp2 + ret + +copy_normal_hnf_0_2: + sub rcx,2-(-8) + sub rbx,1 + + mov [rbp],rcx + lea rbp,8[rbp] + att_jae copy_lp2 + ret + +copy_real_file_or_string_2: + lea r9,__STRING__+2[rip] + cmp rcx,r9 + jbe copy_string_or_array_2 + +copy_real_or_file_2: + mov (-24)[rsi],rcx + sub rsi,24-1 + + mov [rdx],rsi + dec rsi + + mov rax,8[rdx] + mov rcx,16[rdx] + + mov [rbp],rsi + add rbp,8 + + mov 8[rsi],rax + sub rbx,1 + + mov 16[rsi],rcx + + att_jae copy_lp2 + ret + +already_copied_2: + dec rcx + sub rbx,1 + + mov [rbp],rcx + lea rbp,8[rbp] + + att_jae copy_lp2 + ret + +copy_record_2: + sub rax,258 + ja copy_record_node2_3 + + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + jb copy_record_node2_1 + + cmp word ptr (-2+2)[rcx],0 + att_je copy_real_or_file_2 + + mov qword ptr [rbp],rdi + mov qword ptr [rdi],rcx + + lea rcx,1[rdi] + mov rax,qword ptr 8[rdx] + + mov qword ptr [rdx],rcx + + mov qword ptr 8[rdi],rax + mov rax,qword ptr 16[rdx] + + add rbp,8 + mov qword ptr 16[rdi],rax + + add rdi,24 + sub rbx,1 + att_jae copy_lp2 + ret + +copy_record_node2_1: + mov rax,qword ptr 8[rdx] + + cmp word ptr (-2+2)[rcx],0 + att_je copy_record_node2_1_b + + mov qword ptr [rbp],rdi + mov qword ptr [rdi],rcx + + lea rcx,1[rdi] + mov qword ptr 8[rdi],rax + + mov qword ptr [rdx],rcx + add rbp,8 + + add rdi,16 + sub rbx,1 + att_jae copy_lp2 + ret + .else + mov qword ptr [rbp],rdi + mov qword ptr [rdi],rcx + + lea rcx,1[rdi] + mov rax,qword ptr 8[rdx] + + mov qword ptr [rdx],rcx + jb copy_record_node2_1 + + mov qword ptr 8[rdi],rax + mov rax,qword ptr 16[rdx] + + add rbp,8 + mov qword ptr 16[rdi],rax + + add rdi,24 + sub rbx,1 + jae copy_lp2 + ret + +copy_record_node2_1: + add rbp,8 + mov qword ptr 8[rdi],rax + + add rdi,16 + sub rbx,1 + jae copy_lp2 + ret + .endif + +copy_record_node2_3: + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[rcx],1 + jbe 
copy_record_node2_3_ab_or_b + .endif + + push rax + lea rax,1[rdi] + + mov qword ptr [rdx],rax + mov rax,qword ptr 16[rdx] + + mov qword ptr [rdi],rcx + mov rdx,qword ptr 8[rdx] + + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + mov qword ptr 8[rdi],rdx + mov qword ptr [rbp],rdi + add rbp,8 + + mov rcx,rax + test byte ptr [rax],1 + jne record_arguments_already_copied_2 + .else + /* globals are addressed RIP-relative with r9 as scratch, matching the same sequence in copy_record_node2_3_ab_or_b below */ + mov rcx,rax + sub rax,qword ptr heap_p1[rip] + + shr rax,4 + mov qword ptr 8[rdi],rdx + + mov rdx,rax + and rax,31 + + shr rdx,3 + mov qword ptr [rbp],rdi + + and rdx,-4 + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + + add rdx,qword ptr heap_copied_vector[rip] + add rbp,8 + + test eax,[rdx] + jne record_arguments_already_copied_2 + + or [rdx],eax + .endif + lea rdx,24[rdi] + + pop rax + mov qword ptr 16[rdi],rdx + + add rdi,25 + mov rdx,qword ptr [rcx] + + mov qword ptr [rcx],rdi + add rcx,8 + + mov qword ptr (-1)[rdi],rdx + add rdi,7 + +cp_record_arg_lp2: + mov rdx,qword ptr [rcx] + add rcx,8 + + mov qword ptr [rdi],rdx + add rdi,8 + + sub rax,1 + att_jne cp_record_arg_lp2 + + sub rbx,1 + att_jae copy_lp2 + ret + +record_arguments_already_copied_2: + mov rdx,qword ptr [rcx] + pop rax + + mov qword ptr 16[rdi],rdx + add rdi,24 + + sub rbx,1 + att_jae copy_lp2 + ret + + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +copy_record_node2_3_ab_or_b: + jb copy_record_node2_3_b + + push rax + lea rax,1[rdi] + + mov qword ptr [rdx],rax + mov rax,qword ptr 16[rdx] + + mov qword ptr [rdi],rcx + mov rdx,qword ptr 8[rdx] + + mov rcx,rax + sub rax,qword ptr heap_p1[rip] + + shr rax,4 + mov qword ptr 8[rdi],rdx + + mov rdx,rax + and rax,31 + + shr rdx,3 + mov qword ptr [rbp],rdi + + and rdx,-4 + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + + add rdx,qword ptr heap_copied_vector[rip] + add rbp,8 + + test eax,[rdx] + att_jne record_arguments_already_copied_2 + + or [rdx],eax + pop rax + + sub rsi,8 + + shl rax,3 + sub rsi,rax + + push rsi + add rsi,1 + + mov qword ptr 16[rdi],rsi + add rdi,24 + +
mov rdx,qword ptr [rcx] + jmp cp_record_arg_lp3_c + +copy_record_node2_3_b: + push rax + lea rax,(-24+1)[rsi] + + mov qword ptr [rdx],rax + mov rax,qword ptr 16[rdx] + + mov qword ptr (-24)[rsi],rcx + mov rdx,qword ptr 8[rdx] + + mov rcx,rax + sub rax,qword ptr heap_p1[rip] + + shr rax,4 + mov qword ptr (-16)[rsi],rdx + + mov rdx,rax + and rax,31 + sub rsi,24 + + shr rdx,3 + mov qword ptr [rbp],rsi + + and rdx,-4 + lea r9,bit_set_table[rip] + mov eax,dword ptr [r9+rax*4] + + add rdx,qword ptr heap_copied_vector[rip] + add rbp,8 + + test eax,[rdx] + jne record_arguments_already_copied_3_b + + or [rdx],eax + pop rax + + mov rdx,rsi + sub rsi,8 + + shl rax,3 + sub rsi,rax + + mov qword ptr 16[rdx],rsi + + mov rdx,qword ptr [rcx] + + push rsi + add rsi,1 + +cp_record_arg_lp3_c: + mov qword ptr [rcx],rsi + add rcx,8 + mov qword ptr (-1) [rsi],rdx + add rsi,7 + +cp_record_arg_lp3: + mov rdx,qword ptr [rcx] + add rcx,8 + + mov qword ptr [rsi],rdx + add rsi,8 + + sub rax,8 + att_jne cp_record_arg_lp3 + + pop rsi + + sub rbx,1 + att_jae copy_lp2 + ret + +record_arguments_already_copied_3_b: + mov rdx,qword ptr [rcx] + pop rax + + sub rdx,1 + mov qword ptr 16[rsi],rdx + + sub rbx,1 + att_jae copy_lp2 + ret + .endif + +not_in_hnf_2: + test cl,1 + att_jne already_copied_2 + + movsxd rax,dword ptr (-4)[rcx] + test rax,rax + jle copy_arity_0_node2_ + +copy_node2_1_: + and rax,255 + sub rax,2 + jl copy_arity_1_node2 +copy_node2_3: + mov [rbp],rdi + add rbp,8 + mov [rdi],rcx + inc rdi + mov [rdx],rdi + mov rcx,8[rdx] + add rdx,16 + mov (8-1)[rdi],rcx + add rdi,16-1 + +cp_arg_lp2: + mov rcx,[rdx] + add rdx,8 + mov [rdi],rcx + add rdi,8 + sub rax,1 + att_jae cp_arg_lp2 + + sub rbx,1 + att_jae copy_lp2 + ret + +copy_arity_1_node2: +copy_arity_1_node2_: + mov [rbp],rdi + inc rdi + + add rbp,8 + mov [rdx],rdi + + mov rax,8[rdx] + mov (-1)[rdi],rcx + + mov (8-1)[rdi],rax + add rdi,24-1 + + sub rbx,1 + att_jae copy_lp2 + ret + +copy_indirection_2: + mov rax,rdx + mov rdx,8[rdx] + + mov 
rcx,[rdx] + test cl,2 + att_jne in_hnf_2 + + test cl,1 + att_jne already_copied_2 + + cmp dword ptr (-4)[rcx],-2 + je skip_indirections_2 + + movsxd rax,dword ptr(-4)[rcx] + test rax,rax + att_jle copy_arity_0_node2_ + att_jmp copy_node2_1_ + +skip_indirections_2: + mov rdx,8[rdx] + + mov rcx,[rdx] + test cl,2 + jne update_indirection_list_2 + test cl,1 + att_jne update_indirection_list_2 + + cmp dword ptr (-4)[rcx],-2 + att_je skip_indirections_2 + +update_indirection_list_2: + lea rcx,8[rax] + mov rax,8[rax] + mov [rcx],rdx + cmp rdx,rax + att_jne update_indirection_list_2 + + att_jmp continue_after_selector_2 + +copy_selector_2: + cmp rax,-2 + att_je copy_indirection_2 + jl copy_record_selector_2 + + mov rax,8[rdx] + + mov r10,[rax] + test r10b,2 + att_je copy_arity_1_node2_ + + movsxd r11,dword ptr (-8)[rcx] + + cmp word ptr (-2)[r10],2 + jbe copy_selector_2_ + + mov r10,16[rax] + + test byte ptr [r10],1 + att_jne copy_arity_1_node2_ + + movzx r11,word ptr (4-8)[rcx+r11] + lea r9,__indirection[rip] + mov qword ptr [rdx],r9 + + cmp r11,16 + jl copy_selector_2_1 + je copy_selector_2_2 + + mov rcx,qword ptr (-24)[r10+r11] + mov qword ptr 8[rdx],rcx + mov rdx,rcx + att_jmp continue_after_selector_2 + +copy_selector_2_1: + mov rcx,qword ptr 8[rax] + mov qword ptr 8[rdx],rcx + mov rdx,rcx + att_jmp continue_after_selector_2 + +copy_selector_2_2: + mov rcx,qword ptr [r10] + mov qword ptr 8[rdx],rcx + mov rdx,rcx + att_jmp continue_after_selector_2 + +copy_selector_2_: + movzx r11,word ptr (4-8)[rcx+r11] + lea r9,__indirection[rip] + mov qword ptr [rdx],r9 + + mov rcx,qword ptr [rax+r11] + mov qword ptr 8[rdx],rcx + mov rdx,rcx + att_jmp continue_after_selector_2 + +copy_record_selector_2: + cmp rax,-3 + mov rax,qword ptr 8[rdx] + mov r10,qword ptr [rax] + je copy_strict_record_selector_2 + + test r10b,2 + att_je copy_arity_1_node2_ + + movsxd r11,dword ptr (-8)[rcx] + + cmp word ptr (-2)[r10],258 + jbe copy_record_selector_2_ + + .if 
COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[r10],2 + jae copy_selector_2__ + .endif + + mov r12,qword ptr 16[rax] + + lea r10,(-24)[r12] + sub r12,qword ptr heap_p1[rip] + + mov r13,r12 + and r12,31*16 + + shr r13,7 + + shr r12,2 + and r13,-4 + + add r13,qword ptr heap_copied_vector[rip] + + lea r9,bit_set_table[rip] + mov r12d,dword ptr [r9+r12] + + and r12d,dword ptr [r13] + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + att_je copy_record_selector_2_ + att_jmp copy_arity_1_node2_ +copy_selector_2__: + mov r12,qword ptr 16[rax] + lea r10,(-24)[r12] + test byte ptr [r12],1 + att_jne copy_arity_1_node2_ + .else + jne copy_arity_1_node2_ + .endif +copy_record_selector_2_: + movzx r11,word ptr (4-8)[rcx+r11] + lea r9,__indirection[rip] + mov qword ptr [rdx],r9 + + cmp r11,16 + jle copy_record_selector_3 + mov rax,r10 +copy_record_selector_3: + mov rcx,qword ptr [rax+r11] + mov qword ptr 8[rdx],rcx + mov rdx,rcx + att_jmp continue_after_selector_2 + +copy_strict_record_selector_2: + test r10b,2 + att_je copy_arity_1_node2_ + + movsxd r11,dword ptr (-8)[rcx] + + cmp word ptr (-2)[r10],258 + jbe copy_strict_record_selector_2_ + + .if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[r10],2 + jb copy_strict_record_selector_2_b + + mov r12,qword ptr 16[rax] + lea r10,(-24)[r12] + test byte ptr [r12],1 + att_jne copy_arity_1_node2_ + + att_jmp copy_strict_record_selector_2_ + +copy_strict_record_selector_2_b: + .endif + + mov r12,qword ptr 16[rax] + + lea r10,(-24)[r12] + sub r12,qword ptr heap_p1[rip] + + mov r13,r12 + and r12,31*16 + + shr r13,7 + + shr r12,2 + and r13,-4 + + add r13,qword ptr heap_copied_vector[rip] + + lea r9,bit_set_table[rip] + mov r12d,dword ptr [r9+r12] + + and r12d,[r13] + + att_jne copy_arity_1_node2_ + +copy_strict_record_selector_2_: + add r11,rcx + movzx rcx,word ptr (4-8)[r11] + cmp rcx,16 + jle copy_strict_record_selector_3 + mov rcx,qword ptr [r10+rcx] + jmp copy_strict_record_selector_4 
+copy_strict_record_selector_3: + mov rcx,qword ptr [rax+rcx] +copy_strict_record_selector_4: + mov qword ptr 8[rdx],rcx + + movzx rcx,word ptr (6-8)[r11] + test rcx,rcx + je copy_strict_record_selector_6 + cmp rcx,16 + jle copy_strict_record_selector_5 + mov rax,r10 +copy_strict_record_selector_5: + mov rcx,qword ptr [rax+rcx] + mov qword ptr 16[rdx],rcx +copy_strict_record_selector_6: + + mov rcx,qword ptr ((-8)-8)[r11] + mov qword ptr [rdx],rcx + att_jmp in_hnf_2 + +copy_arity_0_node2_: + att_jl copy_selector_2 + + mov (-24)[rsi],rcx + sub rsi,24 + mov [rbp],rsi + lea rax,1[rsi] + + add rbp,8 + mov [rdx],rax + + sub rbx,1 + att_jae copy_lp2 + ret + +copy_string_or_array_2: + mov rcx,rdx + jne copy_array_2 + mov rax,rcx + + sub rax,heap_p1[rip] + cmp rax,semi_space_size[rip] + jae copy_string_constant + + mov rdx,8[rcx] + add rbp,8 + + add rdx,7 + push rbx + + mov rax,rdx + and rdx,-8 + + shr rax,3 + sub rsi,rdx + + mov rbx,[rcx] + add rcx,8 + + mov (-16)[rsi],rbx + sub rsi,16 + + mov (-8)[rbp],rsi + lea rdx,1[rsi] + + mov (-8)[rcx],rdx + lea rdx,8[rsi] + +cp_s_arg_lp2: + mov rbx,[rcx] + add rcx,8 + + mov [rdx],rbx + add rdx,8 + + sub rax,1 + att_jge cp_s_arg_lp2 + + pop rbx + sub rbx,1 + att_jae copy_lp2 + ret + +copy_string_constant: + mov qword ptr [rbp],rdx + add rbp,8 + + sub rbx,1 + att_jae copy_lp2 + ret + +copy_array_2: + push rbx + + mov rax,qword ptr 16[rcx] + test rax,rax + je copy_array_a2 + + movzx rbx,word ptr (-2)[rax] + + test rbx,rbx + je copy_strict_basic_array_2 + + sub rbx,256 + imul rbx,qword ptr 8[rcx] + jmp copy_array_a3 + +copy_array_a2: + mov rbx,qword ptr 8[rcx] +copy_array_a3: + mov rdx,rdi + lea rdi,24[rdi+rbx*8] + + mov qword ptr [rbp],rdx + mov rax,qword ptr [rcx] + + add rbp,8 + mov qword ptr [rdx],rax + + lea rax,1[rdx] + add rdx,8 + + mov qword ptr [rcx],rax + add rcx,8 + + lea rax,1[rbx] + att_jmp cp_s_arg_lp2 + +copy_strict_basic_array_2: + mov rbx,qword ptr 8[rcx] + + lea r9,dINT+2[rip] + cmp rax,r9 + jle 
copy_int_or_real_array_2 + + lea r9,BOOL+2[rip] + cmp rax,r9 + je copy_bool_array_2 + +copy_int32_or_real32_array_2: + add rbx,1 + shr rbx,1 + +copy_int_or_real_array_2: + shl rbx,3 + lea rdx,(-24)[rsi] + + sub rdx,rbx + mov rax,qword ptr [rcx] + + shr rbx,3 + mov qword ptr [rbp],rdx + + add rbp,8 + mov rsi,rdx + + mov qword ptr [rdx],rax + lea rax,1[rdx] + + add rdx,8 + mov qword ptr [rcx],rax + + add rcx,8 + lea rax,1[rbx] + att_jmp cp_s_arg_lp2 + +copy_bool_array_2: + add rbx,7 + shr rbx,3 + att_jmp copy_int_or_real_array_2 + +end_copy1: + mov heap_end_after_gc[rip],rsi + + lea rcx,finalizer_list[rip] + lea rdx,free_finalizer_list[rip] + mov rbp,qword ptr finalizer_list[rip] + +determine_free_finalizers_after_copy: + mov rax,qword ptr [rbp] + test al,1 + je finalizer_not_used_after_copy + + mov rbp,qword ptr 8[rbp] + sub rax,1 + mov qword ptr [rcx],rax + lea rcx,8[rax] + att_jmp determine_free_finalizers_after_copy + +finalizer_not_used_after_copy: + lea r9,__Nil-8[rip] + cmp rbp,r9 + je end_finalizers_after_copy + + mov qword ptr [rdx],rbp + lea rdx,8[rbp] + mov rbp,qword ptr 8[rbp] + att_jmp determine_free_finalizers_after_copy + +end_finalizers_after_copy: + mov qword ptr [rcx],rbp + mov qword ptr [rdx],rbp diff --git a/macho64/afileIO3.s b/macho64/afileIO3.s new file mode 100644 index 0000000..64a270e --- /dev/null +++ b/macho64/afileIO3.s @@ -0,0 +1,915 @@ + + .intel_syntax noprefix + + .macro att_jmp + .att_syntax + jmp $0 + .intel_syntax noprefix + .endmacro + + .macro att_call + .att_syntax + call $0 + .intel_syntax noprefix + .endmacro + + .macro att_ja + .att_syntax + ja $0 + .intel_syntax noprefix + .endmacro + + .macro att_jnc + .att_syntax + jnc $0 + .intel_syntax noprefix + .endmacro + + .macro att_jns + .att_syntax + jns $0 + .intel_syntax noprefix + .endmacro + + .data + .align 3 + +freadstring_error: + .ascii "Error in freadsubstring parameters." + .byte 10,0 + .byte 0,0,0 +fwritestring_error: + .ascii "Error in fwritesubstring parameters." 
+ .byte 10,0 + .byte 0,0 + + .text + + .globl stdioF + .globl stderrF + .globl openF + .globl closeF + .globl reopenF + .globl readFC + .globl readFI + .globl readFR + .globl readFS + .globl readFString + .globl readLineF + .globl writeFC + .globl writeFI + .globl writeFR + .globl writeFS + .globl writeFString + .globl endF + .globl errorF + .globl positionF + .globl seekF + .globl shareF +.if 0 + .globl flushF +.endif + .globl openSF + .globl readSFC + .globl readSFI + .globl readSFR + .globl readSFS + .globl readLineSF + .globl endSF + .globl positionSF + .globl seekSF + + .globl _open_file + .globl _open_stdio + .globl _open_stderr + .globl _re_open_file + .globl _close_file + .globl _file_read_char + .globl _file_read_int + .globl _file_read_real + .globl _file_read_characters + .globl _file_read_line + .globl _file_write_char + .globl _file_write_int + .globl _file_write_real + .globl _file_write_characters + .globl _file_end + .globl _file_error + .globl _file_position + .globl _file_seek + .globl _file_share +.if 0 + .globl flush_file_buffer +.endif + .globl _open_s_file + .globl _file_read_s_char + .globl _file_read_s_int + .globl _file_read_s_real + .globl _file_read_s_string + .globl _file_read_s_line + .globl _file_s_end + .globl _file_s_position + .globl _file_s_seek + + .globl collect_0 + .globl collect_1 + + .globl print_error + + .globl __STRING__ + +stdioF: /* calls _open_stdio; returns its result in rbx and -1 in rax */ + mov rbp,rsp /* keep the caller's rsp so it can be restored after the call */ + and rsp,-16 /* align the stack to 16 bytes for the call */ + mov r13,rsi /* rsi and rdi are kept in r13 and r14 across the call */ + mov r14,rdi + att_call _open_stdio + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rbx,rax + mov rax,-1 + ret + +stderrF: /* calls _open_stderr; returns its result in rbx and -1 in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + att_call _open_stderr + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rbx,rax + mov rax,-1 + ret + +openF: /* calls _open_file with rdi = address 8[rcx] and rsi = rax; returns the result in rbx, -1 in rax, and r10 = 1 if the result is non-negative, else 0 */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rax + mov r14,rdi + lea rdi,8[rcx] + att_call _open_file + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + xor r10,r10 + test rax,rax + setns r10b + mov rbx,rax + mov rax,-1 + ret + +closeF: /* calls _close_file with rdi = rbx */ + mov rbp,rsp + and rsp,-16 +
mov r13,rsi + mov r14,rdi + mov rdi,rbx + att_call _close_file + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + ret + +reopenF: /* calls _re_open_file with rdi = r10 and rsi = rax; r10 is copied to rbx first; returns the 32-bit result in r10 and -1 in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rax + mov rbx,r10 + mov r14,rdi + mov rdi,r10 + att_call _re_open_file + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10d,eax + mov rax,-1 + ret + +readFC: /* calls _file_read_char with rdi = rbx; returns r10 = result and r11 = 1, or r10 = r11 = 0 when the 32-bit result is -1; rax = -1 */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + mov rdi,rbx + att_call _file_read_char + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + cmp eax,-1 + je readFC_eof + + mov r10,rax + mov rax,-1 + mov r11,1 + ret + +readFC_eof: + xor r10,r10 + mov rax,-1 + xor r11,r11 + ret + +readFI: /* calls _file_read_int with rdi = rbx and rsi = address of a stack slot; returns the slot contents in r10, the call result in r11, and -1 in rax */ + mov rbp,rsp + sub rsp,8 + and rsp,-16 + mov r13,rsi + lea rsi,[rsp] + mov r14,rdi + mov rdi,rbx + att_call _file_read_int + mov r10,[rsp] + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r11,rax + mov rax,-1 + ret + +readFR: /* calls _file_read_real with rdi = rbx and rsi = address of a stack slot; returns the slot contents in xmm0, the call result in r10, and -1 in rax */ + mov rbp,rsp + sub rsp,8 + and rsp,-16 + mov r13,rsi + lea rsi,[rsp] + mov r14,rdi + mov rdi,rbx + att_call _file_read_real + + movlpd xmm0,[rsp] + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10,rax + mov rax,-1 + ret + +readFString: /* reads r10 characters into the string at rcx starting at offset r11, via _file_read_characters with rdi = rbx; jumps to readFString_error unless r11 is below the size at 8[rcx] and r10 is at most the size minus r11; returns the call result in r10, -1 in rax, rcx preserved */ + mov rbp,8[rcx] + cmp r11,rbp + jae readFString_error + + sub rbp,r11 + cmp r10,rbp + att_ja readFString_error + + push rcx + + push r10 /* the stack slot passed in rsi holds the requested length r10, not the start offset r11 */ + + lea rdx,16[rcx+r11] + mov rbp,rsp + and rsp,-16 /* align the stack; an or with -16 would overwrite rsp instead of aligning it */ + mov r13,rsi + mov rsi,rbp + mov r14,rdi + mov rdi,rbx + att_call _file_read_characters + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + add rsp,8 /* drop the length slot */ + pop rcx + + mov r10,rax + mov rax,-1 + ret + +readFString_error: + lea rbp,freadstring_error[rip] + att_jmp print_error + +readFS: lea rbp,16+7[rax] /* rbp = words needed for a string of rax characters plus its two header words */ + shr rbp,3 + sub r15,rbp + jb readFS_gc +readFS_r_gc: + add r15,rbp + + lea rbx,__STRING__+2[rip] + mov qword ptr [rdi],rbx + mov 8[rdi],rax + mov rbx,r10 + + lea rdx,16[rdi] + mov rbp,rsp + and rsp,-16 + mov r13,rsi + lea rsi,8[rdi] + mov r14,rdi + mov rdi,r10 + att_call _file_read_characters + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + +readFS_end: + add rax,16+7 + mov rcx,rdi + shr rax,3 + sub r15,rax + lea
rdi,[rdi+rax*8] + mov rax,-1 + ret + +readFS_gc: push rbp /* runs collect_0 with rbp saved on the stack, then resumes at readFS_r_gc */ + att_call collect_0 + pop rbp + att_jmp readFS_r_gc + +readLineF: /* takes the readLineF_gc path first when r15 is below 32+2 */ + cmp r15,32+2 + jb readLineF_gc + +readLineF_r_gc: /* stores the __STRING__+2 descriptor at [rdi] and calls _file_read_line with first argument rbx, second argument r15*8-16 and third argument the address 16 bytes past the descriptor */ + lea rdx,__STRING__+2[rip] + mov qword ptr [rdi],rdx + + lea rdx,16[rdi] + mov rbp,rsp + and rsp,-16 + mov r13,rsi + lea rsi,-16[r15*8] + mov r14,rdi + mov rdi,rbx + att_call _file_read_line + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov 8[rdi],rax + + test rax,rax + att_jns readFS_end /* non-negative result: finish through the shared readFS_end code */ + + lea rax,-16[r15*8] /* negative result: record r15*8-16 as the length so far and continue in the loop below */ + mov r12,rdi + mov 8[rdi],rax + add rdi,16 + +readLineF_lp: + add rdi,rax + + mov r13,8[r12] + mov rcx,r12 + shr r13,3 + xor r15,r15 + add r13,2+32 + sub r15,r13 /* r15 = -(length/8 + 2+32) before calling collect_1 */ + + att_call collect_1 + + add r15,r13 + mov rax,8[rcx] + lea rdx,16[rcx] + lea rcx,7[rax] + shr rcx,3 + sub r15,2 + sub r15,rcx + + mov r12,rdi + + lea rbp,__STRING__+2[rip] + mov qword ptr [rdi],rbp + + mov 8[rdi],rax + add rdi,16 + jmp st_copy_string1 + +copy_st_lp1: /* copies rcx 8-byte words from [rdx] to [rdi] */ + mov rbp,[rdx] + add rdx,8 + mov [rdi],rbp + add rdi,8 +st_copy_string1: + sub rcx,1 + att_jnc copy_st_lp1 + + mov rdx,rdi + mov rbp,rsp + and rsp,-16 + mov r13,rsi + lea rsi,[r15*8] + mov r14,rdi + mov rdi,rbx + att_call _file_read_line + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + test rax,rax + js readLineF_again + + add 8[r12],rax + add rax,7 + + mov rcx,r12 + + shr rax,3 + sub r15,rax + lea rdi,[rdi+rax*8] + + mov rax,-1 + ret + +readLineF_gc: /* calls collect_0 with r15 lowered by 32+2, restores r15, then retries */ + sub r15,32+2 + att_call collect_0 + add r15,32+2 + att_jmp readLineF_r_gc + +readLineF_again: /* adds r15*8 to the length stored at 8[r12] and loops */ + mov rcx,8[r12] + lea rax,[r15*8] + add rcx,rax + mov 8[r12],rcx + att_jmp readLineF_lp + +writeFC: /* calls _file_write_char with rdi = r10 and rsi = rbx; returns -1 in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rbx + mov r14,rdi + mov rdi,r10 + att_call _file_write_char + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rax,-1 + ret + +writeFI: /* calls _file_write_int with rdi = r10 and rsi = rbx; returns -1 in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rbx + mov r14,rdi + mov rdi,r10 + att_call _file_write_int + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rax,-1 + ret + +writeFR: + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov
r14,rdi + mov rdi,rbx + att_call _file_write_real + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rax,-1 + ret + +writeFS: /* calls _file_write_characters with rdi = address 16[rcx], rsi = the value at 8[rcx] and rdx = rbx; returns -1 in rax */ + mov rdx,rbx + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,8[rcx] + mov r14,rdi + lea rdi,16[rcx] + att_call _file_write_characters + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rax,-1 + ret + +writeFString: /* like writeFS for r10 characters starting at offset r11; jumps to writeFString_error unless r11 is below the size at 8[rcx] and r10 is at most the size minus r11 */ + mov rbp,8[rcx] + cmp r11,rbp + jae writeFString_error + + sub rbp,r11 + cmp r10,rbp + att_ja writeFString_error + + mov rdx,rbx + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,r10 + mov r14,rdi + lea rdi,16[rcx+r11] + att_call _file_write_characters + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rax,-1 + + ret + +writeFString_error: + lea rbp,fwritestring_error[rip] + att_jmp print_error + +endF: /* calls _file_end with rdi = rbx; returns the result in r10 and -1 in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + mov rdi,rbx + att_call _file_end + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10,rax + mov rax,-1 + ret + +errorF: /* calls _file_error with rdi = rbx; returns the result in r10 and -1 in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + mov rdi,rbx + att_call _file_error + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10,rax + mov rax,-1 + ret + +positionF: /* calls _file_position with rdi = rbx; returns the result in r10 and -1 in rax */ + mov rbp,rsp /* save the caller's rsp BEFORE aligning, so the restore below brings back the real return-address slot */ + and rsp,-16 + mov r13,rsi + mov r14,rdi + mov rdi,rbx + att_call _file_position + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10,rax + mov rax,-1 + ret + +seekF: /* calls _file_seek with rdi = r11, rsi = rbx and rdx = rax; r11 is copied to rbx; returns the result in r10 and -1 in rax */ + mov rdx,rax + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rbx + mov rbx,r11 + mov r14,rdi + mov rdi,r11 + att_call _file_seek + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10,rax + mov rax,-1 + ret + +shareF: /* calls _file_share with rdi = rbx; returns -1 in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + mov rdi,rbx + att_call _file_share + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov rax,-1 + ret +.if 0 +flushF: + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + mov rdi,rbx + call flush_file_buffer + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10,rax + mov rax,-1 + ret +.endif + +openSF: /* calls _open_s_file with rdi = address 8[rcx] and rsi = rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rax + mov r14,rdi + lea rdi,8[rcx] + att_call _open_s_file +
mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + xor r10,r10 + test rax,rax + setns r10b + + mov rbx,rax + xor rax,rax + ret + +readSFC: /* pushes rax and calls _file_read_s_char with rdi = rbx and rsi = address of the pushed value; returns r10 = result and r11 = 1, or r10 = r11 = 0 when the 32-bit result is -1; rax = the pushed slot as left by the call */ + push rax + + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rbp + mov r14,rdi + mov rdi,rbx + att_call _file_read_s_char + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + cmp eax,-1 /* compare the 32-bit result, as readFC does */ + je readSFC_eof + + mov r10,rax /* take the call result before rax is reloaded from the stack slot */ + pop rax + mov r11,1 + ret + +readSFC_eof: + pop rax + xor r10,r10 + xor r11,r11 + ret + +readSFI: /* pushes rax and reserves a second slot; calls _file_read_s_int with rdi = rbx, rsi = address of the second slot, rdx = address of the pushed rax; returns r10 = second slot, r11 = call result, rax = first slot */ + push rax + mov rdx,rsp + sub rsp,8 + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rbp + mov r14,rdi + mov rdi,rbx + att_call _file_read_s_int + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + pop r10 + mov r11,rax + pop rax + + ret + +readSFR: /* same layout as readSFI but calls _file_read_s_real; returns xmm0 = second slot, r10 = call result, rax = first slot */ + push rax + mov rdx,rsp + sub rsp,8 + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rbp + mov r14,rdi + mov rdi,rbx + att_call _file_read_s_real + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + movlpd xmm0,[rsp] + mov r10,rax + add rsp,8 + pop rax + + ret + +readSFS: /* rbp = words needed for rax characters plus two header words; takes the readSFS_gc path when r15 is smaller */ + lea rbp,16+7[rax] + shr rbp,3 + sub r15,rbp + jb readSFS_gc +readSFS_r_gc: + add r15,rbp + + lea rcx,__STRING__+2[rip] + mov qword ptr [rdi],rcx + + push rbx + + mov rbx,r10 + + mov rcx,rsp + lea rdx,8[rdi] + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rax + mov r14,rdi + mov rdi,r10 + att_call _file_read_s_string + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + +readSFS_end: /* rcx = address of the new string; rdi and r15 are advanced by the words it occupies; rax = value popped from the stack */ + add rax,16+7 + mov rcx,rdi + shr rax,3 + sub r15,rax + lea rdi,[rdi+rax*8] + pop rax + ret + +readSFS_gc: push rbp + att_call collect_0 + pop rbp + att_jmp readSFS_r_gc + +readLineSF: /* takes the readLineSF_gc path first when r15 is below 32+2, the same test readLineF uses */ + cmp r15,32+2 + jb readLineSF_gc + +readLineSF_r_gc: + push rax + + lea rcx,__STRING__+2[rip] + mov qword ptr [rdi],rcx + + mov rcx,rsp + lea rdx,16[rdi] + mov rbp,rsp + and rsp,-16 + mov r13,rsi + lea rsi,-16[r15*8] + mov r14,rdi + mov rdi,rbx + att_call _file_read_s_line + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov 8[rdi],rax + + test rax,rax + att_jns readSFS_end + + lea rax,-16[r15*8] + mov r12,rdi + mov 8[rdi],rax + add rdi,16 + +readLineSF_lp: + add
rdi,rax + + mov r13,8[r12] + mov rcx,r12 + shr r13,3 + xor r15,r15 + add r13,2+32 + sub r15,r13 + + att_call collect_1 + + add r15,r13 + mov rax,8[rcx] + lea rdx,16[rcx] + lea rcx,7[rax] + shr rcx,3 + sub r15,2 + sub r15,rcx + + mov r12,rdi + + lea rbp,__STRING__+2[rip] + mov qword ptr [rdi],rbp + + mov 8[rdi],rax + add rdi,16 + jmp st_copy_string2 + +copy_st_lp2: /* copies rcx 8-byte words from [rdx] to [rdi] */ + mov rbp,[rdx] + add rdx,8 + mov [rdi],rbp + add rdi,8 +st_copy_string2: + sub rcx,1 + att_jnc copy_st_lp2 + + mov rcx,rsp /* fourth argument: address of the stack slot, as in the first _file_read_s_line call of this routine */ + mov rdx,rdi /* third argument: address just past the copied characters */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi /* rsi and rdi are kept in r13 and r14 across the call, like every other call in this file */ + lea rsi,[r15*8] /* second argument: r15*8 */ + mov r14,rdi + mov rdi,rbx /* first argument: rbx */ + att_call _file_read_s_line + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + test rax,rax + js readLineSF_again + + add 8[r12],rax + add rax,7 + + mov rcx,r12 + + shr rax,3 + sub r15,rax + lea rdi,[rdi+rax*8] + + pop rax + ret + +readLineSF_gc: + sub r15,32+2 + att_call collect_0 + add r15,32+2 + att_jmp readLineSF_r_gc + +readLineSF_again: /* adds r15*8 to the length stored at 8[r12] and loops */ + mov rcx,8[r12] + lea rax,[r15*8] + add rcx,rax + mov 8[r12],rcx + att_jmp readLineSF_lp + +endSF: /* calls _file_s_end with rdi = rbx and rsi = rax; the result stays in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rax + mov r14,rdi + mov rdi,rbx + att_call _file_s_end + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + ret + +positionSF: /* calls _file_s_position with rdi = rbx and rsi = rax; the result stays in rax */ + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,rax + mov r14,rdi + mov rdi,rbx + att_call _file_s_position + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + ret + +seekSF: /* pushes rbx and calls _file_s_seek with rdi = r11, rsi = r10, rdx = rax, rcx = address of the pushed value; returns the result in r10, the stack slot in rax, and r11 copied to rbx */ + push rbx + mov rcx,rsp + mov rdx,rax + + mov rbx,r11 + + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov rsi,r10 + mov r14,rdi + mov rdi,r11 + att_call _file_s_seek + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + + mov r10,rax + pop rax + + ret diff --git a/macho64/amark.s b/macho64/amark.s new file mode 100644 index 0000000..bb0d9dc --- /dev/null +++ b/macho64/amark.s @@ -0,0 +1,1933 @@ + + mov rax,qword ptr heap_size_65[rip] + xor rbx,rbx + + mov qword ptr n_marked_words[rip],rbx + shl rax,6 + + mov qword ptr lazy_array_list[rip],rbx + mov qword ptr heap_size_64_65[rip],rax + + lea rsi,(-4000)[rsp] + + mov rax,qword ptr caf_list[rip] + + mov qword ptr
end_stack[rip],rsi + + mov r10,neg_heap_p3[rip] + mov r11,heap_size_64_65[rip] + mov r13,qword ptr end_stack[rip] + mov r14,0 + + test rax,rax + je _end_mark_cafs + +_mark_cafs_lp: + mov rbx,qword ptr [rax] + mov rbp,qword ptr (-8)[rax] + + push rbp + lea rbp,8[rax] + lea r12,8[rax+rbx*8] + + call _mark_stack_nodes + + pop rax + test rax,rax + att_jne _mark_cafs_lp + +_end_mark_cafs: + mov rsi,qword ptr stack_top[rip] + mov rbp,qword ptr stack_p[rip] + + mov r12,rsi + att_call _mark_stack_nodes + +continue_mark_after_pmark: + mov qword ptr n_marked_words[rip],r14 + + mov rcx,qword ptr lazy_array_list[rip] + + test rcx,rcx + je end_restore_arrays + +restore_arrays: + mov rbx ,qword ptr [rcx] + lea r9,__ARRAY__+2[rip] + mov qword ptr [rcx],r9 + + cmp rbx,1 + je restore_array_size_1 + + lea rdx,[rcx+rbx*8] + mov rax,qword ptr 16[rdx] + test rax,rax + je restore_lazy_array + + mov rbp,rax + push rdx + + xor rdx,rdx + mov rax,rbx + movzx rbx,word ptr (-2+2)[rbp] + + div rbx + mov rbx,rax + + pop rdx + mov rax,rbp + +restore_lazy_array: + mov rdi,qword ptr 16[rcx] + mov rbp,qword ptr 8[rcx] + mov qword ptr 8[rcx],rbx + mov rsi,qword ptr 8[rdx] + mov qword ptr 16[rcx],rax + mov qword ptr 8[rdx],rbp + mov qword ptr 16[rdx],rdi + + test rax,rax + je no_reorder_array + + movzx rdx,word ptr (-2)[rax] + sub rdx,256 + movzx rbp,word ptr (-2+2)[rax] + cmp rbp,rdx + att_je no_reorder_array + + add rcx,24 + imul rbx,rdx + mov rax,rdx + lea rdx,[rcx+rbx*8] + mov rbx,rbp + sub rax,rbp + + att_call reorder + +no_reorder_array: + mov rcx,rsi + test rcx,rcx + att_jne restore_arrays + + att_jmp end_restore_arrays + +restore_array_size_1: + mov rbp,qword ptr 8[rcx] + mov rdx,qword ptr 16[rcx] + mov qword ptr 8[rcx],rbx + mov rax,qword ptr 24[rcx] + mov qword ptr 24[rcx],rbp + mov qword ptr 16[rcx],rax + + mov rcx,rdx + test rcx,rcx + att_jne restore_arrays + +end_restore_arrays: + mov rdi,qword ptr heap_vector[rip] + lea rcx,finalizer_list[rip] + lea rdx,free_finalizer_list[rip] + + mov 
rbp,qword ptr [rcx] +determine_free_finalizers_after_mark: + lea r9,__Nil-8[rip] + cmp rbp,r9 + je end_finalizers_after_mark + + lea rax,[r10+rbp] + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rax] + test esi,dword ptr [rdi+rbx*4] + je finalizer_not_used_after_mark + + lea rcx,8[rbp] + mov rbp,qword ptr 8[rbp] + att_jmp determine_free_finalizers_after_mark + +finalizer_not_used_after_mark: + mov qword ptr [rdx],rbp + lea rdx,8[rbp] + + mov rbp,qword ptr 8[rbp] + mov qword ptr [rcx],rbp + att_jmp determine_free_finalizers_after_mark + +end_finalizers_after_mark: + mov qword ptr [rdx],rbp + + att_call add_garbage_collect_time + + mov rax,qword ptr bit_vector_size[rip] + + mov rdi,qword ptr n_allocated_words[rip] + add rdi,qword ptr n_marked_words[rip] + shl rdi,3 + + mov rsi,rax + shl rsi,3 + + push rdx + push rax + + mov rax,rdi + mul qword ptr _heap_size_multiple[rip] + shrd rax,rdx,8 + shr rdx,8 + + mov rbx,rax + test rdx,rdx + + pop rax + pop rdx + + je not_largest_heap + + mov rbx,qword ptr heap_size_65[rip] + shl rbx,6 + +not_largest_heap: + cmp rbx,rsi + jbe no_larger_heap + + mov rsi,qword ptr heap_size_65[rip] + shl rsi,6 + cmp rbx,rsi + jbe not_larger_than_heap + mov rbx,rsi +not_larger_than_heap: + mov rax,rbx + shr rax,3 + mov qword ptr bit_vector_size[rip],rax +no_larger_heap: + + mov rbp,rax + + mov rdi,qword ptr heap_vector[rip] + + shr rbp,5 + + test al,31 + je no_extra_word + + mov dword ptr [rdi+rbp*4],0 + +no_extra_word: + sub rax,qword ptr n_marked_words[rip] + shl rax,3 + mov qword ptr n_last_heap_free_bytes[rip],rax + + mov rax,qword ptr n_marked_words[rip] + shl rax,3 + add qword ptr total_gc_bytes[rip],rax + + test qword ptr _flags[rip],2 + je _no_heap_use_message2 + + mov r12,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + + lea rdi,marked_gc_string_1[rip] + .else + sub rsp,32 + + lea rcx,marked_gc_string_1 + .endif + att_call _ew_print_string + + .if LINUX + mov rdi,qword ptr 
n_marked_words[rip] + shl rdi,3 + .else + mov rcx,qword ptr n_marked_words + shl rcx,3 + .endif + att_call _ew_print_int + + .if LINUX + lea rdi,heap_use_after_gc_string_2[rip] + .else + lea rcx,heap_use_after_gc_string_2 + .endif + att_call _ew_print_string + + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,r12 + +_no_heap_use_message2: + att_call call_finalizers + + mov rsi,qword ptr n_allocated_words[rip] + xor rbx,rbx + + mov rcx,rdi + mov qword ptr n_free_words_after_mark[rip],rbx + +_scan_bits: + cmp ebx,dword ptr [rcx] + je _zero_bits + mov dword ptr [rcx],ebx + add rcx,4 + sub rbp,1 + att_jne _scan_bits + + jmp _end_scan + +_zero_bits: + lea rdx,4[rcx] + add rcx,4 + sub rbp,1 + jne _skip_zero_bits_lp1 + jmp _end_bits + +_skip_zero_bits_lp: + test rax,rax + jne _end_zero_bits +_skip_zero_bits_lp1: + mov eax,dword ptr [rcx] + add rcx,4 + sub rbp,1 + att_jne _skip_zero_bits_lp + + test rax,rax + att_je _end_bits + mov rax,rcx + mov dword ptr (-4)[rcx],ebx + sub rax,rdx + jmp _end_bits2 + +_end_zero_bits: + mov rax,rcx + sub rax,rdx + shl rax,3 + add qword ptr n_free_words_after_mark[rip],rax + mov dword ptr (-4)[rcx],ebx + + cmp rax,rsi + att_jb _scan_bits + +_found_free_memory: + mov qword ptr bit_counter[rip],rbp + mov qword ptr bit_vector_p[rip],rcx + + lea rbx,(-4)[rdx] + sub rbx,rdi + shl rbx,6 + mov rdi,qword ptr heap_p3[rip] + add rdi,rbx + + mov r15,rax + lea rbx,[rdi+rax*8] + + sub r15,rsi + mov rsi,qword ptr stack_top[rip] + + mov qword ptr heap_end_after_gc[rip],rbx + + att_jmp restore_registers_after_gc_and_return + +_end_bits: + mov rax,rcx + sub rax,rdx + add rax,4 +_end_bits2: + shl rax,3 + add qword ptr n_free_words_after_mark[rip],rax + cmp rax,rsi + att_jae _found_free_memory + +_end_scan: + mov qword ptr bit_counter[rip],rbp + att_jmp compact_gc + +/* %rbp : pointer to stack element */ +/* %rdi : heap_vector */ +/* %rax ,%rbx ,%rcx ,%rdx ,%rsi : free */ + +_mark_stack_nodes: + cmp rbp,r12 + je _end_mark_nodes +_mark_stack_nodes_: + 
mov rcx,qword ptr [rbp] + + add rbp,8 + lea rdx,[r10+rcx] + + cmp rdx,r11 + att_jnc _mark_stack_nodes + + mov rbx,rdx + and rdx,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rdx] + + test esi,dword ptr [rdi+rbx*4] + att_jne _mark_stack_nodes + + push rbp + push 0 + jmp _mark_arguments + +_mark_hnf_2: + cmp rsi,0x20000000 + jbe fits_in_word_6 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_6: + add r14,3 + +_mark_record_2_c: + mov rbx,qword ptr 8[rcx] + push rbx + + cmp rsp,r13 + jb __mark_using_reversal + +_mark_node2: +_shared_argument_part: + mov rcx,qword ptr [rcx] + +_mark_node: + lea rdx,[r10+rcx] + cmp rdx,r11 + jnc _mark_next_node + + mov rbx,rdx + and rdx,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rdx] + + test esi,dword ptr [rdi+rbx*4] + att_jne _mark_next_node + +_mark_arguments: + mov rax,qword ptr [rcx] + test rax,2 + je _mark_lazy_node + + movzx rbp,word ptr (-2)[rax] + + test rbp,rbp + je _mark_hnf_0 + + or dword ptr [rdi+rbx*4],esi + add rcx,8 + + cmp rbp,256 + jae _mark_record + + sub rbp,2 + att_je _mark_hnf_2 + jb _mark_hnf_1 + +_mark_hnf_3: + mov rdx,qword ptr 8[rcx] + + cmp rsi,0x20000000 + jbe fits_in_word_1 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_1: + + add r14,3 + lea rax,[r10+rdx] + mov rbx,rax + + and rax,31*8 + shr rbx,8 + + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rax] + + test esi,dword ptr [rdi+rbx*4] + att_jne _shared_argument_part + +_no_shared_argument_part: + or dword ptr [rdi+rbx*4],esi + add rbp,1 + + add r14,rbp + lea rax,[rax+rbp*8] + lea rdx,(-8)[rdx+rbp*8] + + cmp rax,32*8 + jbe fits_in_word_2 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_2: + + mov rbx,qword ptr [rdx] + sub rbp,2 + push rbx + +_push_hnf_args: + mov rbx,qword ptr (-8)[rdx] + sub rdx,8 + push rbx + sub rbp,1 + att_jge _push_hnf_args + + cmp rsp,r13 + att_jae _mark_node2 + + att_jmp __mark_using_reversal + +_mark_hnf_1: + cmp rsi,0x40000000 + jbe fits_in_word_4 + or dword ptr 4[rdi+rbx*4],1 
+fits_in_word_4: + add r14,2 + mov rcx,qword ptr [rcx] + att_jmp _mark_node + +_mark_lazy_node_1: + add rcx,8 + or dword ptr [rdi+rbx*4],esi + cmp rsi,0x20000000 + jbe fits_in_word_3 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_3: + add r14,3 + + cmp rbp,1 + att_je _mark_node2 + +_mark_selector_node_1: + add rbp,2 + mov rdx,qword ptr [rcx] + je _mark_indirection_node + + lea rsi,[r10+rdx] + mov rbx,rsi + + shr rbx,8 + and rsi,31*8 + + add rbp,1 + + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rsi] + jle _mark_record_selector_node_1 + + test esi,dword ptr [rdi+rbx*4] + jne _mark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + att_je _mark_node3 + + cmp word ptr (-2)[rbp],2 + jbe _small_tuple_or_record + +_large_tuple_or_record: + mov rbp,qword ptr 16[rdx] + mov r9,rbp + + add rbp,r10 + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + lea r8,bit_set_table2[rip] + mov ebp,dword ptr [r8+rbp] + test ebp,dword ptr [rdi+rbx*4] + att_jne _mark_node3 + + movsxd rbp,dword ptr(-8)[rax] + add rax,rbp + lea rbp,__indirection[rip] + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr (4-8)[rax] + mov rbp,rcx + + cmp rax,16 + jl _mark_tuple_selector_node_1 + mov rdx,r9 + je _mark_tuple_selector_node_2 + mov rcx,qword ptr (-24)[r9+rax] + mov qword ptr [rbp],rcx + att_jmp _mark_node + +_mark_tuple_selector_node_2: + mov rcx,qword ptr [r9] + mov qword ptr [rbp],rcx + att_jmp _mark_node + +_small_tuple_or_record: + movsxd rbp,dword ptr(-8)[rax] + add rax,rbp + lea rbp,__indirection[rip] + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr (4-8)[rax] + mov rbp,rcx +_mark_tuple_selector_node_1: + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rbp],rcx + att_jmp _mark_node + +_mark_record_selector_node_1: + je _mark_strict_record_selector_node_1 + + test esi,dword ptr [rdi+rbx*4] + att_jne _mark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + att_je _mark_node3 + + cmp word ptr (-2)[rbp],258 + att_jbe _small_tuple_or_record + + mov rbp,qword ptr 16[rdx] + mov r9,rbp + + add rbp,r10 + mov 
rbx,rbp + and rbp,31*8 + shr rbx,8 + lea r8,bit_set_table2[rip] + mov ebp,dword ptr [r8+rbp] + test ebp,dword ptr [rdi+rbx*4] + att_jne _mark_node3 + + movsxd rbp,dword ptr(-8)[rax] + add rax,rbp + lea rbp,__indirection[rip] + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr (4-8)[rax] + mov rbp,rcx + + cmp rax,16 + jle _mark_record_selector_node_2 + mov rdx,r9 + sub rax,24 +_mark_record_selector_node_2: + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rbp],rcx + att_jmp _mark_node + +_mark_strict_record_selector_node_1: + test esi,dword ptr [rdi+rbx*4] + att_jne _mark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + att_je _mark_node3 + + cmp word ptr (-2)[rbp],258 + jbe _select_from_small_record + + mov rbp,qword ptr 16[rdx] + mov r9,rbp + + add rbp,r10 + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + lea r8,bit_set_table2[rip] + mov ebp,dword ptr [r8+rbp] + test ebp,dword ptr [rdi+rbx*4] + att_jne _mark_node3 + +_select_from_small_record: + movsxd rbx,dword ptr (-8)[rax] + add rax,rbx + sub rcx,8 + + movzx ebx,word ptr (4-8)[rax] + cmp rbx,16 + jle _mark_strict_record_selector_node_2 + mov rbx,qword ptr (-24)[r9+rbx] + jmp _mark_strict_record_selector_node_3 +_mark_strict_record_selector_node_2: + mov rbx,qword ptr [rdx+rbx] +_mark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr (6-8)[rax] + test rbx,rbx + je _mark_strict_record_selector_node_5 + cmp rbx,16 + jle _mark_strict_record_selector_node_4 + mov rdx,r9 + sub rbx,24 +_mark_strict_record_selector_node_4: + mov rbx,qword ptr [rdx+rbx] + mov qword ptr 16[rcx],rbx +_mark_strict_record_selector_node_5: + + mov rax,qword ptr ((-8)-8)[rax] + mov qword ptr [rcx],rax + att_jmp _mark_next_node + +_mark_indirection_node: +_mark_node3: + mov rcx,rdx + att_jmp _mark_node + +_mark_next_node: + pop rcx + test rcx,rcx + att_jne _mark_node + + pop rbp + cmp rbp,r12 + att_jne _mark_stack_nodes_ + +_end_mark_nodes: + ret + +_mark_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + je 
_mark_real_or_file + + cmp rbp,1 + att_jle _mark_lazy_node_1 + + cmp rbp,256 + jge _mark_closure_with_unboxed_arguments + inc rbp + or dword ptr [rdi+rbx*4],esi + + add r14,rbp + lea rdx,[rdx+rbp*8] + lea rcx,[rcx+rbp*8] + + cmp rdx,32*8 + jbe fits_in_word_7 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_7: + sub rbp,3 +_push_lazy_args: + mov rbx,qword ptr (-8)[rcx] + sub rcx,8 + push rbx + sub rbp,1 + att_jge _push_lazy_args + + sub rcx,8 + + cmp rsp,r13 + att_jae _mark_node2 + + att_jmp __mark_using_reversal + +_mark_closure_with_unboxed_arguments: + mov rax,rbp + and rbp,255 + sub rbp,1 + att_je _mark_real_or_file + + shr rax,8 + add rbp,2 + + or dword ptr [rdi+rbx*4],esi + add r14,rbp + lea rdx,[rdx+rbp*8] + + sub rbp,rax + + cmp rdx,32*8 + jbe fits_in_word_7_ + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_7_: + sub rbp,2 + att_jl _mark_next_node + + lea rcx,16[rcx+rbp*8] + att_jne _push_lazy_args + +_mark_closure_with_one_boxed_argument: + mov rcx,qword ptr (-8)[rcx] + att_jmp _mark_node + +_mark_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + jb _mark_real_file_or_string + + or dword ptr [rdi+rbx*4],esi + + lea r9,CHAR+2[rip] + cmp rax,r9 + ja _mark_normal_hnf_0 + +_mark_bool: + add r14,2 + + cmp rsi,0x40000000 + att_jbe _mark_next_node + + or dword ptr 4[rdi+rbx*4],1 + att_jmp _mark_next_node + +_mark_normal_hnf_0: + inc r14 + att_jmp _mark_next_node + +_mark_real_file_or_string: + lea r9,__STRING__+2[rip] + cmp rax,r9 + jbe _mark_string_or_array + +_mark_real_or_file: + or dword ptr [rdi+rbx*4],esi + add r14,3 + + cmp rsi,0x20000000 + att_jbe _mark_next_node + + or dword ptr 4[rdi+rbx*4],1 + att_jmp _mark_next_node + +_mark_record: + sub rbp,258 + je _mark_record_2 + jl _mark_record_1 + +_mark_record_3: + add r14,3 + + cmp rsi,0x20000000 + jbe fits_in_word_13 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_13: + mov rdx,qword ptr 8[rcx] + + movzx rbx,word ptr (-2+2)[rax] + lea rsi,[r10+rdx] + + mov rax,rsi + and rsi,31*8 + + shr rax,8 + sub rbx,1 + + lea 
r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rsi] + jb _mark_record_3_bb + + test edx,dword ptr [rdi+rax*4] + att_jne _mark_node2 + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + jbe _push_record_arguments + or dword ptr 4[rdi+rax*4],1 +_push_record_arguments: + mov rdx,qword ptr 8[rcx] + mov rbp,rbx + shl rbx,3 + add rdx,rbx + sub rbp,1 + att_jge _push_hnf_args + + att_jmp _mark_node2 + +_mark_record_3_bb: + test edx,dword ptr [rdi+rax*4] + att_jne _mark_next_node + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + att_jbe _mark_next_node + + or dword ptr 4[rdi+rax*4],1 + att_jmp _mark_next_node + +_mark_record_2: + cmp rsi,0x20000000 + jbe fits_in_word_12 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_12: + add r14,3 + + cmp word ptr (-2+2)[rax],1 + att_ja _mark_record_2_c + att_je _mark_node2 + att_jmp _mark_next_node + +_mark_record_1: + cmp word ptr (-2+2)[rax],0 + att_jne _mark_hnf_1 + + att_jmp _mark_bool + +_mark_string_or_array: + je _mark_string_ + +_mark_array: + mov rbp,qword ptr 16[rcx] + test rbp,rbp + je _mark_lazy_array + + movzx rax,word ptr (-2)[rbp] + + test rax,rax + je _mark_strict_basic_array + + movzx rbp,word ptr (-2+2)[rbp] + test rbp,rbp + je _mark_b_record_array + + cmp rsp,r13 + jb _mark_array_using_reversal + + sub rax,256 + cmp rax,rbp + je _mark_a_record_array + +_mark_ab_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae _end_set_ab_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_ab_array_bits + +_mark_ab_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb _mark_ab_array_lp + +_last_ab_array_bits: + or dword ptr [rdi+rbx*4],ebp + +_end_set_ab_array_bits: + mov rax,qword ptr 8[rcx] + mov rdx,qword ptr 16[rcx] + movzx rbx,word ptr (-2+2)[rdx] + movzx rdx,word ptr 
(-2)[rdx] + shl rbx,3 + lea rdx,(-2048)[rdx*8] + push rbx + push rdx + lea rbp,24[rcx] + push r12 + jmp _mark_ab_array_begin + +_mark_ab_array: + mov rbx,qword ptr 16[rsp] + push rax + push rbp + lea r12,[rbp+rbx] + + att_call _mark_stack_nodes + + mov rbx,qword ptr (8+16)[rsp] + pop rbp + pop rax + add rbp,rbx +_mark_ab_array_begin: + sub rax,1 + att_jnc _mark_ab_array + + pop r12 + add rsp,16 + att_jmp _mark_next_node + +_mark_a_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + push rax + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae _end_set_a_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_a_array_bits + +_mark_a_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb _mark_a_array_lp + +_last_a_array_bits: + or dword ptr [rdi+rbx*4],ebp + +_end_set_a_array_bits: + pop rax + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + att_call _mark_stack_nodes + + pop r12 + att_jmp _mark_next_node + +_mark_lazy_array: + cmp rsp,r13 + att_jb _mark_array_using_reversal + + or dword ptr [rdi+rbx*4],esi + mov rax,qword ptr 8[rcx] + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae _end_set_lazy_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_lazy_array_bits + +_mark_lazy_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb _mark_lazy_array_lp + +_last_lazy_array_bits: + or dword ptr [rdi+rbx*4],ebp + +_end_set_lazy_array_bits: + mov rax,qword ptr 8[rcx] + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + att_call _mark_stack_nodes + + pop r12 + att_jmp _mark_next_node + +_mark_array_using_reversal: + push 0 + mov rsi,1 + jmp __mark_node + +_mark_strict_basic_array: + mov rax,qword ptr 8[rcx] + lea r9,dINT+2[rip] + cmp rbp,r9 + jle _mark_strict_int_or_real_array + lea r9,BOOL+2[rip] + cmp rbp,r9 + je _mark_strict_bool_array 
+_mark_strict_int32_or_real32_array: + add rax,6+1 + shr rax,1 + jmp _mark_basic_array_ +_mark_strict_int_or_real_array: + add rax,3 + att_jmp _mark_basic_array_ +_mark_strict_bool_array: + add rax,24+7 + shr rax,3 + att_jmp _mark_basic_array_ + +_mark_b_record_array: + mov rbp,qword ptr 8[rcx] + sub rax,256 + imul rax,rbp + add rax,3 + att_jmp _mark_basic_array_ + +_mark_string_: + mov rax,qword ptr 8[rcx] + add rax,16+7 + shr rax,3 + +_mark_basic_array_: + or dword ptr [rdi+rbx*4],esi + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + att_jae _mark_next_node + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_string_bits + +_mark_string_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb _mark_string_lp + +_last_string_bits: + or dword ptr [rdi+rbx*4],ebp + att_jmp _mark_next_node + +__end_mark_using_reversal: + pop rdx + test rdx,rdx + att_je _mark_next_node + mov qword ptr [rdx],rcx + att_jmp _mark_next_node + +__mark_using_reversal: + push rcx + mov rsi,1 + mov rcx,qword ptr [rcx] + att_jmp __mark_node + +__mark_arguments: + mov rax,qword ptr [rcx] + test al,2 + je __mark_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je __mark_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae __mark__record + + sub rbp,2 + je __mark_hnf_2 + jb __mark_hnf_1 + +__mark_hnf_3: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + + or dword ptr [rdi+rbx*4],edx + + cmp rdx,0x20000000 + + mov rax,qword ptr 8[rcx] + + jbe fits__in__word__1 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__1: + add rax,r10 + + mov rbx,rax + and rax,31*8 + + shr rbx,8 + + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rax] + test edx,dword ptr [rdi+rbx*4] + jne __shared_argument_part + +__no_shared_argument_part: + or dword ptr [rdi+rbx*4],edx + mov rdx,qword ptr 8[rcx] + + add rbp,1 + mov qword ptr 8[rcx],rsi + + add r14,rbp + add rcx,8 + + shl rbp,3 + or qword ptr [rdx],1 + + add rax,rbp + add rdx,rbp + + cmp rax,32*8 + jbe 
fits__in__word__2 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__2: + + mov rbp ,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rcx + lea rsi,(-8)[rdx] + mov rcx,rbp + att_jmp __mark_node + +__mark_hnf_1: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,2 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x40000000 + att_jbe __shared_argument_part + or dword ptr 4[rdi+rbx*4],1 +__shared_argument_part: + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + lea rsi,2[rcx] + mov rcx,rbp + att_jmp __mark_node + +__mark_no_selector_2: + pop rbx +__mark_no_selector_1: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + att_jbe __shared_argument_part + + or dword ptr 4[rdi+rbx*4],1 + att_jmp __shared_argument_part + +__mark_lazy_node_1: + att_je __mark_no_selector_1 + +__mark_selector_node_1: + add rbp,2 + je __mark_indirection_node + + add rbp,1 + + push rbx + mov rbp,qword ptr [rcx] + push rax + lea rax,[r10+rbp] + + jle __mark_record_selector_node_1 + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,dword ptr [rdi+rbx*4] + pop rax + att_jne __mark_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + att_je __mark_no_selector_2 + + cmp word ptr (-2)[rbx],2 + jbe __small_tuple_or_record + +__large_tuple_or_record: + mov r8,qword ptr 16[rbp] + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov r8d,dword ptr [r9+r8] + test r8d,dword ptr [rdi+rbx*4] + + mov r9,qword ptr 16[rbp] + + att_jne __mark_no_selector_2 + + movsxd rdx,dword ptr (-8)[rax] + add rax,rdx + lea rdx,__indirection[rip] + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr (4-8)[rax] + mov r8,rcx + + cmp rax,16 + jl __mark_tuple_selector_node_1 + je __mark_tuple_selector_node_2 + mov rcx,qword ptr (-24)[r9+rax] + mov qword ptr [r8],rcx + att_jmp __mark_node + +__mark_tuple_selector_node_2: + mov rcx,qword ptr [r9] + mov 
qword ptr [r8],rcx + att_jmp __mark_node + +__small_tuple_or_record: + movsxd rdx,dword ptr (-8)[rax] + add rax,rdx + lea rdx,__indirection[rip] + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr (4-8)[rax] + mov r8,rcx +__mark_tuple_selector_node_1: + mov rcx,qword ptr [rbp+rax] + mov qword ptr [r8],rcx + att_jmp __mark_node + att_jmp __mark_node + +__mark_record_selector_node_1: + je __mark_strict_record_selector_node_1 + + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,dword ptr [rdi+rbx*4] + pop rax + att_jne __mark_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + att_je __mark_no_selector_2 + + cmp word ptr (-2)[rbx],258 + jbe __small_record + + mov r8,qword ptr 16[rbp] + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov r8d,dword ptr [r9+r8] + test r8d,dword ptr [rdi+rbx*4] + + mov r9,qword ptr 16[rbp] + + att_jne __mark_no_selector_2 + +__small_record: + movsxd rdx,dword ptr(-8)[rax] + add rax,rdx + lea rdx,__indirection[rip] + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr (4-8)[rax] + mov r8,rcx + + cmp rax,16 + jle __mark_record_selector_node_2 + mov rbp,r9 + sub rax,24 +__mark_record_selector_node_2: + mov rcx,qword ptr [rbp+rax] + mov qword ptr [r8],rcx + att_jmp __mark_node + +__mark_strict_record_selector_node_1: + mov rbx,rax + and rax,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov eax,dword ptr [r9+rax] + test eax,dword ptr [rdi+rbx *4] + pop rax + att_jne __mark_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + att_je __mark_no_selector_2 + + cmp word ptr (-2)[rbx],258 + jle __select_from_small_record + + mov r8,qword ptr 16[rbp] + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov r8d,dword ptr [r9+r8] + test r8d,dword ptr [rdi+rbx*4] + + mov r9,qword ptr 16[rbp] + + att_jne __mark_no_selector_2 + +__select_from_small_record: + movsxd rbx,dword ptr(-8)[rax] + add rax,rbx + 
sub rcx,8 + + movzx ebx,word ptr (4-8)[rax] + cmp rbx,16 + jle __mark_strict_record_selector_node_2 + mov rbx,qword ptr (-24)[r9+rbx] + jmp __mark_strict_record_selector_node_3 +__mark_strict_record_selector_node_2: + mov rbx,qword ptr [rbp+rbx] +__mark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr (6-8)[rax] + test rbx,rbx + je __mark_strict_record_selector_node_5 + cmp rbx,16 + jle __mark_strict_record_selector_node_4 + mov rbp,r9 + sub rbx,24 +__mark_strict_record_selector_node_4: + mov rbx,qword ptr [rbp+rbx] + mov qword ptr 16[rcx],rbx +__mark_strict_record_selector_node_5: + pop rbx + + mov rax,qword ptr ((-8)-8)[rax] + mov qword ptr [rcx],rax + att_jmp __mark_node + +__mark_indirection_node: + mov rcx,qword ptr [rcx] + att_jmp __mark_node + +__mark_hnf_2: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + jbe fits__in__word__6 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__6: + +__mark_record_2_c: + mov rax,qword ptr [rcx] + mov rbp,qword ptr 8[rcx] + or rax,2 + mov qword ptr 8[rcx],rsi + mov qword ptr [rcx],rax + lea rsi,8[rcx] + mov rcx,rbp + +__mark_node: + lea rdx,[r10+rcx] + cmp rdx,r11 + jae __mark_next_node + + mov rbx,rdx + and rdx,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov ebp,dword ptr [r9+rdx] + test ebp,dword ptr [rdi+rbx*4] + att_je __mark_arguments + +__mark_next_node: + test rsi,3 + jne __mark_parent + + mov rbp,qword ptr (-8)[rsi] + mov rdx,qword ptr [rsi] + mov qword ptr [rsi],rcx + mov qword ptr (-8)[rsi],rdx + sub rsi,8 + + mov rcx,rbp + and rbp,3 + and rcx,-4 + or rsi,rbp + att_jmp __mark_node + +__mark_parent: + mov rbx,rsi + and rsi,-4 + att_je __end_mark_using_reversal + + and rbx,3 + mov rbp,qword ptr [rsi] + mov qword ptr [rsi],rcx + + sub rbx,1 + je __argument_part_parent + + lea rcx,(-8)[rsi] + mov rsi,rbp + att_jmp __mark_next_node + +__argument_part_parent: + and rbp,-4 + mov rdx,rsi + mov rcx,qword ptr (-8)[rbp] + 
mov rbx,qword ptr [rbp] + mov qword ptr (-8)[rbp],rbx + mov qword ptr [rbp],rdx + lea rsi,(2-8)[rbp] + att_jmp __mark_node + +__mark_lazy_node: + movsxd rbp,dword ptr(-4)[rax] + test rbp,rbp + je __mark_real_or_file + + add rcx,8 + cmp rbp,1 + att_jle __mark_lazy_node_1 + cmp rbp,256 + jge __mark_closure_with_unboxed_arguments + + add rbp,1 + mov rax,rdx + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,rbp + + lea rax,[rax+rbp*8] + sub rbp,2 + + or dword ptr [rdi+rbx*4],edx + + cmp rax,32*8 + jbe fits__in__word__7 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__7: +__mark_closure_with_unboxed_arguments__2: + lea rdx,[rcx+rbp*8] + mov rax,qword ptr [rcx] + or rax,2 + mov qword ptr [rcx],rax + mov rcx,qword ptr [rdx] + mov qword ptr [rdx],rsi + mov rsi,rdx + att_jmp __mark_node + +__mark_closure_with_unboxed_arguments: + mov rax,rbp + and rbp,255 + + sub rbp,1 + je __mark_closure_1_with_unboxed_argument + add rbp,2 + + shr rax,8 + add r14,rbp + + push rcx + lea rcx,[rdx+rbp*8] + + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + sub rbp,rax + + or dword ptr [rdi+rbx*4],edx + cmp rcx,32*8 + jbe fits__in_word_7_ + or dword ptr 4[rdi+rbx*4],1 +fits__in_word_7_: + pop rcx + sub rbp,2 + att_jg __mark_closure_with_unboxed_arguments__2 + att_je __shared_argument_part + sub rcx,8 + att_jmp __mark_next_node + +__mark_closure_1_with_unboxed_argument: + sub rcx,8 + att_jmp __mark_real_or_file + +__mark_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + jne __no_int_3 + + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + jb ____small_int + +__mark_bool_or_small_string: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,2 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x40000000 + att_jbe __mark_next_node + or dword ptr 4[rdi+rbx*4],1 + att_jmp __mark_next_node + +____small_int: + shl rbp,4 + lea rcx,small_integers[rip] + add rcx,rbp + att_jmp __mark_next_node + +__no_int_3: + jb __mark_real_file_or_string + + lea r9,CHAR+2[rip] + cmp rax,r9 + jne __no_char_3 
+ + movzx rbp,byte ptr 8[rcx] + shl rbp,4 + lea rcx,static_characters[rip] + add rcx,rbp + att_jmp __mark_next_node + +__no_char_3: + att_jb __mark_bool_or_small_string + + lea rcx,((-8)-2)[rax] + att_jmp __mark_next_node + +__mark_real_file_or_string: + lea r9,__STRING__+2[rip] + cmp rax,r9 + jbe __mark_string_or_array + +__mark_real_or_file: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + + or dword ptr [rdi+rbx*4],edx + + cmp rdx,0x20000000 + att_jbe __mark_next_node + + or dword ptr 4[rdi+rbx*4],1 + att_jmp __mark_next_node + +__mark__record: + sub rbp,258 + je __mark_record_2 + jl __mark_record_1 + +__mark_record_3: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + jbe fits__in__word__13 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__13: + movzx rbx,word ptr (-2+2)[rax] + + mov rdx,qword ptr 8[rcx] + add rdx,r10 + mov rax,rdx + and rdx,31*8 + shr rax,8 + + push rsi + + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rdx] + test esi,dword ptr [rdi+rax*4] + jne __shared_record_argument_part + + add rbp,1 + or dword ptr [rdi+rax *4],esi + + lea rdx,[rdx+rbp*8] + add r14,rbp + + pop rsi + + cmp rdx,32*8 + jbe fits__in__word__14 + or dword ptr 4[rdi+rax*4],1 +fits__in__word__14: + sub rbx,1 + mov rdx,qword ptr 8[rcx] + jl __mark_record_3_bb + att_je __shared_argument_part + + mov qword ptr 8[rcx],rsi + add rcx,8 + + sub rbx,1 + je __mark_record_3_aab + + lea rsi,[rdx+rbx*8] + mov rax,qword ptr [rdx] + or rax,1 + mov rbp,qword ptr [rsi] + mov qword ptr [rdx],rax + mov qword ptr [rsi],rcx + mov rcx,rbp + att_jmp __mark_node + +__mark_record_3_bb: + sub rcx,8 + att_jmp __mark_next_node + +__mark_record_3_aab: + mov rbp,qword ptr [rdx] + mov qword ptr [rdx],rcx + lea rsi,1[rdx] + mov rcx,rbp + att_jmp __mark_node + +__shared_record_argument_part: + mov rdx,qword ptr 8[rcx] + + pop rsi + + test rbx,rbx + att_jne __shared_argument_part + sub rcx,8 + att_jmp 
__mark_next_node + +__mark_record_2: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + jbe fits__in__word_12 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word_12: + cmp word ptr (-2+2)[rax],1 + att_ja __mark_record_2_c + att_je __shared_argument_part + sub rcx,8 + att_jmp __mark_next_node + +__mark_record_1: + cmp word ptr (-2+2)[rax],0 + att_jne __mark_hnf_1 + sub rcx,8 + att_jmp __mark_bool_or_small_string + +__mark_string_or_array: + je __mark_string_ + +__mark_array: + mov rbp,qword ptr 16[rcx] + test rbp,rbp + je __mark_lazy_array + + movzx rax,word ptr (-2)[rbp] + test rax,rax + je __mark_strict_basic_array + + movzx rbp,word ptr (-2+2)[rbp] + test rbp,rbp + je __mark_b_record_array + + sub rax,256 + cmp rax,rbp + je __mark_a_record_array + +__mark__ab__record__array: + push rdx + push rbx + mov rbx,rbp + + mov rbp,qword ptr 8[rcx] + add rcx,16 + push rcx + + shl rbp,3 + mov rdx,rax + imul rdx,rbp + + sub rax,rbx + add rcx,8 + add rdx,rcx + + att_call reorder + + pop rcx + + xchg rax,rbx + mov rbp,qword ptr (-8)[rcx] + imul rax,rbp + imul rbx,rbp + add r14,rbx + add rbx,rax + + shl rbx,3 + lea rbp,[r10+rcx] + add rbp,rbx + + pop rbx + pop rdx + + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + or dword ptr [rdi+rbx*4],edx + + lea rdx,[rcx+rax*8] + jmp __mark_r_array + +__mark_a_record_array: + imul rax,qword ptr 8[rcx] + add rcx,16 + jmp __mark_lr_array + +__mark_lazy_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + +__mark_lr_array: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + mov rbp,r10 + or dword ptr [rdi+rbx*4],edx + lea rdx,[rcx+rax*8] + add rbp,rdx +__mark_r_array: + shr rbp,8 + + cmp rbx,rbp + jae __skip_mark_lazy_array_bits + + inc rbx + +__mark_lazy_array_bits: + or dword ptr [rdi+rbx*4],1 + inc rbx + cmp rbx,rbp + att_jbe __mark_lazy_array_bits + +__skip_mark_lazy_array_bits: + add r14,3 + add r14,rax + + cmp rax,1 + jbe __mark_array_length_0_1 + + mov 
rbp,qword ptr [rdx] + mov rbx,qword ptr [rcx] + mov qword ptr [rdx],rbx + mov qword ptr [rcx],rbp + + mov rbp,qword ptr (-8)[rdx] + sub rdx,8 + mov rbx,qword ptr lazy_array_list[rip] + add rbp,2 + mov qword ptr [rdx],rbx + mov qword ptr (-8)[rcx],rbp + mov qword ptr (-16)[rcx],rax + sub rcx,16 + mov qword ptr lazy_array_list[rip],rcx + + mov rcx,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rsi + lea rsi,(-8)[rdx] + att_jmp __mark_node + +__mark_array_length_0_1: + lea rcx,(-16)[rcx] + att_jb __mark_next_node + + mov rbx,qword ptr 24[rcx] + mov rbp,qword ptr 16[rcx] + mov rdx,qword ptr lazy_array_list[rip] + mov qword ptr 24[rcx],rbp + mov qword ptr 16[rcx],rdx + mov qword ptr [rcx],rax + mov qword ptr lazy_array_list[rip],rcx + mov qword ptr 8[rcx],rbx + add rcx,8 + + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + lea rsi,2[rcx] + mov rcx,rbp + att_jmp __mark_node + +__mark_b_record_array: + mov rbp,qword ptr 8[rcx] + sub rax,256 + imul rax,rbp + add rax,3 + jmp __mark_basic_array + +__mark_strict_basic_array: + mov rax,qword ptr 8[rcx] + lea r9,dINT+2[rip] + cmp rbp,r9 + jle __mark__strict__int__or__real__array + lea r9,BOOL+2[rip] + cmp rbp,r9 + je __mark__strict__bool__array +__mark__strict__int32__or__real32__array: + add rax,6+1 + shr rax,1 + att_jmp __mark_basic_array +__mark__strict__int__or__real__array: + add rax,3 + att_jmp __mark_basic_array +__mark__strict__bool__array: + add rax,24+7 + shr rax,3 + att_jmp __mark_basic_array + +__mark_string_: + mov rax,qword ptr 8[rcx] + add rax,16+7 + shr rax,3 + +__mark_basic_array: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,rax + + or dword ptr [rdi+rbx*4],edx + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + att_jae __mark_next_node + + inc rbx + mov rbp,1 + + cmp rbx,rax + att_jae __last__string__bits + +__mark_string_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb __mark_string_lp + +__last__string__bits: + or dword ptr [rdi+rbx*4],ebp + 
att_jmp __mark_next_node diff --git a/macho64/amark_prefetch.s b/macho64/amark_prefetch.s new file mode 100644 index 0000000..63b0360 --- /dev/null +++ b/macho64/amark_prefetch.s @@ -0,0 +1,1757 @@ + + + .data +n_queue_items: + .quad 0 +queue_first: + .quad 0 +queue: + .quad 0,0,0,0,0,0,0,0 + .quad 0,0,0,0,0,0,0,0 + + .text + +pmark: + mov rax,qword ptr heap_size_65[rip] + xor rbx,rbx + + mov qword ptr n_marked_words[rip],rbx + shl rax,6 + + mov qword ptr heap_size_64_65[rip],rax + mov qword ptr lazy_array_list[rip],rbx + + lea rsi,(-4000)[rsp] + + mov rax,qword ptr caf_list[rip] + + mov qword ptr end_stack[rip],rsi + + mov r15,0 + mov r8,0 + + mov r10,neg_heap_p3[rip] + mov r11,heap_size_64_65[rip] + mov r13,qword ptr end_stack[rip] + mov r14,0 + + test rax,rax + je end_pmark_cafs + +pmark_cafs_lp: + mov rbx,qword ptr [rax] + mov rbp,qword ptr (-8)[rax] + + push rbp + lea rbp,8[rax] + lea r12,8[rax+rbx*8] + + call pmark_stack_nodes + + pop rax + test rax,rax + att_jne pmark_cafs_lp + +end_pmark_cafs: + mov rsi,qword ptr stack_top[rip] + mov rbp,qword ptr stack_p[rip] + + mov r12,rsi + att_call pmark_stack_nodes + att_jmp continue_mark_after_pmark + +/* %rbp : pointer to stack element */ +/* %rdi : heap_vector */ +/* %rax ,%rbx ,%rcx ,%rdx ,%rsi : free */ + +pmark_stack_nodes: + cmp rbp,r12 + je end_pmark_nodes +pmark_stack_nodes_: + mov rcx,qword ptr [rbp] + + add rbp,8 + lea rdx,[r10+rcx] + + cmp rdx,r11 + att_jnc pmark_stack_nodes + + mov rbx,rdx + and rdx,31*8 + + shr rbx,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rdx] + + test esi,dword ptr [rdi+rbx*4] + att_jne pmark_stack_nodes + + push rbp + + push 0 + + jmp pmark_node_ + +pmark_hnf_2: + cmp rsi,0x20000000 + jbe pmark_fits_in_word_6 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_6: + add r14,3 + +pmark_record_2_c: + mov rbx,qword ptr 8[rcx] + push rbx + + cmp rsp,r13 + jb pmarkr_using_reversal + +pmark_node2: +pmark_shared_argument_part: + mov rcx,qword ptr [rcx] + +pmark_node: + lea 
rdx,[r10+rcx] + cmp rdx,r11 + jnc pmark_next_node + + mov rbx,rdx + and rdx,31*8 + + shr rbx,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rdx] + + test esi,dword ptr [rdi+rbx*4] + att_jne pmark_next_node + +pmark_node_: + + prefetch [rcx] + + lea r9,queue[rip] + mov qword ptr [r9+r8],rcx + lea rdx,[r8+r15*8] + add r8,8 + + and r8,15*8 + and rdx,15*8 + + cmp r15,-4 + je pmark_last_item_in_queue + +pmark_add_items: + mov rcx,qword ptr [rsp] + test rcx,rcx + jne pmark_add_stacked_item + +pmark_add_items2: + mov rbp,qword ptr 8[rsp] + cmp rbp,r12 + att_je pmark_last_item_in_queue + + mov rcx,qword ptr [rbp] + add rbp,8 + mov qword ptr 8[rsp],rbp + + lea rbp,[r10+rcx] + cmp rbp,r11 + att_jnc pmark_add_items2 + mov rax,rbp + and rbp,31*8 + shr rax,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rbp] + test esi,dword ptr [rdi+rax*4] + att_jne pmark_add_items2 + + prefetch [rcx] + + lea r9,queue[rip] + mov qword ptr [r9+r8],rcx + add r8,8 + and r8,15*8 + + sub r15,1 + + cmp r15,-4 + att_jne pmark_add_items2 + att_jmp pmark_last_item_in_queue + +pmark_add_stacked_item: + add rsp ,8 + + lea rbp,[r10+rcx] + cmp rbp,r11 + att_jnc pmark_add_items + mov rax,rbp + and rbp,31*8 + shr rax,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rbp] + test esi,dword ptr [rdi+rax*4] + att_jne pmark_add_items + + prefetch [rcx] + + lea r9,queue[rip] + mov qword ptr [r9+r8],rcx + add r8,8 + and r8,15*8 + + sub r15,1 + + cmp r15,-4 + att_jne pmark_add_items + +pmark_last_item_in_queue: + lea r9,queue[rip] + mov rcx,qword ptr [r9+rdx] + + lea rdx,[r10+rcx] + + mov rbx,rdx + and rdx,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov esi,dword ptr[r9+rdx] + + test esi,dword ptr [rdi+rbx*4] + att_jne pmark_next_node + +pmark_arguments: + mov rax,qword ptr [rcx] + test rax,2 + je pmark_lazy_node + + movzx rbp,word ptr (-2)[rax] + + test rbp,rbp + je pmark_hnf_0 + + or dword ptr [rdi+rbx*4],esi + add rcx,8 + + cmp rbp,256 + jae pmark_record + + sub rbp,2 + att_je pmark_hnf_2 
+	jb pmark_hnf_1
+
+/* Register use in the code below, as far as this part of the file shows: */
+/*   rcx = address of the node (advanced by 8 once the descriptor is read), */
+/*   rax = descriptor word of the node, */
+/*   rdi = mark bit vector, rbx = dword index into it, esi = bit mask, */
+/*   r10 = value added to a heap address to obtain its bit-vector offset, */
+/*   r13 = stack limit; below it marking continues with pointer reversal, */
+/*   r14 = running word count (presumably the number of marked words - confirm), */
+/*   r9  = scratch, used for the address of bit_set_table2. */
+
+pmark_hnf_3:
+	mov rdx,qword ptr 8[rcx]
+
+/* set the bit in the next dword as well when the node crosses a dword boundary */
+	cmp rsi,0x20000000
+	jbe pmark_fits_in_word_1
+	or dword ptr 4[rdi+rbx*4],1
+pmark_fits_in_word_1:
+
+	add r14,3
+	lea rax,[r10+rdx]
+	mov rbx,rax
+
+	and rax,31*8
+	shr rbx,8
+
+	lea r9,bit_set_table2[rip]
+	mov esi,dword ptr [r9+rax]
+
+/* argument block already marked: only the first argument is followed */
+	test esi,dword ptr [rdi+rbx*4]
+	att_jne pmark_shared_argument_part
+
+pmark_no_shared_argument_part:
+	or dword ptr [rdi+rbx*4],esi
+	add rbp,1
+
+	add r14,rbp
+	lea rax,[rax+rbp*8]
+	lea rdx,(-8)[rdx+rbp*8]
+
+	cmp rax,32*8
+	jbe pmark_fits_in_word_2
+	or dword ptr 4[rdi+rbx*4],1
+pmark_fits_in_word_2:
+
+	mov rbx,qword ptr [rdx]
+	sub rbp,2
+	push rbx
+
+/* push the remaining arguments, last one first */
+pmark_push_hnf_args:
+	mov rbx,qword ptr (-8)[rdx]
+	sub rdx,8
+	push rbx
+	sub rbp,1
+	att_jge pmark_push_hnf_args
+
+/* continue recursively while there is stack left, otherwise use pointer reversal */
+	cmp rsp,r13
+	att_jae pmark_node2
+
+	att_jmp pmarkr_using_reversal
+
+pmark_hnf_1:
+	cmp rsi,0x40000000
+	jbe pmark_fits_in_word_4
+	or dword ptr 4[rdi+rbx*4],1
+pmark_fits_in_word_4:
+	add r14,2
+	mov rcx,qword ptr [rcx]
+	att_jmp pmark_node
+
+/* entered from pmark_lazy_node with rbp <= 1 (the signed 32 bit value at -4[rax]) */
+pmark_lazy_node_1:
+	add rcx,8
+	or dword ptr [rdi+rbx*4],esi
+	cmp rsi,0x20000000
+	jbe pmark_fits_in_word_3
+	or dword ptr 4[rdi+rbx*4],1
+pmark_fits_in_word_3:
+	add r14,3
+
+	cmp rbp,1
+	att_je pmark_node2
+
+/* rbp < 1 here: -2 selects pmark_indirection_node, values above -3 the tuple */
+/* selector code, -3 the strict record selector, smaller values the record selector */
+pmark_selector_node_1:
+	add rbp,2
+	mov rdx,qword ptr [rcx]
+	je pmark_indirection_node
+
+	lea rsi,[r10+rdx]
+	mov rbx,rsi
+
+	shr rbx,8
+	and rsi,31*8
+
+	add rbp,1
+
+/* lea and mov do not change the flags, so jle still tests the result of add rbp,1 */
+	lea r9,bit_set_table2[rip]
+	mov esi,dword ptr [r9+rsi]
+	jle pmark_record_selector_node_1
+
+	test esi,dword ptr [rdi+rbx*4]
+	jne pmark_node3
+
+	mov rbp,qword ptr [rdx]
+	test rbp,2
+	att_je pmark_node3
+
+	cmp word ptr (-2)[rbp],2
+	jbe pmark_small_tuple_or_record
+
+pmark_large_tuple_or_record:
+	mov rbp,qword ptr 16[rdx]
+
+	add rbp,r10
+	mov rbx,rbp
+	and rbp,31*8
+	shr rbx,8
+	lea r9,bit_set_table2[rip]
+	mov ebp,dword ptr [r9+rbp]
+	test ebp,dword ptr [rdi+rbx*4]
+/* FIX: r9 is also the scratch register for the bit_set_table2 address above, */
+/* so the pointer stored at 16[rdx] is loaded into r9 only now, after the lea. */
+/* It used to be copied to r9 before the lea and was then overwritten, so the */
+/* loads through r9 below read from bit_set_table2. mov leaves the flags of */
+/* the test intact for the jump that follows (same order as in amark.s). */
+	mov r9,qword ptr 16[rdx]
+	att_jne pmark_node3
+
+	movsxd rbp,dword ptr(-8)[rax]
+	add rax,rbp
+	lea rbp,__indirection[rip]
+	mov qword ptr (-8)[rcx],rbp
+	movzx eax,word ptr (4-8)[rax]
+	mov rbp,rcx
+
+/* rax = field offset: below 16 it is relative to rdx, from 16 on relative to r9 */
+	cmp rax,16
+	jl pmark_tuple_selector_node_1
+	mov rdx,r9
+	je pmark_tuple_selector_node_2
+	mov rcx,qword ptr (-24)[r9+rax]
+	mov qword ptr [rbp],rcx
+	att_jmp pmark_node
+
+pmark_tuple_selector_node_2:
+	mov rcx,qword ptr [r9]
+	mov qword ptr [rbp],rcx
+	att_jmp pmark_node
+
+pmark_small_tuple_or_record:
+	movsxd rbp,dword ptr(-8)[rax]
+	add rax,rbp
+	lea rbp,__indirection[rip]
+	mov qword ptr (-8)[rcx],rbp
+	movzx eax,word ptr (4-8)[rax]
+	mov rbp,rcx
+pmark_tuple_selector_node_1:
+	mov rcx,qword ptr [rdx+rax]
+	mov qword ptr [rbp],rcx
+	att_jmp pmark_node
+
+/* flags are still those of add rbp,1 in pmark_selector_node_1 */
+pmark_record_selector_node_1:
+	je pmark_strict_record_selector_node_1
+
+	test esi,dword ptr [rdi+rbx*4]
+	att_jne pmark_node3
+
+	mov rbp,qword ptr [rdx]
+	test rbp,2
+	att_je pmark_node3
+
+	cmp word ptr (-2)[rbp],258
+	att_jbe pmark_small_tuple_or_record
+
+	mov rbp,qword ptr 16[rdx]
+
+	add rbp,r10
+	mov rbx,rbp
+	and rbp,31*8
+	shr rbx,8
+	lea r9,bit_set_table2[rip]
+	mov ebp,dword ptr [r9+rbp]
+	test ebp,dword ptr [rdi+rbx*4]
+/* FIX: load r9 after its use as table address, see pmark_large_tuple_or_record */
+	mov r9,qword ptr 16[rdx]
+	att_jne pmark_node3
+
+	movsxd rbp,dword ptr(-8)[rax]
+	add rax,rbp
+	lea rbp,__indirection[rip]
+	mov qword ptr (-8)[rcx],rbp
+	movzx eax,word ptr (4-8)[rax]
+	mov rbp,rcx
+
+	cmp rax,16
+	jle pmark_record_selector_node_2
+	mov rdx,r9
+	sub rax,24
+pmark_record_selector_node_2:
+	mov rcx,qword ptr [rdx+rax]
+	mov qword ptr [rbp],rcx
+	att_jmp pmark_node
+
+pmark_strict_record_selector_node_1:
+	test esi,dword ptr [rdi+rbx*4]
+	att_jne pmark_node3
+
+	mov rbp,qword ptr [rdx]
+	test rbp,2
+	att_je pmark_node3
+
+	cmp word ptr (-2)[rbp],258
+	jbe pmark_select_from_small_record
+
+	mov rbp,qword ptr 16[rdx]
+
+	add rbp,r10
+	mov rbx,rbp
+	and rbp,31*8
+	shr rbx,8
+	lea r9,bit_set_table2[rip]
+	mov ebp,dword ptr [r9+rbp]
+	test ebp,dword ptr [rdi+rbx*4]
+/* FIX: load r9 after its use as table address, see pmark_large_tuple_or_record; */
+/* the code after pmark_select_from_small_record reads fields through r9 */
+	mov r9,qword ptr 16[rdx]
+	att_jne pmark_node3
+
+pmark_select_from_small_record:
+	movsxd rbx,dword ptr(-8)[rax]
+	add rax,rbx
+ sub rcx,8 + + movzx ebx,word ptr (4-8)[rax] + cmp rbx,16 + jle pmark_strict_record_selector_node_2 + mov rbx,qword ptr (-24)[r9+rbx] + jmp pmark_strict_record_selector_node_3 +pmark_strict_record_selector_node_2: + mov rbx,qword ptr [rdx+rbx] +pmark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr (6-8)[rax] + test rbx,rbx + je pmark_strict_record_selector_node_5 + cmp rbx,16 + jle pmark_strict_record_selector_node_4 + mov rdx,r9 + sub rbx,24 +pmark_strict_record_selector_node_4: + mov rbx,qword ptr [rdx+rbx] + mov qword ptr 16[rcx],rbx +pmark_strict_record_selector_node_5: + + mov rax,qword ptr ((-8)-8)[rax] + mov qword ptr [rcx],rax + att_jmp pmark_next_node + +pmark_indirection_node: +pmark_node3: + mov rcx,rdx + att_jmp pmark_node + +pmark_next_node: + pop rcx + test rcx,rcx + att_jne pmark_node + + pop rbp + cmp rbp,r12 + att_jne pmark_stack_nodes_ + +end_pmark_nodes: + test r15,r15 + je end_pmark_nodes_ + + push rbp + + push 0 + + lea rdx,[r8+r15*8] + add r15,1 + + and rdx,15*8 + + att_jmp pmark_last_item_in_queue + +end_pmark_nodes_: + ret + +pmark_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + je pmark_real_or_file + + cmp rbp,1 + att_jle pmark_lazy_node_1 + + cmp rbp,256 + jge pmark_closure_with_unboxed_arguments + inc rbp + or dword ptr [rdi+rbx*4],esi + + add r14,rbp + lea rdx,[rdx+rbp*8] + lea rcx,[rcx+rbp*8] + + cmp rdx,32*8 + jbe pmark_fits_in_word_7 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_7: + sub rbp,3 +pmark_push_lazy_args: + mov rbx,qword ptr (-8)[rcx] + sub rcx,8 + push rbx + sub rbp,1 + att_jge pmark_push_lazy_args + + sub rcx,8 + + cmp rsp,r13 + att_jae pmark_node2 + + att_jmp pmarkr_using_reversal + +pmark_closure_with_unboxed_arguments: + mov rax,rbp + and rbp,255 + sub rbp,1 + att_je pmark_real_or_file + + shr rax,8 + add rbp,2 + + or dword ptr [rdi+rbx*4],esi + add r14,rbp + lea rdx,[rdx+rbp*8] + + sub rbp,rax + + cmp rdx,32*8 + jbe pmark_fits_in_word_7_ + or dword ptr 4[rdi+rbx*4],1 
+pmark_fits_in_word_7_: + sub rbp,2 + att_jl pmark_next_node + + lea rcx,16[rcx+rbp*8] + att_jne pmark_push_lazy_args + +pmark_closure_with_one_boxed_argument: + mov rcx,qword ptr (-8)[rcx] + att_jmp pmark_node + +pmark_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + jb pmark_real_file_or_string + + or dword ptr [rdi+rbx*4],esi + + lea r9,CHAR+2[rip] + cmp rax,r9 + ja pmark_normal_hnf_0 + +pmark_bool: + add r14,2 + + cmp rsi,0x40000000 + att_jbe pmark_next_node + + or dword ptr 4[rdi+rbx*4],1 + att_jmp pmark_next_node + +pmark_normal_hnf_0: + inc r14 + att_jmp pmark_next_node + +pmark_real_file_or_string: + lea r9,__STRING__+2[rip] + cmp rax,r9 + jbe pmark_string_or_array + +pmark_real_or_file: + or dword ptr [rdi+rbx*4],esi + add r14,3 + + cmp rsi,0x20000000 + att_jbe pmark_next_node + + or dword ptr 4[rdi+rbx*4],1 + att_jmp pmark_next_node + +pmark_record: + sub rbp,258 + je pmark_record_2 + jl pmark_record_1 + +pmark_record_3: + add r14,3 + + cmp rsi,0x20000000 + jbe pmark_fits_in_word_13 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_13: + mov rdx,qword ptr 8[rcx] + + movzx rbx,word ptr (-2+2)[rax] + lea rsi,[r10+rdx] + + mov rax,rsi + and rsi,31*8 + + shr rax,8 + sub rbx,1 + + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rsi] + jb pmark_record_3_bb + + test edx,dword ptr [rdi+rax*4] + att_jne pmark_node2 + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + jbe pmark_push_record_arguments + or dword ptr 4[rdi+rax*4],1 +pmark_push_record_arguments: + mov rdx,qword ptr 8[rcx] + mov rbp,rbx + shl rbx,3 + add rdx,rbx + sub rbp,1 + att_jge pmark_push_hnf_args + + att_jmp pmark_node2 + +pmark_record_3_bb: + test edx,dword ptr [rdi+rax*4] + att_jne pmark_next_node + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + att_jbe pmark_next_node + + or dword ptr 4[rdi+rax*4],1 + att_jmp pmark_next_node + +pmark_record_2: + cmp rsi,0x20000000 + jbe pmark_fits_in_word_12 + or 
dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_12: + add r14,3 + + cmp word ptr (-2+2)[rax],1 + att_ja pmark_record_2_c + att_je pmark_node2 + att_jmp pmark_next_node + +pmark_record_1: + cmp word ptr (-2+2)[rax],0 + att_jne pmark_hnf_1 + + att_jmp pmark_bool + +pmark_string_or_array: + je pmark_string_ + +pmark_array: + mov rbp,qword ptr 16[rcx] + test rbp,rbp + je pmark_lazy_array + + movzx rax,word ptr (-2)[rbp] + + test rax,rax + je pmark_strict_basic_array + + movzx rbp,word ptr (-2+2)[rbp] + test rbp,rbp + je pmark_b_record_array + + cmp rsp,r13 + jb pmark_array_using_reversal + + sub rax,256 + cmp rax,rbp + je pmark_a_record_array + +pmark_ab_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae pmark_end_set_ab_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_ab_array_bits + +pmark_ab_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb pmark_ab_array_lp + +pmark_last_ab_array_bits: + or dword ptr [rdi+rbx*4],ebp + +pmark_end_set_ab_array_bits: + mov rax,qword ptr 8[rcx] + mov rdx,qword ptr 16[rcx] + movzx rbx,word ptr (-2+2)[rdx] + movzx rdx,word ptr (-2)[rdx] + shl rbx,3 + lea rdx,(-2048)[rdx*8] + push rbx + push rdx + lea rbp,24[rcx] + push r12 + jmp pmark_ab_array_begin + +pmark_ab_array: + mov rbx,qword ptr 16[rsp] + push rax + push rbp + lea r12,[rbp+rbx] + + att_call pmark_stack_nodes + + mov rbx,qword ptr (8+16)[rsp] + pop rbp + pop rax + add rbp,rbx +pmark_ab_array_begin: + sub rax,1 + att_jnc pmark_ab_array + + pop r12 + add rsp,16 + att_jmp pmark_next_node + +pmark_a_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + push rax + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae pmark_end_set_a_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_a_array_bits + 
+pmark_a_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb pmark_a_array_lp + +pmark_last_a_array_bits: + or dword ptr [rdi+rbx*4],ebp + +pmark_end_set_a_array_bits: + pop rax + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + att_call pmark_stack_nodes + + pop r12 + att_jmp pmark_next_node + +pmark_lazy_array: + cmp rsp,r13 + att_jb pmark_array_using_reversal + + or dword ptr [rdi+rbx*4],esi + mov rax,qword ptr 8[rcx] + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae pmark_end_set_lazy_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_lazy_array_bits + +pmark_lazy_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb pmark_lazy_array_lp + +pmark_last_lazy_array_bits: + or dword ptr [rdi+rbx*4],ebp + +pmark_end_set_lazy_array_bits: + mov rax,qword ptr 8[rcx] + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + att_call pmark_stack_nodes + + pop r12 + att_jmp pmark_next_node + +pmark_array_using_reversal: + push 0 + mov rsi,1 + jmp pmarkr_node + +pmark_strict_basic_array: + mov rax,qword ptr 8[rcx] + lea r9,dINT+2[rip] + cmp rbp,r9 + jle pmark_strict_int_or_real_array + lea r9,BOOL+2[rip] + cmp rbp,r9 + je pmark_strict_bool_array + add rax,6+1 + shr rax,1 + jmp pmark_basic_array_ +pmark_strict_int_or_real_array: + add rax,3 + att_jmp pmark_basic_array_ +pmark_strict_bool_array: + add rax,24+7 + shr rax,3 + att_jmp pmark_basic_array_ + +pmark_b_record_array: + mov rbp,qword ptr 8[rcx] + sub rax,256 + imul rax,rbp + add rax,3 + att_jmp pmark_basic_array_ + +pmark_string_: + mov rax,qword ptr 8[rcx] + add rax,16+7 + shr rax,3 + +pmark_basic_array_: + or dword ptr [rdi+rbx*4],esi + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + att_jae pmark_next_node + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_string_bits + +pmark_string_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb 
pmark_string_lp + +pmark_last_string_bits: + or dword ptr [rdi+rbx*4],ebp + att_jmp pmark_next_node + +end_pmarkr_using_reversal: + pop rdx + test rdx,rdx + att_je pmark_next_node + mov qword ptr [rdx],rcx + att_jmp pmark_next_node + + +pmarkr_using_reversal: + push rcx + mov rsi,1 + mov rcx,qword ptr [rcx] + att_jmp pmarkr_node + +pmarkr_arguments: + mov rax,qword ptr [rcx] + test al,2 + je pmarkr_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je pmarkr_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae pmarkr_record + + sub rbp,2 + je pmarkr_hnf_2 + jb pmarkr_hnf_1 + +pmarkr_hnf_3: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + + or dword ptr [rdi+rbx*4],edx + + cmp rdx,0x20000000 + + mov rax,qword ptr 8[rcx] + + jbe pmarkr_fits_in_word_1 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_1: + add rax,r10 + + mov rbx,rax + and rax,31*8 + + shr rbx,8 + + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rax] + test edx,dword ptr [rdi+rbx*4] + jne pmarkr_shared_argument_part + +pmarkr_no_shared_argument_part: + or dword ptr [rdi+rbx*4],edx + mov rdx,qword ptr 8[rcx] + + add rbp,1 + mov qword ptr 8[rcx],rsi + + add r14,rbp + add rcx,8 + + shl rbp,3 + or qword ptr [rdx],1 + + add rax,rbp + add rdx,rbp + + cmp rax,32*8 + jbe pmarkr_fits_in_word_2 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_2: + + mov rbp ,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rcx + lea rsi,(-8)[rdx] + mov rcx,rbp + att_jmp pmarkr_node + +pmarkr_hnf_1: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,2 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x40000000 + att_jbe pmarkr_shared_argument_part + or dword ptr 4[rdi+rbx*4],1 +pmarkr_shared_argument_part: + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + lea rsi,2[rcx] + mov rcx,rbp + att_jmp pmarkr_node + +pmarkr_no_selector_2: + pop rbx +pmarkr_no_selector_1: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + att_jbe 
pmarkr_shared_argument_part
/* ^ operand of the `att_jbe` that ends the previous line (tail of
     pmarkr_no_selector_1); left on its own line exactly as in the original. */

	or	dword ptr 4[rdi+rbx*4],1	/* mark bit was above 0x20000000: also set bit 0 of the next mark word */
	att_jmp	pmarkr_shared_argument_part

/*
 * Lazy node whose arity field (rbp, loaded and compared with 1 in
 * pmarkr_lazy_node) is <= 1.  The flags of that `cmp rbp,1` are still live.
 *   rbp ==  1  -> closure with one argument      (pmarkr_no_selector_1)
 *   rbp == -2  -> indirection node               (pmarkr_indirection_node)
 *   rbp == -1  -> tuple selector                 (falls through below)
 *   rbp == -3  -> strict record selector
 *   rbp <  -3  -> record selector
 * On entry: rcx = address of the node's first argument slot (node+8),
 *           rax = descriptor word of the node, rbx = index of its mark word,
 *           rdi = base of the mark bit vector, r10 = bias added to addresses
 *           before they are turned into bit-vector positions.
 */
pmarkr_lazy_node_1:
	att_je	pmarkr_no_selector_1

pmarkr_selector_node_1:
	add	rbp,2
	je	pmarkr_indirection_node

	add	rbp,1				/* flags of this add are consumed by the jle below */

	push	rbx				/* saved mark-word index, popped on every exit path */
	mov	rbp,qword ptr [rcx]		/* rbp = node the selector is applied to */
	push	rax				/* saved descriptor of the selector node */
	lea	rax,[r10+rbp]

	jle	pmarkr_record_selector_node_1	/* push/mov/lea leave the flags of `add rbp,1` unchanged */

	/* tuple selector: give up if the argument node is already marked */
	mov	rbx,rax
	and	rax,31*8
	shr	rbx,8
	lea	r9,bit_set_table2[rip]
	mov	eax,dword ptr [r9+rax]
	test	eax,dword ptr [rdi+rbx*4]
	pop	rax				/* rax = selector descriptor again (pop keeps the flags) */
	att_jne	pmarkr_no_selector_2

	/* ... or if it is not in head normal form (descriptor bit 1 clear) */
	mov	rbx,qword ptr [rbp]
	test	bl,2
	att_je	pmarkr_no_selector_2

	cmp	word ptr (-2)[rbx],2		/* arity <= 2: all fields live in the node itself */
	jbe	pmarkr_small_tuple_or_record

pmarkr_large_tuple_or_record:
	/* fields beyond the first live in a separate block pointed to by 16[rbp];
	   give up if that block is already marked */
	mov	r8,qword ptr 16[rbp]

	add	r8,r10
	mov	rbx,r8
	and	r8,31*8
	shr	rbx,8
	lea	r9,bit_set_table2[rip]
	mov	r8d,dword ptr [r9+r8]
	test	r8d,dword ptr [rdi+rbx*4]

	/* FIX: the original saved the block pointer in r9 *before* the lea above,
	   which then overwrote it with the address of bit_set_table2, so the
	   loads below read from the bit table.  Reload it here, as the record
	   selector paths do; mov does not modify the flags tested by att_jne. */
	mov	r9,qword ptr 16[rbp]

	att_jne	pmarkr_no_selector_2

	movsxd	rdx,dword ptr (-8)[rax]		/* signed 32-bit offset stored 8 bytes before the descriptor */
	add	rax,rdx
	lea	rdx,__indirection[rip]
	pop	rbx

	mov	qword ptr (-8)[rcx],rdx		/* selector node becomes an indirection */
	movzx	eax,word ptr (4-8)[rax]		/* rax = byte offset of the selected field */
	mov	r8,rcx				/* r8 = slot that receives the selected field */

	cmp	rax,16
	jl	pmarkr_tuple_selector_node_1	/* offset < 16: field is in the node */
	mov	rdx,r9				/* mov keeps the flags of the cmp for the je below */
	je	pmarkr_tuple_selector_node_2	/* offset == 16: first word of the argument block */
	mov	rcx,qword ptr (-24)[r9+rax]
	mov	qword ptr [r8],rcx
	att_jmp	pmarkr_node

pmarkr_tuple_selector_node_2:
	mov	rcx,qword ptr [r9]
	mov	qword ptr [r8],rcx
	att_jmp	pmarkr_node

pmarkr_small_tuple_or_record:
	movsxd	rdx,dword ptr(-8)[rax]
	add	rax,rdx
	lea	rdx,__indirection[rip]
	pop	rbx

	mov	qword ptr (-8)[rcx],rdx		/* selector node becomes an indirection */
	movzx	eax,word ptr (4-8)[rax]		/* rax = byte offset of the selected field */
	mov	r8,rcx
pmarkr_tuple_selector_node_1:
	mov	rcx,qword ptr [rbp+rax]		/* field taken directly from the argument node */
	mov	qword ptr [r8],rcx
	att_jmp	pmarkr_node			/* continue marking at the selected field */

/*
 * Record selectors.  Entered with the flags of `add rbp,1` from
 * pmarkr_selector_node_1: zero means strict record selector.
 * rax = biased address of the argument node, rbp = argument node,
 * stack = [selector descriptor, mark-word index].
 */
pmarkr_record_selector_node_1:
	je	pmarkr_strict_record_selector_node_1

	mov	rbx,rax
	and	rax,31*8
	shr	rbx,8
	lea	r9,bit_set_table2[rip]
	mov	eax,dword ptr [r9+rax]
	test	eax,dword ptr [rdi+rbx*4]
	pop	rax
	att_jne	pmarkr_no_selector_2		/* argument node already marked */

	mov	rbx,qword ptr [rbp]
	test	bl,2
	att_je	pmarkr_no_selector_2		/* argument not in head normal form */

	cmp	word ptr (-2)[rbx],258
/* Continuation of pmarkr_record_selector_node_1: the flags tested here come
   from the `cmp word ptr (-2)[rbx],258` that ends the previous line.
   rbp = record node the selector is applied to, rax = selector descriptor,
   rcx = first argument slot of the selector node, top of stack = saved rbx. */
	jbe	pmarkr_small_record

	/* large record: fields beyond the first live in the block at 16[rbp];
	   give up if that block is already marked */
	mov	r8,qword ptr 16[rbp]

	add	r8,r10
	mov	rbx,r8
	and	r8,31*8
	shr	rbx,8
	lea	r9,bit_set_table2[rip]
	mov	r8d,dword ptr [r9+r8]
	test	r8d,dword ptr [rdi+rbx*4]

	mov	r9,qword ptr 16[rbp]		/* r9 = argument block (the lea above clobbered r9; mov keeps the flags) */

	att_jne	pmarkr_no_selector_2

pmarkr_small_record:
	movsxd	rdx,dword ptr (-8)[rax]		/* signed 32-bit offset stored 8 bytes before the descriptor */
	add	rax,rdx
	lea	rdx,__indirection[rip]
	pop	rbx

	mov	qword ptr (-8)[rcx],rdx		/* selector node becomes an indirection */
	movzx	eax,word ptr (4-8)[rax]		/* rax = byte offset of the selected field */
	mov	r8,rcx				/* r8 = slot that receives the selected field */

	cmp	rax,16
	jle	pmarkr_record_selector_node_2	/* offset <= 16: field is in the node itself */
	/* FIX: the original wrote `mov rdx,r9` here but the load below indexes
	   rbp, so a field of the argument block was read relative to the node.
	   Switch the base to the argument block, as
	   pmarkr_strict_record_selector_node_4 does.  rbp is reloaded before its
	   next use on every path out of pmarkr_node. */
	mov	rbp,r9
	sub	rax,24
pmarkr_record_selector_node_2:
	mov	rcx,qword ptr [rbp+rax]
	mov	qword ptr [r8],rcx
	att_jmp	pmarkr_node			/* continue marking at the selected field */

/*
 * Strict record selector: instead of turning the node into an indirection,
 * the selected field(s) are copied into the selector node and its descriptor
 * is replaced (see pmarkr_strict_record_selector_node_5).
 */
pmarkr_strict_record_selector_node_1:
	mov	rbx,rax
	and	rax,31*8
	shr	rbx,8
	lea	r9,bit_set_table2[rip]
	mov	eax,dword ptr [r9+rax]
	test	eax,dword ptr [rdi+rbx*4]
	pop	rax
	att_jne	pmarkr_no_selector_2		/* argument node already marked */

	mov	rbx,qword ptr [rbp]
	test	bl,2
	att_je	pmarkr_no_selector_2		/* argument not in head normal form */

	cmp	word ptr (-2)[rbx],258
	jle	pmarkr_select_from_small_record

	mov	r8,qword ptr 16[rbp]

	add	r8,r10
	mov	rbx,r8
	and	r8,31*8
	shr	rbx,8
	lea	r9,bit_set_table2[rip]
	mov	r8d,dword ptr [r9+r8]
	test	r8d,dword ptr [rdi+rbx*4]

	mov	r9,qword ptr 16[rbp]		/* r9 = argument block (mov keeps the flags of the test) */

	att_jne	pmarkr_no_selector_2

pmarkr_select_from_small_record:
	movsxd	rbx,dword ptr(-8)[rax]
	add	rax,rbx
	sub	rcx,8				/* rcx = start of the selector node again */

	movzx	ebx,word ptr (4-8)[rax]		/* byte offset of the first selected field */
	cmp	rbx,16
	jle	pmarkr_strict_record_selector_node_2
	mov	rbx,qword ptr (-24)[r9+rbx]
	jmp	pmarkr_strict_record_selector_node_3
pmarkr_strict_record_selector_node_2:
	/* FIX: the original indexed rdx, which on this path still holds the
	   bit-table byte offset computed in pmarkr_node (0..248), not a node
	   address.  The record node is in rbp. */
	mov	rbx,qword ptr [rbp+rbx]
pmarkr_strict_record_selector_node_3:
	mov	qword ptr 8[rcx],rbx

	movzx	ebx,word ptr (6-8)[rax]		/* byte offset of the second selected field, 0 if none */
	test	rbx,rbx
	je	pmarkr_strict_record_selector_node_5
	cmp	rbx,16
	jle	pmarkr_strict_record_selector_node_4
	mov	rbp,r9
	sub	rbx,24
pmarkr_strict_record_selector_node_4:
	mov	rbx,qword ptr [rbp+rbx]
	mov	qword ptr 16[rcx],rbx
pmarkr_strict_record_selector_node_5:
	pop	rbx				/* restore the mark-word index saved in pmarkr_selector_node_1 */

	mov	rax,qword ptr ((-8-8))[rax]	/* replacement descriptor stored 16 bytes before the selector data */
	mov	qword ptr [rcx],rax
+ att_jmp pmarkr_node + +pmarkr_indirection_node: + mov rcx,qword ptr [rcx] + att_jmp pmarkr_node + +pmarkr_hnf_2: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + jbe pmarkr_fits_in_word_6 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_6: + +pmarkr_record_2_c: + mov rax,qword ptr [rcx] + mov rbp,qword ptr 8[rcx] + or rax,2 + mov qword ptr 8[rcx],rsi + mov qword ptr [rcx],rax + lea rsi,8[rcx] + mov rcx,rbp + +pmarkr_node: + lea rdx,[r10+rcx] + + cmp rdx,r11 + jae pmarkr_next_node + + mov rbx,rdx + and rdx,31*8 + shr rbx,8 + lea r9,bit_set_table2[rip] + mov ebp,dword ptr [r9+rdx] + test ebp,dword ptr [rdi+rbx*4] + att_je pmarkr_arguments + +pmarkr_next_node: + test rsi,3 + jne pmarkr_parent + + mov rbp,qword ptr (-8)[rsi] + mov rdx,qword ptr [rsi] + mov qword ptr [rsi],rcx + mov qword ptr (-8)[rsi],rdx + sub rsi,8 + + mov rcx,rbp + and rbp,3 + and rcx,-4 + or rsi,rbp + att_jmp pmarkr_node + +pmarkr_parent: + mov rbx,rsi + and rsi,-4 + att_je end_pmarkr_using_reversal + + and rbx,3 + mov rbp,qword ptr [rsi] + mov qword ptr [rsi],rcx + + sub rbx,1 + je pmarkr_argument_part_parent + + lea rcx,(-8)[rsi] + mov rsi,rbp + att_jmp pmarkr_next_node + +pmarkr_argument_part_parent: + and rbp,-4 + mov rdx,rsi + mov rcx,qword ptr (-8)[rbp] + mov rbx,qword ptr [rbp] + mov qword ptr (-8)[rbp],rbx + mov qword ptr [rbp],rdx + lea rsi,(2-8)[rbp] + att_jmp pmarkr_node + +pmarkr_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + je pmarkr_real_or_file + + add rcx,8 + cmp rbp,1 + att_jle pmarkr_lazy_node_1 + cmp rbp,256 + jge pmarkr_closure_with_unboxed_arguments + + add rbp,1 + mov rax,rdx + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,rbp + + lea rax,[rax+rbp*8] + sub rbp,2 + + or dword ptr [rdi+rbx*4],edx + + cmp rax,32*8 + jbe pmarkr_fits_in_word_7 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_7: +pmarkr_closure_with_unboxed_arguments_2: + lea rdx,[rcx+rbp*8] + mov 
rax,qword ptr [rcx] + or rax,2 + mov qword ptr [rcx],rax + mov rcx,qword ptr [rdx] + mov qword ptr [rdx],rsi + mov rsi,rdx + att_jmp pmarkr_node + +pmarkr_closure_with_unboxed_arguments: + mov rax,rbp + and rbp,255 + + sub rbp,1 + je pmarkr_closure_1_with_unboxed_argument + add rbp,2 + + shr rax,8 + add r14,rbp + + push rcx + lea rcx,[rdx+rbp*8] + + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + sub rbp,rax + + or dword ptr [rdi+rbx*4],edx + cmp rcx,32*8 + jbe pmarkr_fits_in_word_7_ + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_7_: + pop rcx + sub rbp,2 + att_jg pmarkr_closure_with_unboxed_arguments_2 + att_je pmarkr_shared_argument_part + sub rcx,8 + att_jmp pmarkr_next_node + +pmarkr_closure_1_with_unboxed_argument: + sub rcx,8 + att_jmp pmarkr_real_or_file + +pmarkr_hnf_0: + lea r9,dINT+2[rip] + cmp rax,r9 + jne pmarkr_no_int_3 + + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + + jb pmarkr_small_int + +pmarkr_bool_or_small_string: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,2 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x40000000 + att_jbe pmarkr_next_node + or dword ptr 4[rdi+rbx*4],1 + att_jmp pmarkr_next_node + +pmarkr_small_int: + shl rbp,4 + lea rcx,small_integers[rip] + add rcx,rbp + att_jmp pmarkr_next_node + +pmarkr_no_int_3: + jb pmarkr_real_file_or_string + + lea r9,CHAR+2[rip] + cmp rax,r9 + jne pmarkr_no_char_3 + + movzx rbp,byte ptr 8[rcx] + shl rbp,4 + lea rcx,static_characters[rip] + add rcx,rbp + att_jmp pmarkr_next_node + +pmarkr_no_char_3: + att_jb pmarkr_bool_or_small_string + + lea rcx,((-8)-2)[rax] + att_jmp pmarkr_next_node + +pmarkr_real_file_or_string: + lea r9,__STRING__+2[rip] + cmp rax,r9 + jbe pmarkr_string_or_array + +pmarkr_real_or_file: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + + or dword ptr [rdi+rbx*4],edx + + cmp rdx,0x20000000 + att_jbe pmarkr_next_node + + or dword ptr 4[rdi+rbx*4],1 + att_jmp pmarkr_next_node + +pmarkr_record: + sub rbp,258 + je pmarkr_record_2 + jl 
pmarkr_record_1 + +pmarkr_record_3: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + jbe pmarkr_fits_in_word_13 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_13: + movzx rbx,word ptr (-2+2)[rax] + + mov rdx,qword ptr 8[rcx] + lea rdx,[r10+rdx] + mov rax,rdx + and rdx,31*8 + shr rax,8 + + push rsi + + lea r9,bit_set_table2[rip] + mov esi,dword ptr [r9+rdx] + test esi,dword ptr [rdi+rax*4] + jne pmarkr_shared_record_argument_part + + add rbp,1 + or dword ptr [rdi+rax*4],esi + + lea rdx,[rdx+rbp*8] + add r14,rbp + + pop rsi + + cmp rdx,32*8 + jbe pmarkr_fits_in_word_14 + or dword ptr 4[rdi+rax*4],1 +pmarkr_fits_in_word_14: + sub rbx,1 + mov rdx,qword ptr 8[rcx] + jl pmarkr_record_3_bb + att_je pmarkr_shared_argument_part + + mov qword ptr 8[rcx],rsi + add rcx,8 + + sub rbx,1 + je pmarkr_record_3_aab + + lea rsi,[rdx+rbx*8] + mov rax,qword ptr [rdx] + or rax,1 + mov rbp,qword ptr [rsi] + mov qword ptr [rdx],rax + mov qword ptr [rsi],rcx + mov rcx,rbp + att_jmp pmarkr_node + +pmarkr_record_3_bb: + sub rcx,8 + att_jmp pmarkr_next_node + +pmarkr_record_3_aab: + mov rbp,qword ptr [rdx] + mov qword ptr [rdx],rcx + lea rsi,1[rdx] + mov rcx,rbp + att_jmp pmarkr_node + +pmarkr_shared_record_argument_part: + mov rdx,qword ptr 8[rcx] + + pop rsi + + test rbx,rbx + att_jne pmarkr_shared_argument_part + sub rcx,8 + att_jmp pmarkr_next_node + +pmarkr_record_2: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,0x20000000 + jbe pmarkr_fits_in_word_12 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_12: + cmp word ptr (-2+2)[rax],1 + att_ja pmarkr_record_2_c + att_je pmarkr_shared_argument_part + sub rcx,8 + att_jmp pmarkr_next_node + +pmarkr_record_1: + cmp word ptr (-2+2)[rax],0 + att_jne pmarkr_hnf_1 + sub rcx,8 + att_jmp pmarkr_bool_or_small_string + +pmarkr_string_or_array: + je pmarkr_string_ + +pmarkr_array: + mov rbp,qword ptr 16[rcx] + 
test rbp,rbp + je pmarkr_lazy_array + + movzx rax,word ptr (-2)[rbp] + test rax,rax + je pmarkr_strict_basic_array + + movzx rbp,word ptr (-2+2)[rbp] + test rbp,rbp + je pmarkr_b_record_array + + sub rax,256 + cmp rax,rbp + je pmarkr_a_record_array + +pmarkr_ab_record_array: + push rdx + push rbx + mov rbx,rbp + + mov rbp,qword ptr 8[rcx] + add rcx,16 + push rcx + + shl rbp,3 + mov rdx,rax + imul rdx,rbp + + sub rax,rbx + add rcx,8 + add rdx,rcx + + att_call reorder + + pop rcx + + xchg rax,rbx + mov rbp,qword ptr (-8)[rcx] + imul rax,rbp + imul rbx,rbp + add r14,rbx + add rbx,rax + + shl rbx,3 + lea rbp,[r10+rcx] + add rbp,rbx + + pop rbx + pop rdx + + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + or dword ptr [rdi+rbx*4],edx + + lea rdx,[rcx+rax*8] + jmp pmarkr_r_array + +pmarkr_a_record_array: + imul rax,qword ptr 8[rcx] + add rcx,16 + jmp pmarkr_lr_array + +pmarkr_lazy_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + +pmarkr_lr_array: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + mov rbp,r10 + or dword ptr [rdi+rbx*4],edx + lea rdx,[rcx+rax*8] + add rbp,rdx +pmarkr_r_array: + shr rbp,8 + + cmp rbx,rbp + jae pmarkr_skip_mark_lazy_array_bits + + inc rbx + +pmarkr_lazy_array_bits: + or dword ptr [rdi+rbx*4],1 + inc rbx + cmp rbx,rbp + att_jbe pmarkr_lazy_array_bits + +pmarkr_skip_mark_lazy_array_bits: + add r14,3 + add r14,rax + + cmp rax,1 + jbe pmarkr_array_length_0_1 + + mov rbp,qword ptr [rdx] + mov rbx,qword ptr [rcx] + mov qword ptr [rdx],rbx + mov qword ptr [rcx],rbp + + mov rbp,qword ptr (-8)[rdx] + sub rdx,8 + mov rbx,qword ptr lazy_array_list[rip] + add rbp,2 + mov qword ptr [rdx],rbx + mov qword ptr (-8)[rcx],rbp + mov qword ptr (-16)[rcx],rax + sub rcx,16 + mov qword ptr lazy_array_list[rip],rcx + + mov rcx,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rsi + lea rsi,(-8)[rdx] + att_jmp pmarkr_node + +pmarkr_array_length_0_1: + lea rcx,(-16)[rcx] + att_jb pmarkr_next_node + + mov rbx,qword ptr 24[rcx] + mov rbp,qword ptr 
16[rcx] + mov rdx,qword ptr lazy_array_list[rip] + mov qword ptr 24[rcx],rbp + mov qword ptr 16[rcx],rdx + mov qword ptr [rcx],rax + mov qword ptr lazy_array_list[rip],rcx + mov qword ptr 8[rcx],rbx + add rcx,8 + + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + lea rsi,2[rcx] + mov rcx,rbp + att_jmp pmarkr_node + +pmarkr_b_record_array: + mov rbp,qword ptr 8[rcx] + sub rax,256 + imul rax,rbp + add rax,3 + jmp pmarkr_basic_array + +pmarkr_strict_basic_array: + mov rax,qword ptr 8[rcx] + lea r9,dINT+2[rip] + cmp rbp,r9 + jle pmarkr_strict_int_or_real_array + lea r9,BOOL+2[rip] + cmp rbp,r9 + je pmarkr_strict_bool_array + add rax,6+1 + shr rax,1 + att_jmp pmarkr_basic_array +pmarkr_strict_int_or_real_array: + add rax,3 + att_jmp pmarkr_basic_array +pmarkr_strict_bool_array: + add rax,24+7 + shr rax,3 + att_jmp pmarkr_basic_array + +pmarkr_string_: + mov rax,qword ptr 8[rcx] + add rax,16+7 + shr rax,3 + +pmarkr_basic_array: + lea r9,bit_set_table2[rip] + mov edx,dword ptr [r9+rdx] + add r14,rax + + or dword ptr [rdi+rbx*4],edx + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + att_jae pmarkr_next_node + + inc rbx + mov rbp,1 + + cmp rbx,rax + jae pmarkr_last_string_bits + +pmarkr_string_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + att_jb pmarkr_string_lp + +pmarkr_last_string_bits: + or dword ptr [rdi+rbx*4],ebp + att_jmp pmarkr_next_node diff --git a/macho64/areals.s b/macho64/areals.s new file mode 100644 index 0000000..363f824 --- /dev/null +++ b/macho64/areals.s @@ -0,0 +1,8965 @@ + + .intel_syntax noprefix + + .macro att_jp + .att_syntax + jp $0 + .intel_syntax noprefix + .endmacro + + .text + + .globl sin_real + +sin_real: + ucomisd xmm0,qword ptr real_pi_d_4[rip] + jbe sin_real_1 # x<=pi/4 | NAN + ucomisd xmm0,qword ptr real_3_pi_d_4[rip] + jbe sin_real_2 + ucomisd xmm0,qword ptr real_5_pi_d_4[rip] + jbe sin_real_3 + ucomisd xmm0,qword ptr real_7_pi_d_4[rip] + jbe sin_real_4 + ucomisd xmm0,qword ptr real_9_pi_d_4[rip] + 
# Continuation of the range dispatch in sin_real; x is in xmm0 and the flags
# come from the ucomisd against real_9_pi_d_4 that ends the previous line.
	jbe	sin_real_5

	ucomisd	xmm0,qword ptr real_36825084_pi[rip]
	jbe	sin_real_0

	ucomisd	xmm0,qword ptr real_2_p_53[rip]
	jae	sin_cos_or_tan_real_too_large	# x>=2^53

# NOTE(review): rem_36825084_pi is defined elsewhere; the code below relies on
# it leaving values in xmm1, xmm5, xmm6 and xmm7 - confirm against the helper.
	call	rem_36825084_pi

	movlpd	xmm2,qword ptr real_pi_d_4_26[rip]
	movlpd	xmm3,qword ptr real_pi_d_4_26_26[rip]
	movlpd	xmm13,qword ptr real_pi_d_4_52_l[rip]

	mulsd	xmm2,xmm1
	mulsd	xmm3,xmm1
	mulsd	xmm13,xmm1

	andpd	xmm5,xmm1
	andpd	xmm6,xmm1

	subsd	xmm13,xmm7
	jmp	sin_real_0_

# 9*pi/4 < x <= the bound in real_36825084_pi.
# xmm1 = x*4/pi+1 with its low mantissa bits masked off; the shift count for
# the masks (xmm4) is round_even_c minus the raw bits of that value >> 52.
sin_real_0:
	movlpd	xmm1,qword ptr real_4_d_pi[rip]

	mulsd	xmm1,xmm0		# x*4/pi

	movq	xmm4,qword ptr round_even_c[rip]

	addsd	xmm1,qword ptr real_1_0[rip]	# x*4/pi+1

	movq	xmm2,xmm1
	psrlq	xmm1,52
	psubq	xmm4,xmm1

	movq	xmm1,qword ptr mask_all_one[rip]
	psllq	xmm1,xmm4
	andpd	xmm1,xmm2		# round_even (x*4/pi+1)

# xmm2, xmm3, xmm13 = xmm1 times the three constants below
# (presumably a three-part split of pi/4, judging by their names).
	movlpd	xmm2,qword ptr real_pi_d_4_26[rip]
	movlpd	xmm3,qword ptr real_pi_d_4_26_26[rip]
	movlpd	xmm13,qword ptr real_pi_d_4_52_l[rip]

# xmm5 / xmm6 = xmm1 masked with mask_all_one_except_last /
# mask_all_one_except_second_last shifted by the same count; they are compared
# with xmm1 below to choose between the sin and cos kernels and their sign.
	movq	xmm5,qword ptr mask_all_one_except_last[rip]
	psllq	xmm5,xmm4
	movq	xmm6,qword ptr mask_all_one_except_second_last[rip]
	psllq	xmm6,xmm4

	mulsd	xmm2,xmm1
	mulsd	xmm3,xmm1
	mulsd	xmm13,xmm1

	andpd	xmm5,xmm1
	andpd	xmm6,xmm1

sin_real_0_:
	ucomisd	xmm1,xmm5
	jne	sin_real_cos		# masking changed the value: use the cos kernel

sin_real_sin:
	ucomisd	xmm1,xmm6
	jne	sin_real_n_sin

	subsd	xmm0,xmm2
	subsd	xmm0,xmm3

	jmp	sin_real_p

sin_real_n_sin:
	movsd	xmm1,xmm3
	subsd	xmm0,xmm2
	subsd	xmm1,xmm0		# xmm1 = xmm3-(x-xmm2), the negated reduced argument
	jmp	sin_real_m

sin_real_cos:
	subsd	xmm0,xmm2
	subsd	xmm0,xmm3

	ucomisd	xmm1,xmm6
	je	cos_real_p
	jmp	cos_real_m

# Range dispatch for x < -pi/4 (reached from sin_real_1), mirroring the
# positive dispatch at the start of sin_real.
sin_real_n:
	ucomisd	xmm0,qword ptr real_m_3_pi_d_4[rip]
	jae	sin_real_n_2
	ucomisd	xmm0,qword ptr real_m_5_pi_d_4[rip]
	jae	sin_real_n_3
	ucomisd	xmm0,qword ptr real_m_7_pi_d_4[rip]
	jae	sin_real_n_4
	ucomisd	xmm0,qword ptr real_m_9_pi_d_4[rip]
	jae	sin_real_n_5

	ucomisd	xmm0,qword ptr real_m_36825084_pi[rip]
	jae	sin_real_n_0

	ucomisd	xmm0,qword ptr real_m_2_p_53[rip]
	jbe	sin_cos_or_tan_real_too_small_or_nan	# x<=-2^53
| NAN + + call rem_n_36825084_pi + + movlpd xmm2,qword ptr real_pi_d_4_26[rip] + movlpd xmm3,qword ptr real_pi_d_4_26_26[rip] + movlpd xmm13,qword ptr real_pi_d_4_52_l[rip] + + mulsd xmm2,xmm1 + mulsd xmm3,xmm1 + mulsd xmm13,xmm1 + + andpd xmm5,xmm1 + andpd xmm6,xmm1 + + subsd xmm13,xmm7 + jmp sin_real_n_0_ + +sin_real_n_0: + movlpd xmm1,qword ptr real_4_d_pi[rip] + + mulsd xmm1,xmm0 # x*4/pi + + movq xmm4,qword ptr m_round_even_c[rip] + + subsd xmm1,qword ptr real_1_0[rip] # x*4/pi-1 + + movq xmm2,xmm1 + psrlq xmm1,52 + psubq xmm4,xmm1 + + movq xmm1,qword ptr mask_all_one[rip] + psllq xmm1,xmm4 + andpd xmm1,xmm2 # round_even (x*4/pi-1) + + movlpd xmm2,qword ptr real_pi_d_4_26[rip] + movlpd xmm3,qword ptr real_pi_d_4_26_26[rip] + movlpd xmm13,qword ptr real_pi_d_4_52_l[rip] + + movq xmm5,qword ptr mask_all_one_except_last[rip] + psllq xmm5,xmm4 + movq xmm6,qword ptr mask_all_one_except_second_last[rip] + psllq xmm6,xmm4 + + mulsd xmm2,xmm1 + mulsd xmm3,xmm1 + mulsd xmm13,xmm1 + + andpd xmm5,xmm1 + andpd xmm6,xmm1 + +sin_real_n_0_: + ucomisd xmm1,xmm5 + att_je sin_real_sin + + subsd xmm0,xmm2 + subsd xmm0,xmm3 + + ucomisd xmm1,xmm6 + att_jne cos_real_p + att_jmp cos_real_m + +sin_real_1: + ucomisd xmm0,qword ptr real_m_pi_d_4[rip] + att_jb sin_real_n # # x<-pi/4 | NAN + + ucomisd xmm0,qword ptr real_0_43540000008249979402[rip] + jae sin_real_p_0_6 + ucomisd xmm0,qword ptr real_m_0_43540000008249979402[rip] + jbe sin_real_m_0_6 + + movsd xmm1,xmm0 # x + + ucomisd xmm0,qword ptr real_0_0[rip] + je sin_real_0_0 # sin -0.0 = -0.0 + + mulsd xmm0,xmm0 # x2 + + movlpd xmm5,qword ptr sin_p_0[rip] + movlpd xmm6,qword ptr sin_p_1[rip] + + movsd xmm2,xmm1 # x + mulsd xmm1,xmm0 # x3 + + movsd xmm3,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + +# ucomisd xmm3,qword ptr real_0_25 # x2>=0.25 (|x|>0.5) + + mulsd xmm3,xmm1 # x5 + + movlpd xmm7,qword ptr sin_p_2[rip] + movlpd xmm8,qword ptr sin_p_3[rip] + + mulsd xmm5,xmm1 # p3 + mulsd xmm1,xmm0 # x7 + + mulsd xmm6,xmm3 # p5 + mulsd xmm3,xmm0 # 
x9 + + mulsd xmm7,xmm1 # p7 + mulsd xmm1,xmm0 # x11 + + mulsd xmm8,xmm3 # p9 + mulsd xmm3,xmm0 # x13 + + mulsd xmm1,qword ptr sin_p_4[rip] # p11 + mulsd xmm3,qword ptr sin_p_5[rip] # p13 + + movsd xmm0,xmm2 # x + + addsd xmm1,xmm3 # p11+p13 + addsd xmm1,xmm8 # p9+p11+p13 + +# jae sin_real_1_l + + addsd xmm1,xmm7 # p7+p9+p11+p13 + addsd xmm1,xmm6 # p5+p7+p9+p11+p13 + addsd xmm1,xmm5 # p3+p5+p7+p9+p11+p13 + addsd xmm0,xmm1 # x+p3+p5+p7+p9+p11+p13 + +sin_real_0_0: + ret + +#sin_real_1_l: +# movlpd xmm2,qword ptr real_47_bits +# andpd xmm2,xmm5 # p3h + +# addsd xmm1,xmm7 # p7+p9+p11+p13 + +# subsd xmm5,xmm2 # p3l +# addsd xmm0,xmm2 # x+p3h + +# addsd xmm1,xmm6 # p5+p7+p9+p11+p13 +# addsd xmm1,xmm5 # p3l+p5+p7+p9+p11+p13 +# addsd xmm0,xmm1 # x+p3+p5+p7+p9+p11+p13 + +# ret + +sin_real_m_0_6: + movlpd xmm1,qword ptr real_m_0_600000000082499762577[rip] + + ucomisd xmm0,xmm1 + subsd xmm0,xmm1 + + lea rcx,sin_p3_c[rip] + lea rdx,sin_p4_c[rip] + + cmova rcx,rdx + + jmp sin_real_0_6 + +sin_real_p_0_6: + movlpd xmm1,qword ptr real_0_600000000082499762577[rip] + + ucomisd xmm0,xmm1 + subsd xmm0,xmm1 + + lea rcx,sin_p1_c[rip] + lea rdx,sin_p2_c[rip] + + cmovb rcx,rdx + +sin_real_0_6: + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + movlpd xmm7,qword ptr (sin_p1_3-sin_p1_c)[rcx] + movlpd xmm8,qword ptr (sin_p1_2-sin_p1_c)[rcx] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm7,xmm2 # p3 + mulsd xmm8,xmm2 # p2 + + movlpd xmm9,qword ptr (sin_p1_5-sin_p1_c)[rcx] + + mulsd xmm2,xmm0 # x6 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + mulsd xmm9,xmm4 # p5 + mulsd xmm4,qword ptr (sin_p1_4-sin_p1_c)[rcx] # p4 + + movlpd xmm11,qword ptr (sin_p1_7-sin_p1_c)[rcx] + movlpd xmm12,qword ptr (sin_p1_9-sin_p1_c)[rcx] + + mulsd xmm11,xmm2 # p7 + mulsd xmm2,qword ptr (sin_p1_6-sin_p1_c)[rcx] # p6 + + mulsd xmm12,xmm0 # p9 + mulsd xmm0,qword ptr (sin_p1_8-sin_p1_c)[rcx] # p8 + + addsd xmm11,xmm12 # p9+p7 + addsd xmm0,xmm2 # p8+p6 + + movlpd xmm2,qword ptr real_26_bits[rip] + movsd 
xmm3,xmm1 # x + movlpd xmm6,qword ptr (sin_p1_1l-sin_p1_c)[rcx] + movlpd xmm5,qword ptr (sin_p1_1h-sin_p1_c)[rcx] + + addsd xmm9,xmm11 # p9+p7+p5 + addsd xmm0,xmm4 # p8+p6+p4 + + andpd xmm2,xmm1 # x_h + mulsd xmm6,xmm1 # x*c1l + movlpd xmm13,qword ptr (sin_p1_0h-sin_p1_c)[rcx] + + addsd xmm7,xmm9 # p9+p7+p5+p3 + addsd xmm0,xmm8 # p8+p6+p4+p2 + + subsd xmm3,xmm2 # x_l + mulsd xmm2,xmm5 # x_h*c1h + + mulsd xmm7,xmm1 # (p9+p7+p5+p3)(*x) + + movsd xmm14,xmm13 + addsd xmm13,xmm2 # x_h*c1h+c0h + + addsd xmm0,xmm7 # p9+p8+..+p2 + + subsd xmm14,xmm13 # c0h-(x_h*c1h+c0h) + mulsd xmm3,xmm5 # x_l*c1h + +# addsd xmm0,qword ptr (sin_p1_0l-sin_p1_c)[rcx] + + addsd xmm14,xmm2 # (c0h-(x_h*c1h+c0h))+x_h*c1h + addsd xmm3,xmm6 # x_l*c1h+x*c1l + + addsd xmm3,xmm14 # (c0h-(x_h*c1h+c0h))+x_h*c1h+x_l*c1h+x*c1l + + addsd xmm0,xmm3 + addsd xmm0,xmm13 + ret + +sin_real_n_5: + addsd xmm0,qword ptr real_2_pi[rip] + movlpd xmm13,qword ptr real_m_2_pi_l[rip] + att_jmp sin_real_p + +sin_real_5: + subsd xmm0,qword ptr real_2_pi[rip] + movlpd xmm13,qword ptr real_2_pi_l[rip] + att_jmp sin_real_p + +cos_real_n_2: + addsd xmm0,qword ptr real_pi_d_2[rip] + movlpd xmm13,qword ptr real_m_pi_d_2_l[rip] + att_jmp sin_real_p + +cos_real_4: + subsd xmm0,qword ptr real_3_pi_d_2[rip] + movlpd xmm13,qword ptr real_3_pi_d_2_l[rip] + +sin_real_p: + ucomisd xmm0,qword ptr real_0_43540000008249979402[rip] + + movsd xmm12,xmm0 + + jae sin_real_p_p_0_6 + ucomisd xmm0,qword ptr real_m_0_43540000008249979402[rip] + jb sin_real_p_m_0_6 + + subsd xmm0,xmm13 + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + subsd xmm12,xmm1 + + movlpd xmm5,qword ptr sin_p_0[rip] + movlpd xmm6,qword ptr sin_p_1[rip] + + movsd xmm2,xmm1 # x + mulsd xmm1,xmm0 # x3 + + movsd xmm3,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + subsd xmm12,xmm13 # x_l + movsd xmm14,xmm3 # x2 + +# ucomisd xmm3,qword ptr real_0_25 # x2>=0.25 (|x|>0.5) + + mulsd xmm3,xmm1 # x5 + + movlpd xmm7,qword ptr sin_p_2[rip] + movlpd xmm8,qword ptr sin_p_3[rip] + + mulsd xmm5,xmm1 
# p3 + mulsd xmm1,xmm0 # x7 + + mulsd xmm6,xmm3 # p5 + mulsd xmm3,xmm0 # x9 + + mulsd xmm7,xmm1 # p7 + mulsd xmm1,xmm0 # x11 + + mulsd xmm8,xmm3 # p9 + mulsd xmm3,xmm0 # x13 + + mulsd xmm14,xmm12 # x_l*x2 + + mulsd xmm1,qword ptr sin_p_4[rip] # p11 + mulsd xmm3,qword ptr sin_p_5[rip] # p13 + + movsd xmm0,xmm2 # x + + mulsd xmm14,qword ptr real_0_5[rip] # 0.5*x_l*x2 + + addsd xmm1,xmm3 # p11+p13 + + subsd xmm12,xmm14 # x_l-0.5*x_l*x2 + + addsd xmm1,xmm8 # p9+p11+p13 + +# jae sin_real_p_l + + addsd xmm1,xmm7 # p7+p9+p11+p13 + addsd xmm1,xmm6 # p5+p7+p9+p11+p13 + addsd xmm1,xmm5 # p3+p5+p7+p9+p11+p13 + + addsd xmm1,xmm12 + + addsd xmm0,xmm1 # x+p3+p5+p7+p9+p11+p13 + + ret + +#sin_real_p_l: +# movlpd xmm2,qword ptr real_47_bits +# andpd xmm2,xmm5 # p3h + +# addsd xmm1,xmm7 # p7+p9+p11+p13 + +# subsd xmm5,xmm2 # p3l +# addsd xmm0,xmm2 # x+p3h + +# addsd xmm1,xmm6 # p5+p7+p9+p11+p13 +# addsd xmm1,xmm5 # p3+p5+p7+p9+p11+p13 + +# addsd xmm1,xmm12 + +# addsd xmm0,xmm1 # x+p3+p5+p7+p9+p11+p13 + +# ret + +sin_real_p_m_0_6: + movlpd xmm1,qword ptr real_m_0_600000000082499762577[rip] + + subsd xmm0,xmm13 + + ucomisd xmm0,xmm1 + movsd xmm15,xmm0 # x1 + subsd xmm0,xmm1 + + lea rcx,sin_p3_c[rip] + lea rdx,sin_p4_c[rip] + + cmova rcx,rdx + jmp sin_real_p_pm_0_6 + +sin_real_p_p_0_6: + movlpd xmm1,qword ptr real_0_600000000082499762577[rip] + + subsd xmm0,xmm13 + + ucomisd xmm0,xmm1 + movsd xmm15,xmm0 # x1 + subsd xmm0,xmm1 + + lea rcx,sin_p1_c[rip] + lea rdx,sin_p2_c[rip] + + cmovb rcx,rdx + +sin_real_p_pm_0_6: + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + subsd xmm12,xmm15 + mulsd xmm15,xmm15 # x1*x1 + + movlpd xmm7,qword ptr (sin_p1_3-sin_p1_c)[rcx] + movlpd xmm8,qword ptr (sin_p1_2-sin_p1_c)[rcx] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm7,xmm2 # p3 + mulsd xmm8,xmm2 # p2 + subsd xmm12,xmm13 # x_l + +sin_real_pm_0_6: + mulsd xmm15,qword ptr real_0_5[rip] # 0.5*x1*x1 + + movlpd xmm9,qword ptr (sin_p1_5-sin_p1_c)[rcx] + + mulsd xmm2,xmm0 # x6 + + movsd xmm4,xmm0 
# x4 + mulsd xmm0,xmm0 # x8 + + mulsd xmm9,xmm4 # p5 + mulsd xmm4,qword ptr (sin_p1_4-sin_p1_c)[rcx] # p4 + + mulsd xmm15,xmm12 # x_l*0.5*x1*x1 + + movlpd xmm11,qword ptr (sin_p1_7-sin_p1_c)[rcx] + movlpd xmm10,qword ptr (sin_p1_9-sin_p1_c)[rcx] + + mulsd xmm11,xmm2 # p7 + mulsd xmm2,qword ptr (sin_p1_6-sin_p1_c)[rcx] # p6 + + mulsd xmm10,xmm0 # p9 + mulsd xmm0,qword ptr (sin_p1_8-sin_p1_c)[rcx] # p8 + + addsd xmm11,xmm10 # p9+p7 + addsd xmm0,xmm2 # p8+p6 + + subsd xmm12,xmm15 # x_l-x_l*0.5*x1*x1 + + movlpd xmm2,qword ptr real_26_bits[rip] + movsd xmm3,xmm1 # x + movlpd xmm6,qword ptr (sin_p1_1l-sin_p1_c)[rcx] + movlpd xmm5,qword ptr (sin_p1_1h-sin_p1_c)[rcx] + + addsd xmm9,xmm11 # p9+p7+p5 + addsd xmm0,xmm4 # p8+p6+p4 + + andpd xmm2,xmm1 # x_h + mulsd xmm6,xmm1 # x*c1l + movlpd xmm13,qword ptr (sin_p1_0h-sin_p1_c)[rcx] + + addsd xmm7,xmm9 # p9+p7+p5+p3 + addsd xmm0,xmm8 # p8+p6+p4+p2 + + subsd xmm3,xmm2 # x_l + mulsd xmm2,xmm5 # x_h*c1h + + mulsd xmm7,xmm1 # (p9+p7+p5+p3)(*x) + + movsd xmm14,xmm13 + addsd xmm13,xmm2 # x_h*c1h+c0h + + addsd xmm0,xmm7 # p9+p8+..+p2 + + subsd xmm14,xmm13 # c0h-(x_h*c1h+c0h) + mulsd xmm3,xmm5 # x_l*c1h + +# addsd xmm0,qword ptr (sin_p1_0l-sin_p1_c)[rcx] + addsd xmm0,xmm12 + + addsd xmm14,xmm2 # (c0h-(x_h*c1h+c0h))+x_h*c1h + addsd xmm3,xmm6 # x_l*c1h+x*c1l + + addsd xmm3,xmm14 # (c0h-(x_h*c1h+c0h))+x_h*c1h+x_l*c1h+x*c1l + + addsd xmm0,xmm3 + addsd xmm0,xmm13 + ret + +sin_real_3: + movlpd xmm1,qword ptr real_pi[rip] + movlpd xmm13,qword ptr real_pi_l[rip] + subsd xmm1,xmm0 + att_jmp sin_real_m + +cos_real_n_4: + movlpd xmm1,qword ptr real_m_3_pi_d_2[rip] + movlpd xmm13,qword ptr real_m_3_pi_d_2_l[rip] + subsd xmm1,xmm0 + att_jmp sin_real_m + +sin_real_n_3: + movlpd xmm1,qword ptr real_m_pi[rip] + movlpd xmm13,qword ptr real_m_pi_l[rip] + subsd xmm1,xmm0 + att_jmp sin_real_m + +cos_real_2: + movlpd xmm1,qword ptr real_pi_d_2[rip] + movlpd xmm13,qword ptr real_pi_d_2_l[rip] + subsd xmm1,xmm0 + +sin_real_m: + ucomisd xmm1,qword ptr 
real_0_43540000008249979402[rip] + + movsd xmm12,xmm1 + + jae sin_real_m_p_0_6 + ucomisd xmm1,qword ptr real_m_0_43540000008249979402[rip] + jbe sin_real_m_m_0_6 + + addsd xmm1,xmm13 + + movsd xmm0,xmm1 # x + mulsd xmm1,xmm1 # x2 + + subsd xmm12,xmm0 + + movlpd xmm5,qword ptr sin_p_0[rip] + movlpd xmm6,qword ptr sin_p_1[rip] + + movsd xmm2,xmm0 # x + mulsd xmm0,xmm1 # x3 + +# ucomisd xmm1,qword ptr real_0_25 # x2>=0.25 (|x|>0.5) + + movsd xmm3,xmm1 # x2 + mulsd xmm1,xmm1 # x4 + + addsd xmm12,xmm13 + + movsd xmm14,xmm3 + mulsd xmm3,xmm0 # x5 + + movlpd xmm7,qword ptr sin_p_2[rip] + movlpd xmm8,qword ptr sin_p_3[rip] + + mulsd xmm5,xmm0 # p3 + mulsd xmm0,xmm1 # x7 + + mulsd xmm6,xmm3 # p5 + mulsd xmm3,xmm1 # x9 + + mulsd xmm7,xmm0 # p7 + mulsd xmm0,xmm1 # x11 + + mulsd xmm8,xmm3 # p9 + mulsd xmm3,xmm1 # x13 + + mulsd xmm14,xmm12 + + mulsd xmm0,qword ptr sin_p_4[rip] # p11 + mulsd xmm3,qword ptr sin_p_5[rip] # p13 + + movsd xmm1,xmm2 # x + + mulsd xmm14,qword ptr real_0_5[rip] + + addsd xmm0,xmm3 # p11+p13 + + subsd xmm12,xmm14 + + addsd xmm0,xmm8 # p9+p11+p13 + +# jae sin_real_m_l + + addsd xmm0,xmm7 # p7+p9+p11+p13 + addsd xmm0,xmm6 # p5+p7+p9+p11+p13 + addsd xmm0,xmm5 # p3+p5+p7+p9+p11+p13 + + addsd xmm0,xmm12 + + addsd xmm0,xmm1 # x+p3+p5+p7+p9+p11+p13 + + ret + +#sin_real_m_l: +# movlpd xmm2,qword ptr real_47_bits +# andpd xmm2,xmm5 # p3h + +# addsd xmm0,xmm7 # p7+p9+p11+p13 + +# subsd xmm5,xmm2 # p3l +# addsd xmm1,xmm2 # x+p3h + +# addsd xmm0,xmm6 # p5+p7+p9+p11+p13 +# addsd xmm0,xmm5 # p3+p5+p7+p9+p11+p13 + +# addsd xmm0,xmm12 + +# addsd xmm0,xmm1 # x+p3+p5+p7+p9+p11+p13 + +# ret + +sin_real_m_m_0_6: + addsd xmm1,xmm13 + + movlpd xmm0,qword ptr real_0_600000000082499762577[rip] + + ucomisd xmm1,qword ptr real_m_0_600000000082499762577[rip] + movsd xmm15,xmm1 # x1 + addsd xmm0,xmm1 + + lea rcx,sin_p3_c[rip] + lea rdx,sin_p4_c[rip] + + cmova rcx,rdx + jmp sin_real_m_pm_0_6 + +sin_real_m_p_0_6: + addsd xmm1,xmm13 + + movlpd xmm0,qword ptr 
real_m_0_600000000082499762577[rip]
# ^ memory operand of the `movlpd xmm0,qword ptr` that ends the previous line
#   (sin_real_m_p_0_6).

	ucomisd	xmm1,qword ptr real_0_600000000082499762577[rip]
	movsd	xmm15,xmm1		# x1
	addsd	xmm0,xmm1

	lea	rcx,sin_p1_c[rip]
	lea	rdx,sin_p2_c[rip]

	cmovb	rcx,rdx			# coefficient table chosen by the ucomisd above

# Common tail of sin_real_m_m_0_6 / sin_real_m_p_0_6: rcx = coefficient table,
# xmm0 = argument relative to the table's centre, xmm15 = x1, xmm13 = low part
# of the subtracted constant.  Joins the shared evaluation at sin_real_pm_0_6.
sin_real_m_pm_0_6:
	movsd	xmm1,xmm0		# x
	mulsd	xmm0,xmm0		# x2
	subsd	xmm12,xmm15
	mulsd	xmm15,xmm15		# x1*x1

	movlpd	xmm7,qword ptr (sin_p1_3-sin_p1_c)[rcx]
	movlpd	xmm8,qword ptr (sin_p1_2-sin_p1_c)[rcx]

	movsd	xmm2,xmm0		# x2
	mulsd	xmm0,xmm0		# x4

	mulsd	xmm7,xmm2		# p3
	mulsd	xmm8,xmm2		# p2

	addsd	xmm12,xmm13		# x_l
	att_jmp	sin_real_pm_0_6

	.globl	cos_real

# cos_real: argument x in xmm0, result returned in xmm0.
# Dispatches on the size of x; several ranges reuse the sin_real kernels.
# ucomisd reports "unordered" as below-or-equal, so the first jbe also takes
# NaN to cos_real_1.
cos_real:
	ucomisd	xmm0,qword ptr real_pi_d_4[rip]
	jbe	cos_real_1		# x<=pi/4 | NAN
	ucomisd	xmm0,qword ptr real_3_pi_d_4[rip]
	att_jbe	cos_real_2
	ucomisd	xmm0,qword ptr real_5_pi_d_4[rip]
	jbe	cos_real_3
	ucomisd	xmm0,qword ptr real_7_pi_d_4[rip]
	att_jbe	cos_real_4
	ucomisd	xmm0,qword ptr real_9_pi_d_4[rip]
	jbe	cos_real_5

	ucomisd	xmm0,qword ptr real_36825084_pi[rip]
	jbe	cos_real_0

	ucomisd	xmm0,qword ptr real_2_p_53[rip]
	att_jae	sin_cos_or_tan_real_too_large	# x>=2^53

# NOTE(review): rem_36825084_pi is defined elsewhere; the code below relies on
# it leaving values in xmm1, xmm5, xmm6 and xmm7 - confirm against the helper.
	att_call	rem_36825084_pi

	movlpd	xmm2,qword ptr real_pi_d_4_26[rip]
	movlpd	xmm3,qword ptr real_pi_d_4_26_26[rip]
	movlpd	xmm13,qword ptr real_pi_d_4_52_l[rip]

	mulsd	xmm2,xmm1
	mulsd	xmm3,xmm1
	mulsd	xmm13,xmm1

	andpd	xmm5,xmm1
	andpd	xmm6,xmm1

	subsd	xmm13,xmm7
	jmp	cos_real_0_

# 9*pi/4 < x <= the bound in real_36825084_pi.
# xmm1 = x*4/pi+1 with its low mantissa bits masked off; the shift count for
# the masks (xmm4) is round_even_c minus the raw bits of that value >> 52.
cos_real_0:
	movlpd	xmm1,qword ptr real_4_d_pi[rip]

	mulsd	xmm1,xmm0		# x*4/pi

	addsd	xmm1,qword ptr real_1_0[rip]	# x*4/pi+1

	movq	xmm4,qword ptr round_even_c[rip]
	movq	xmm2,xmm1
	psrlq	xmm1,52
	psubq	xmm4,xmm1

	movq	xmm1,qword ptr mask_all_one[rip]
	psllq	xmm1,xmm4
	andpd	xmm1,xmm2		# round_even (x*4/pi+1)

# xmm2, xmm3, xmm13 receive the three constants below
# (presumably a three-part split of pi/4, judging by their names).
	movlpd	xmm2,qword ptr real_pi_d_4_26[rip]
	movlpd	xmm3,qword ptr real_pi_d_4_26_26[rip]
	movlpd	xmm13,qword ptr real_pi_d_4_52_l[rip]

	movq	xmm5,qword ptr mask_all_one_except_last[rip]
	psllq	xmm5,xmm4
	movq	xmm6,qword ptr mask_all_one_except_second_last[rip]
psllq xmm6,xmm4 + + mulsd xmm2,xmm1 + mulsd xmm3,xmm1 + mulsd xmm13,xmm1 + + andpd xmm5,xmm1 + andpd xmm6,xmm1 + +cos_real_0_: + ucomisd xmm1,xmm5 + jne cos_real_sin + +cos_real_cos: + subsd xmm0,xmm2 + subsd xmm0,xmm3 + + ucomisd xmm1,xmm6 + att_je cos_real_p + att_jmp cos_real_m + +cos_real_sin: + ucomisd xmm1,xmm6 + jne cos_real_p_sin + + movsd xmm1,xmm3 + subsd xmm0,xmm2 + subsd xmm1,xmm0 + att_jmp sin_real_m + +cos_real_p_sin: + subsd xmm0,xmm2 + subsd xmm0,xmm3 + att_jmp sin_real_p + +cos_real_n: + ucomisd xmm0,qword ptr real_m_3_pi_d_4[rip] + att_jae cos_real_n_2 + ucomisd xmm0,qword ptr real_m_5_pi_d_4[rip] + jae cos_real_n_3 + ucomisd xmm0,qword ptr real_m_7_pi_d_4[rip] + att_jae cos_real_n_4 + ucomisd xmm0,qword ptr real_m_9_pi_d_4[rip] + jae cos_real_n_5 + + ucomisd xmm0,qword ptr real_m_36825084_pi[rip] + jae cos_real_n_0 + + ucomisd xmm0,qword ptr real_m_2_p_53[rip] + att_jbe sin_cos_or_tan_real_too_small_or_nan # x<=-2^53 | NAN + + att_call rem_n_36825084_pi + + movlpd xmm2,qword ptr real_pi_d_4_26[rip] + movlpd xmm3,qword ptr real_pi_d_4_26_26[rip] + movlpd xmm13,qword ptr real_pi_d_4_52_l[rip] + + mulsd xmm2,xmm1 + mulsd xmm3,xmm1 + mulsd xmm13,xmm1 + + andpd xmm5,xmm1 + andpd xmm6,xmm1 + + subsd xmm13,xmm7 + jmp cos_real_n_0_ + +cos_real_n_0: + movlpd xmm1,qword ptr real_4_d_pi[rip] + + mulsd xmm1,xmm0 # x*4/pi + + movq xmm4,qword ptr m_round_even_c[rip] + + subsd xmm1,qword ptr real_1_0[rip] # x*4/pi-1 + + movq xmm2,xmm1 + psrlq xmm1,52 + psubq xmm4,xmm1 + + movq xmm1,qword ptr mask_all_one[rip] + psllq xmm1,xmm4 + andpd xmm1,xmm2 # round_even (x*4/pi-1) + + movlpd xmm2,qword ptr real_pi_d_4_26[rip] + movlpd xmm3,qword ptr real_pi_d_4_26_26[rip] + movlpd xmm13,qword ptr real_pi_d_4_52_l[rip] + + movq xmm5,qword ptr mask_all_one_except_last[rip] + psllq xmm5,xmm4 + movq xmm6,qword ptr mask_all_one_except_second_last[rip] + psllq xmm6,xmm4 + + mulsd xmm2,xmm1 + mulsd xmm3,xmm1 + mulsd xmm13,xmm1 + + andpd xmm5,xmm1 + andpd xmm6,xmm1 + 
+cos_real_n_0_: + ucomisd xmm1,xmm5 + att_je cos_real_cos + + ucomisd xmm1,xmm6 + je cos_real_n_p_sin + + movsd xmm1,xmm3 + subsd xmm0,xmm2 + subsd xmm1,xmm0 + att_jmp sin_real_m + +cos_real_n_p_sin: + subsd xmm0,xmm2 + subsd xmm0,xmm3 + att_jmp sin_real_p + +cos_real_1: + ucomisd xmm0,qword ptr real_m_pi_d_4[rip] + att_jb cos_real_n # x<-pi/4 | NAN + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + movlpd xmm9,qword ptr real_17_bits[rip] + movlpd xmm5,qword ptr cos_p_0[rip] + + movsd xmm8,xmm1 # x + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + andpd xmm9,xmm1 # x_17_h + movlpd xmm10,qword ptr real_0_5[rip] + + movsd xmm3,xmm2 # x2 + mulsd xmm2,xmm0 # x6 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movlpd xmm7,qword ptr cos_p_2[rip] + + mulsd xmm5,xmm4 # p4 + mulsd xmm4,xmm2 # x10 + + movsd xmm6,xmm2 # x6 + mulsd xmm2,xmm2 # x12 + + mulsd xmm7,xmm0 # p8 + mulsd xmm0,xmm6 # x14 + + mulsd xmm6,qword ptr cos_p_1[rip] # p6 + mulsd xmm4,qword ptr cos_p_3[rip] # p10 + mulsd xmm2,qword ptr cos_p_4[rip] # p12 + mulsd xmm0,qword ptr cos_p_5[rip] # p14 + + subsd xmm8,xmm9 # x_17_l + mulsd xmm10,xmm9 # 0.5*x_17_h + + addsd xmm0,xmm2 # p12+p14 + + mulsd xmm10,xmm9 # 0.5*x_17_h*x_17_h + movlpd xmm11,qword ptr real_1_0[rip] + + addsd xmm0,xmm4 # p10+p12+p14 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5*x_17_l + addsd xmm9,xmm1 # x+x_17_h + + addsd xmm0,xmm7 # p8+p10+p12+p14 + + mulsd xmm8,xmm9 # 0.5*x_17_l*(x+x_17_h) + + addsd xmm0,xmm6 # p6+p8+p10+p12+p14 + + ucomisd xmm10,qword ptr real_2_p_m_18[rip] # 0.5*x_17_h*x_17_h<2**-18 + jb cos_real_1_s + + subsd xmm11,xmm10 # 1.0-0.5*x_17_h*x_17_h + + addsd xmm0,xmm5 # p4+p6+p8+p10+p12+p14 + + subsd xmm0,xmm8 + addsd xmm0,xmm11 + + ret + +cos_real_1_s: + addsd xmm0,xmm5 # p4+p6+p8+p10+p12+p14 + + subsd xmm0,xmm8 + subsd xmm0,xmm10 + addsd xmm0,xmm11 + + ret + +cos_real_n_5: + addsd xmm0,qword ptr real_2_pi[rip] + movlpd xmm13,qword ptr real_m_2_pi_l[rip] + att_jmp cos_real_p + +cos_real_5: + subsd xmm0,qword ptr 
real_2_pi[rip] + movlpd xmm13,qword ptr real_2_pi_l[rip] + att_jmp cos_real_p + +sin_real_n_4: + addsd xmm0,qword ptr real_3_pi_d_2[rip] + movlpd xmm13,qword ptr real_m_3_pi_d_2_l[rip] + att_jmp cos_real_p + +sin_real_2: + subsd xmm0,qword ptr real_pi_d_2[rip] + movlpd xmm13,qword ptr real_pi_d_2_l[rip] + +cos_real_p: + movsd xmm12,xmm0 + subsd xmm0,xmm13 + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + subsd xmm12,xmm1 + + movlpd xmm9,qword ptr real_17_bits[rip] + movlpd xmm5,qword ptr cos_p_0[rip] + + movsd xmm8,xmm1 # x + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + subsd xmm12,xmm13 + + andpd xmm9,xmm1 # x_17_h + movlpd xmm10,qword ptr real_0_5[rip] + + movsd xmm3,xmm2 # x2 + mulsd xmm2,xmm0 # x6 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movlpd xmm7,qword ptr cos_p_2[rip] + + mulsd xmm12,xmm8 + + mulsd xmm5,xmm4 # p4 + mulsd xmm4,xmm2 # x10 + + movsd xmm6,xmm2 # x6 + mulsd xmm2,xmm2 # x12 + + mulsd xmm7,xmm0 # p8 + mulsd xmm0,xmm6 # x14 + + mulsd xmm6,qword ptr cos_p_1[rip] # p6 + mulsd xmm4,qword ptr cos_p_3[rip] # p10 + mulsd xmm2,qword ptr cos_p_4[rip] # p12 + mulsd xmm0,qword ptr cos_p_5[rip] # p14 + + subsd xmm8,xmm9 # x_17_l + mulsd xmm10,xmm9 # 0.5*x_17_h + + addsd xmm0,xmm2 # p12+p14 + + mulsd xmm10,xmm9 # 0.5*x_17_h*x_17_h + movlpd xmm11,qword ptr real_1_0[rip] + + addsd xmm0,xmm4 # p10+p12+p14 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5*x_17_l + addsd xmm9,xmm1 # x+x_17_h + + addsd xmm0,xmm7 # p8+p10+p12+p14 + + mulsd xmm8,xmm9 # 0.5*x_17_l*(x+x_17_h) + + addsd xmm0,xmm6 # p6+p8+p10+p12+p14 + + ucomisd xmm10,qword ptr real_2_p_m_18[rip] # 0.5*x_17_h*x_17_h<2**-18 + jb cos_real__s + + subsd xmm11,xmm10 # 1.0-0.5*x_17_h*x_17_h + + addsd xmm0,xmm5 # p4+p6+p8+p10+p12+p14 + + addsd xmm8,xmm12 + + subsd xmm0,xmm8 + addsd xmm0,xmm11 + + ret + +cos_real__s: + addsd xmm0,xmm5 # p4+p6+p8+p10+p12+p14 + + addsd xmm8,xmm12 + + subsd xmm0,xmm8 + subsd xmm0,xmm10 + addsd xmm0,xmm11 + + ret + +sin_real_4: + subsd xmm0,qword ptr real_3_pi_d_2[rip] + 
movlpd xmm13,qword ptr real_3_pi_d_2_l[rip] + att_jmp cos_real_m + +cos_real_n_3: + addsd xmm0,qword ptr real_pi[rip] + movlpd xmm13,qword ptr real_m_pi_l[rip] + att_jmp cos_real_m + +sin_real_n_2: + addsd xmm0,qword ptr real_pi_d_2[rip] + movlpd xmm13,qword ptr real_m_pi_d_2_l[rip] + att_jmp cos_real_m + +cos_real_3: + subsd xmm0,qword ptr real_pi[rip] + movlpd xmm13,qword ptr real_pi_l[rip] + +cos_real_m: + movsd xmm12,xmm0 + subsd xmm0,xmm13 + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + subsd xmm12,xmm1 + + movlpd xmm9,qword ptr real_17_bits[rip] + movlpd xmm5,qword ptr cos_p_0[rip] + + movsd xmm8,xmm1 # x + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + subsd xmm12,xmm13 + + andpd xmm9,xmm1 # x_17_h + movlpd xmm10,qword ptr real_0_5[rip] + + movsd xmm3,xmm2 # x2 + mulsd xmm2,xmm0 # x6 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movlpd xmm7,qword ptr cos_p_2[rip] + + mulsd xmm12,xmm8 + + mulsd xmm5,xmm4 # p4 + mulsd xmm4,xmm2 # x10 + + movsd xmm6,xmm2 # x6 + mulsd xmm2,xmm2 # x12 + + mulsd xmm7,xmm0 # p8 + mulsd xmm0,xmm6 # x14 + + mulsd xmm6,qword ptr cos_p_1[rip] # p6 + mulsd xmm4,qword ptr cos_p_3[rip] # p10 + mulsd xmm2,qword ptr cos_p_4[rip] # p12 + mulsd xmm0,qword ptr cos_p_5[rip] # p14 + + subsd xmm8,xmm9 # x_17_l + mulsd xmm10,xmm9 # 0.5*x_17_h + + addsd xmm0,xmm2 # p12+p14 + + mulsd xmm10,xmm9 # 0.5*x_17_h*x_17_h + movlpd xmm11,qword ptr real_m_1_0[rip] + + addsd xmm0,xmm4 # p10+p12+p14 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5*x_17_l + addsd xmm9,xmm1 # x+x_17_h + + addsd xmm0,xmm7 # p8+p10+p12+p14 + + mulsd xmm8,xmm9 # 0.5*x_17_l*(x+x_17_h) + + addsd xmm0,xmm6 # p6+p8+p10+p12+p14 + + ucomisd xmm10,qword ptr real_2_p_m_18[rip] # 0.5*x_17_h*x_17_h<2**-18 + jb cos_real_m_s + + addsd xmm11,xmm10 # -1.0+0.5*x_17_h*x_17_h + + addsd xmm0,xmm5 # p4+p6+p8+p10+p12+p14 + + addsd xmm8,xmm12 + + subsd xmm8,xmm0 + movsd xmm0,xmm11 + addsd xmm0,xmm8 + ret + +cos_real_m_s: + addsd xmm0,xmm5 # p4+p6+p8+p10+p12+p14 + + addsd xmm8,xmm12 + + subsd 
xmm8,xmm0 + + movsd xmm0,xmm11 + addsd xmm8,xmm10 + + addsd xmm0,xmm8 + ret + + .globl tan_real + +tan_real: + ucomisd xmm0,qword ptr real_m_0_338[rip] + jb tan_real_n # x<-0.338 | NAN + ucomisd xmm0,qword ptr real_0_89[rip] + jbe tan_real_1 + ucomisd xmm0,qword ptr real_pi_m_0_699[rip] + jbe tan_real_2 + ucomisd xmm0,qword ptr real_pi_p_0_89[rip] + jbe tan_real_3 + ucomisd xmm0,qword ptr real_2pi_m_0_699[rip] + jbe tan_real_4 + ucomisd xmm0,qword ptr real_2pi_p_0_89[rip] + jbe tan_real_5 + + ucomisd xmm0,qword ptr real_36825084_pi[rip] + jbe tan_real_0 + + ucomisd xmm0,qword ptr real_2_p_53[rip] + att_jae sin_cos_or_tan_real_too_large # x>=2^53 (NAN already went to tan_real_n) + + att_call rem_36825084_pi + +tan_real_pn_l: + movlpd xmm2,qword ptr real_pi_d_4_26[rip] + movlpd xmm3,qword ptr real_pi_d_4_26_26[rip] + movlpd xmm12,qword ptr real_pi_d_4_52_l[rip] + + mulsd xmm2,xmm1 + mulsd xmm3,xmm1 + mulsd xmm12,xmm1 + + subsd xmm12,xmm7 + jmp tan_real_ + +tan_real_0: + movlpd xmm1,qword ptr real_4_d_pi[rip] + + mulsd xmm1,xmm0 # x*4/pi + + addsd xmm1,qword ptr real_1_0[rip] # x*4/pi+1 + + movq xmm4,qword ptr round_even_c[rip] +tan_real_pn_0: + movq xmm2,xmm1 + psrlq xmm1,52 + psubq xmm4,xmm1 + movq xmm1,qword ptr mask_all_one[rip] + psllq xmm1,xmm4 + andpd xmm1,xmm2 # round_even (x*4/pi+1), or round_even (x*4/pi-1) when entered from tan_real_n_0 + + movlpd xmm2,qword ptr real_pi_d_4_26[rip] + movlpd xmm3,qword ptr real_pi_d_4_26_26[rip] + movlpd xmm12,qword ptr real_pi_d_4_52_l[rip] + + movq xmm5,qword ptr mask_all_one_except_last[rip] + psllq xmm5,xmm4 + + mulsd xmm2,xmm1 + mulsd xmm3,xmm1 + mulsd xmm12,xmm1 + +tan_real_: + andpd xmm5,xmm1 + ucomisd xmm1,xmm5 + jne tan_real_0_2 + + subsd xmm0,xmm2 + subsd xmm0,xmm3 + + ucomisd xmm0,qword ptr real_0_338[rip] + ja tan_real_0_1p + ucomisd xmm0,qword ptr real_m_0_338[rip] + jae tan_real_3_ + + ucomisd xmm0,qword ptr real_m_0_542[rip] + ja tan_real_0_5_n + ucomisd xmm0,qword ptr real_m_0_699[rip] + ja tan_real_s_0_5_n + jmp tan_real_1_0_3_n + +tan_real_0_1p: + ucomisd xmm0,qword ptr real_0_542[rip] + jb 
tan_real_0_5_p + ucomisd xmm0,qword ptr real_0_699[rip] + jb tan_real_s_0_5_p + jmp tan_real_1_0_3 + +tan_real_0_2: + subsd xmm2,xmm0 + movsd xmm1,xmm3 + addsd xmm1,xmm2 + + ucomisd xmm1,qword ptr real_0_5_pi_m_1_18[rip] + ja tan_real_0_2p + ucomisd xmm1,qword ptr real_1_18_m_0_5_pi[rip] + jae tan_real_2_ + + ucomisd xmm1,qword ptr real_1_04_m_0_5_pi[rip] + ja tan_real_2_0_n + ucomisd xmm1,qword ptr real_0_89_m_0_5_pi[rip] + ja tan_real_s_2_n + jmp tan_real_1_0_n + +tan_real_0_2p: + ucomisd xmm1,qword ptr real_0_5_pi_m_1_04[rip] + jb tan_real_2_0_p + ucomisd xmm1,qword ptr real_0_5_pi_m_0_89[rip] + jb tan_real_s_2_p + jmp tan_real_1_0_2_p + +tan_real_n: + ucomisd xmm0,qword ptr real_m_0_89[rip] + jae tan_real_n_1 + ucomisd xmm0,qword ptr real_n_pi_m_0_699[rip] + jae tan_real_n_2 + ucomisd xmm0,qword ptr real_n_pi_p_0_89[rip] + jae tan_real_n_3 + ucomisd xmm0,qword ptr real_n_2pi_m_0_699[rip] + jae tan_real_n_4 + ucomisd xmm0,qword ptr real_n_2pi_p_0_89[rip] + jae tan_real_n_5 + + ucomisd xmm0,qword ptr real_m_36825084_pi[rip] + jae tan_real_n_0 + + ucomisd xmm0,qword ptr real_m_2_p_53[rip] + att_jbe sin_cos_or_tan_real_too_small_or_nan # x<=-2^53 | NAN + + att_call rem_n_36825084_pi + att_jmp tan_real_pn_l + +tan_real_n_0: + movlpd xmm1,qword ptr real_4_d_pi[rip] + + mulsd xmm1,xmm0 # x*4/pi + + subsd xmm1,qword ptr real_1_0[rip] # x*4/pi-1 + + movq xmm4,qword ptr m_round_even_c[rip] + att_jmp tan_real_pn_0 + +tan_real_1: + ucomisd xmm0,qword ptr real_0_699[rip] + ja tan_real_1_2 + ucomisd xmm0,qword ptr real_0_338[rip] + ja tan_real_0_5 + +tan_real_1_0: + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + movlpd xmm6,qword ptr tan_q_1[rip] + movlpd xmm5,qword ptr tan_q_2[rip] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + movlpd xmm7,qword ptr tan_p_1[rip] + mulsd xmm6,xmm2 # q2 + + movsd xmm3,xmm2 # x2 + mulsd xmm2,xmm0 # x6 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + mulsd xmm5,xmm4 # q4 + mulsd xmm2,qword ptr tan_q_3[rip] # q6 + + mulsd xmm4,qword ptr 
tan_p_2[rip] # p4 + + addsd xmm0,xmm2 # x8+q6 + + mulsd xmm7,xmm3 # p2 + + addsd xmm0,xmm5 # x8+q6+q4 + + addsd xmm4,xmm7 # p4+p2 + + addsd xmm0,xmm6 # x8+q6+q4+q2 + + addsd xmm4,qword ptr tan_p_0[rip] # p4+p2+p0 + + addsd xmm0,qword ptr tan_q_0[rip] # x8+q6+q4+q2+q0 + + divsd xmm4,xmm0 # p/q + + mulsd xmm3,xmm1 # x3 + movsd xmm0,xmm1 # x + + mulsd xmm3,xmm4 + addsd xmm0,xmm3 + + ret + +tan_real_m_0_5: + ucomisd xmm0,qword ptr real_m_0_542[rip] + jb tan_real_n_s_0_5 + + movlpd xmm1,qword ptr atan_0_5_53[rip] + movlpd xmm10,qword ptr real_49_bits[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + lea rdx,tan_n_0_5_t[rip] + addsd xmm1,qword ptr atan_0_5_53_l[rip] # y=x+atan 0.5 + jmp tan_real_0_5_a_s_0_5_a_s_2 + +tan_real_n_s_0_5: + movlpd xmm1,qword ptr atan_sqrt_0_5[rip] + movlpd xmm10,qword ptr real_51_bits[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + lea rdx,tan_n_s_0_5_t[rip] + addsd xmm1,qword ptr atan_sqrt_0_5_l[rip] # y=x+atan sqrt 0.5 + att_jmp tan_real_0_5_a_s_0_5_a_s_2 + +tan_real_0_5: + ucomisd xmm0,qword ptr real_0_542[rip] + ja tan_real_s_0_5 + + movlpd xmm1,qword ptr m_atan_0_5_53[rip] + movlpd xmm10,qword ptr real_49_bits[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + lea rdx,tan_0_5_t[rip] + subsd xmm1,qword ptr atan_0_5_53_l[rip] # y=x-atan 0.5 + att_jmp tan_real_0_5_a_s_0_5_a_s_2 + +tan_real_s_0_5: + movlpd xmm1,qword ptr m_atan_sqrt_0_5[rip] + movlpd xmm10,qword ptr real_51_bits[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + lea rdx,tan_s_0_5_t[rip] + subsd xmm1,qword ptr atan_sqrt_0_5_l[rip] # y=x-atan sqrt 0.5 + +tan_real_0_5_a_s_0_5_a_s_2: + subsd xmm0,xmm10 # x_l + + movsd xmm2,xmm1 # y + mulsd xmm1,xmm1 # y2 + + mulsd xmm10,qword ptr [rdx] # 1.25|1.5|3*x_h + + movlpd xmm7,qword ptr tan3_q_1[rip] + movlpd xmm6,qword ptr tan3_q_2[rip] + + movsd xmm3,xmm1 # y2 + mulsd xmm1,xmm1 # y4 + + movlpd xmm9,qword ptr tan3_q_3[rip] + mulsd xmm7,xmm3 # q2 + + movsd xmm4,xmm3 # y2 + mulsd xmm3,xmm1 # y6 + + movlpd xmm8,qword ptr 
tan3_p_0[rip] + + movsd xmm5,xmm1 # y4 + mulsd xmm1,xmm1 # y8 + + mulsd xmm9,xmm3 # q6 + mulsd xmm3,qword ptr tan3_p_2[rip] # p6 + + mulsd xmm6,xmm5 # q4 + mulsd xmm5,qword ptr tan3_p_1[rip] # p4 + + addsd xmm1,xmm9 # y8+q6 + + mulsd xmm8,xmm4 # p2 + + addsd xmm1,xmm6 # y8+q6+q4 + + mulsd xmm0,qword ptr [rdx] # 1.25|1.5|3*x_l + + movlpd xmm6,qword ptr 8[rdx] # c=0.5|sqrt 0.5|sqrt 2 + + addsd xmm3,xmm5 # p6+p4 + addsd xmm1,xmm7 # y8+q6+q4+q2 + + mulsd xmm6,xmm2 # c*y (c=0.5|sqrt 0.5|sqrt 2) + + addsd xmm3,xmm8 # p6+p4+p2 + addsd xmm1,qword ptr tan3_q_0[rip] # y8+q6+q4+q2+q0 + + movsd xmm4,xmm3 # p + addsd xmm4,xmm1 # p+q + + mulsd xmm4,xmm6 # c*y*(p+q) + + mulsd xmm2,qword ptr [rdx] # 1.25|1.5|3*y + + addsd xmm3,xmm4 # p+c*y*(p+q) + subsd xmm1,xmm4 # q-c*y*(p+q) + + divsd xmm3,xmm1 # (p+c*y*(p+q))/(q-c*y*(p+q)) + + addsd xmm0,qword ptr 16[rdx] # (0.5-1.25atan0.5)_49_l | (sqrt0.5-1.5atan(sqrt0.5))_l | (sqrt2-3atan(sqrt2))_l + + mulsd xmm2,xmm3 # 1.25|1.5|3 * y * (p+c*y*(p+q))/(q-c*y*(p+q)) + + addsd xmm10,qword ptr 24[rdx] # (0.5-1.25atan0.5)_49 | sqrt0.5-1.5atan(sqrt0.5) | sqrt2-3atan(sqrt2) + + addsd xmm0,xmm2 + + addsd xmm0,xmm10 # 0.5|sqrt 0.5|sqrt 2+1.25|1.5|3y+1.25|1.5|3y * .. 
+ + ret + +tan_real_n_1: + ucomisd xmm0,qword ptr real_m_0_699[rip] + att_ja tan_real_m_0_5 + +tan_real_n_1_2: + movlpd xmm1,qword ptr real_pi_d_4[rip] + + addsd xmm1,xmm0 + addsd xmm0,xmm0 # 2x + + addsd xmm1,qword ptr real_pi_d_4_l[rip] # y=pi/4+x + + addsd xmm0,qword ptr real_pi_d_2_m_1_0_52[rip] # 1+2x-pi/2=1-2y + movlpd xmm10,qword ptr real_m_pi_d_2_m_1_0_52_l[rip] + movlpd xmm11,qword ptr real_m_2_0[rip] + jmp tan_real_1_2_ + +tan_real_1_2: + movlpd xmm1,qword ptr real_pi_d_4[rip] + + subsd xmm1,xmm0 + addsd xmm0,xmm0 # 2x + + addsd xmm1,qword ptr real_pi_d_4_l[rip] # y=pi/4-x + subsd xmm0,qword ptr real_pi_d_2_m_1_0_52[rip] # 1+2x-pi/2=1-2y + movlpd xmm10,qword ptr real_pi_d_2_m_1_0_52_l[rip] + movlpd xmm11,qword ptr real_2_0[rip] + +tan_real_1_2_: + movsd xmm2,xmm1 # y + mulsd xmm1,xmm1 # y2 + + movlpd xmm7,qword ptr tan3_q_1[rip] + movlpd xmm6,qword ptr tan3_q_2[rip] + + movsd xmm3,xmm1 # y2 + mulsd xmm1,xmm1 # y4 + + movlpd xmm9,qword ptr tan3_q_3[rip] + mulsd xmm7,xmm3 # q2 + + movsd xmm4,xmm3 # y2 + mulsd xmm3,xmm1 # y6 + + movlpd xmm8,qword ptr tan3_p_0[rip] + + movsd xmm5,xmm1 # y4 + mulsd xmm1,xmm1 # y8 + + mulsd xmm9,xmm3 # q6 + mulsd xmm3,qword ptr tan3_p_2[rip] # p6 + + mulsd xmm6,xmm5 # q4 + mulsd xmm5,qword ptr tan3_p_1[rip] # p4 + + addsd xmm1,xmm9 # y8+q6 + + mulsd xmm8,xmm4 # p2 + + addsd xmm1,xmm6 # y8+q6+q4 + + addsd xmm3,xmm5 # p6+p4 + addsd xmm1,xmm7 # y8+q6+q4+q2 + + addsd xmm3,xmm8 # p6+p4+p2 + addsd xmm1,qword ptr tan3_q_0[rip] # y8+q6+q4+q2+q0 + + movsd xmm4,xmm3 # p + addsd xmm4,xmm1 # p+q + + mulsd xmm4,xmm2 # y*(p+q) + mulsd xmm2,xmm11 # 2y + + subsd xmm3,xmm4 # p-y*(p+q) + addsd xmm1,xmm4 # q+y*(p+q) + + divsd xmm3,xmm1 # (p-y*(p+q))/(q+y*(p+q)) + + mulsd xmm2,xmm3 # 2y * (p-y*(p+q))/(q+y*(p+q)) + + addsd xmm2,xmm10 + + subsd xmm0,xmm2 # 1-2y-2y * .. 
+ + ret + +tan_real_n_4: + ucomisd xmm0,qword ptr real_n_pi_p_1_18[rip] + + movlpd xmm1,qword ptr real_m_3_pi_d_2[rip] + movlpd xmm12,qword ptr real_m_3_pi_d_2_l[rip] + + ja tan_real_n_2_0_4_n + + ucomisd xmm0,qword ptr real_n_2pi_m_0_89[rip] + jb tan_real_n_1_0_2 + ucomisd xmm0,qword ptr real_n_2pi_m_1_18[rip] + jb tan_real_n_2_0_4_p + jmp tan_real_24 + +tan_real_4: + ucomisd xmm0,qword ptr real_pi_p_1_18[rip] + + movlpd xmm1,qword ptr real_3_pi_d_2[rip] + movlpd xmm12,qword ptr real_3_pi_d_2_l[rip] + + jb tan_real_2_0_2_p + + ucomisd xmm0,qword ptr real_2pi_m_0_89[rip] + ja tan_real_1_0_2 + ucomisd xmm0,qword ptr real_2pi_m_1_18[rip] + ja tan_real_2_0_4_n + att_jmp tan_real_24 + +tan_real_n_2: + ucomisd xmm0,qword ptr real_m_1_18[rip] + ja tan_real_n_s_2 + + movlpd xmm1,qword ptr real_m_pi_d_2[rip] + movlpd xmm12,qword ptr real_m_pi_d_2_l[rip] + + ucomisd xmm0,qword ptr real_n_pi_m_0_89[rip] + att_jb tan_real_n_1_0_2 + ucomisd xmm0,qword ptr real_n_pi_m_1_18[rip] + jb tan_real_n_2_0_2_n + + att_jmp tan_real_24 + +tan_real_2: + ucomisd xmm0,qword ptr real_1_18[rip] + jb tan_real_s_2 + + movlpd xmm1,qword ptr real_pi_d_2[rip] + movlpd xmm12,qword ptr real_pi_d_2_l[rip] + + ucomisd xmm0,qword ptr real_pi_m_0_89[rip] + att_ja tan_real_1_0_2 + ucomisd xmm0,qword ptr real_pi_m_1_18[rip] + ja tan_real_2_0_2_n + +tan_real_24: + subsd xmm1,xmm0 # y_1 + +tan_real_2_: + movlpd xmm2,qword ptr real_18_bits[rip] + movsd xmm3,xmm12 + + andpd xmm2,xmm1 # y_1_h + addsd xmm3,xmm1 # y + + movsd xmm6,xmm1 # y_1 + subsd xmm1,xmm2 # y_1-y_1_h + + movsd xmm0,xmm2 # y_1_h + addsd xmm2,xmm3 # y+y_1_h + addsd xmm1,xmm12 # y_1_l + + mulsd xmm0,xmm0 # y_1_h^2 + mulsd xmm1,xmm2 # (y+y_1_h)*y_1_l + + movsd xmm13,xmm0 # y_1_h^2 + addsd xmm0,xmm1 # y^2 + + movsd xmm14,xmm1 # (y_1^2)_l + movsd xmm1,xmm3 # y + subsd xmm3,xmm6 # y-y_1 + + movlpd xmm5,qword ptr tan2_q_1[rip] + movlpd xmm9,qword ptr tan2_p_0[rip] + + subsd xmm12,xmm3 # y_s + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd 
xmm5,xmm2 # q2 + mulsd xmm9,xmm2 # p2 + + movlpd xmm6,qword ptr tan2_q_2[rip] + movlpd xmm11,qword ptr tan2_p_1[rip] + + movsd xmm3,xmm2 # x2 + mulsd xmm2,xmm1 # x3 + + movsd xmm8,xmm1 # x + + movlpd xmm10,qword ptr real_0_5[rip] + + mulsd xmm6,xmm0 # q4 + mulsd xmm11,xmm0 # p4 + + addsd xmm5,xmm6 # q4+q2 + + movlpd xmm7,qword ptr real_1_0[rip] + + mulsd xmm10,xmm3 # 0.5*x2 + mulsd xmm3,xmm0 # x6 + + addsd xmm5,qword ptr tan2_q_0[rip] # q4+q2+q0 + + mulsd xmm0,xmm0 # x8 + mulsd xmm3,qword ptr tan2_p_2[rip] # p6 + + mulsd xmm5,xmm2 # (q4+q2+q0)*x3 + + mulsd xmm0,qword ptr tan2_p_3[rip] # p8 + + movsd xmm2,xmm5 # (q4+q2+q0)*x3 + addsd xmm5,xmm8 # (q4+q2+q0)*x3+x + + addsd xmm0,xmm3 # p8+p6 + + divsd xmm7,xmm5 # 1/(q4+q2+q0)*x3+x + + movlpd xmm4,qword ptr real_25_bits[rip] + + addsd xmm0,xmm11 # p8+p6+p4 + + andpd xmm5,xmm4 # q_h + + addsd xmm0,xmm9 # p8+p6+p4+p2 + + movlpd xmm9,qword ptr real_18_bits[rip] + +# ucomisd xmm8,qword ptr real_0_4 + ucomisd xmm10,qword ptr real_2_p_m_31[rip] + + subsd xmm8,xmm5 # x-q_h + + addsd xmm8,xmm2 # q_l=q-q_h + movlpd xmm6,qword ptr real_1_0[rip] + + jb tan_real_2_s # x<1.25*2**-16 + +tan_real_2_1: + movsd xmm3,xmm0 # p + + mulsd xmm14,qword ptr real_0_5[rip] # (0.5*x2)_l + mulsd xmm13,qword ptr real_0_5[rip] # (0.5*x2)_h + + subsd xmm3,xmm10 # p-0.5*x2 + addsd xmm8,xmm12 # q_l+y_s + + subsd xmm0,xmm14 # p-(0.5*x2)_l + + andpd xmm9,xmm7 # (1/q)_h + + mulsd xmm5,xmm9 # q_h*(1/q)_h + mulsd xmm8,xmm9 # q_l*(1/q)_h + + mulsd xmm13,xmm9 # (0.5*x2)_h*(1/q)_h + mulsd xmm0,xmm9 # (p-(0.5*x2)_l)*(1/q)_h + movlpd xmm1,qword ptr real_21_bits[rip] + + subsd xmm6,xmm5 # 1-q_h*(1/q)_h + + andpd xmm1,xmm0 # ((p-(0.5*x2)_l)*(1/q)_h)_h + + subsd xmm6,xmm8 # 1-(q_h+q_l)*(1/q)_h + + subsd xmm0,xmm1 # ((p-(0.5*x2)_l)*(1/q)_h)_l + subsd xmm1,xmm13 # ((p-(0.5*x2)_l)*(1/q)_h)_h-(0.5*x2)_h*(1/q)_h + movlpd xmm2,qword ptr real_21_bits[rip] + + mulsd xmm7,xmm6 # (1/q)_l + + andpd xmm2,xmm1 # (((p-(0.5*x2)_l)*(1/q)_h)_h-(0.5*x2)_h*(1/q)_h)_h + + mulsd 
xmm3,xmm7 # (p-0.5x2)*(1/q)_l + + subsd xmm1,xmm2 # (((p-(0.5*x2)_l)*(1/q)_h)_h-(0.5*x2)_h*(1/q)_l + addsd xmm2,xmm9 # (((p-(0.5*x2)_l)*(1/q)_h)_h-(0.5*x2)_h*(1/q)_h)_h+(1/q)_h + + addsd xmm0,xmm3 # ((p-(0.5*x2)_l)*(1/q)_h)_l+(p-0.5x2)*(1/q)_l + + addsd xmm0,xmm7 # ((p-(0.5*x2)_l)*(1/q)_h)_l+(p-0.5x2)*(1/q)_l+(1/q)_l + + addsd xmm0,xmm1 # ((p-(0.5*x2)_l)*(1/q)_h)+(p-0.5x2)*(1/q)_l+(1/q)_l + # -(0.5*x2)_h*(1/q)_l + addsd xmm0,xmm2 + + ret + +tan_real_2_s: + subsd xmm0,xmm10 # p-0.5*x2 + addsd xmm8,xmm12 # q_l+y_s + + andpd xmm9,xmm7 # (1/q)_h + + mulsd xmm0,xmm7 # (p-0.5*x2)*(1/q) + + mulsd xmm5,xmm9 # q_h*(1/q)_h + mulsd xmm8,xmm9 # q_l*(1/q)_h + + subsd xmm6,xmm5 # 1-q_h*(1/q)_h + + subsd xmm6,xmm8 # 1-(q_h+q_l)*(1/q)_h + + mulsd xmm6,xmm7 # (1/q)_l + + addsd xmm0,xmm6 # (p/q)_l+(1/q)_l + + addsd xmm0,xmm9 # p/q+1/q + + ret + +# x<0.4 +# subsd xmm0,xmm10 # p-0.5*x2 +# addsd xmm8,xmm12 # q_l+y_s + +# andpd xmm9,xmm7 # (1/q)_h + +# mulsd xmm0,xmm7 # (p-0.5*x2)*(1/q) + +# movlpd xmm4,qword ptr real_21_bits + +# mulsd xmm5,xmm9 # q_h*(1/q)_h +# mulsd xmm8,xmm9 # q_l*(1/q)_h + +# andpd xmm4,xmm0 # (p/q)_h + +# subsd xmm6,xmm5 # 1-q_h*(1/q)_h + +# subsd xmm0,xmm4 # (p/q)_l +# addsd xmm4,xmm9 # (p/q)_h+(1/q)_h + +# subsd xmm6,xmm8 # 1-(q_h+q_l)*(1/q)_h + +# mulsd xmm6,xmm7 # (1/q)_l + +# addsd xmm0,xmm6 # (p/q)_l+(1/q)_l + +# addsd xmm0,xmm4 # p/q+1/q + +# ret + +tan_real_n_s_2: + ucomisd xmm0,qword ptr real_m_1_04[rip] + jb tan_real_n_2_0 + + movlpd xmm1,qword ptr atan_sqrt_2[rip] + movlpd xmm10,qword ptr real_51_bits[rip] + + lea rdx,tan_n_s_2_t[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + addsd xmm1,qword ptr atan_sqrt_2_l[rip] # y=x+atan sqrt 2 + att_jmp tan_real_0_5_a_s_0_5_a_s_2 + +tan_real_s_2: + ucomisd xmm0,qword ptr real_1_04[rip] + ja tan_real_2_0 + + movlpd xmm1,qword ptr m_atan_sqrt_2[rip] + movlpd xmm10,qword ptr real_51_bits[rip] + + lea rdx,tan_s_2_t[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + subsd xmm1,qword ptr atan_sqrt_2_l[rip] # 
y=x-atan sqrt 2 + att_jmp tan_real_0_5_a_s_0_5_a_s_2 + +tan_real_n_2_0: + movlpd xmm1,qword ptr atan_2_53[rip] + movlpd xmm10,qword ptr real_49_bits[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + addsd xmm1,qword ptr atan_2_53_l[rip] # y=x+atan 2 + subsd xmm0,xmm10 # x_l + + movlpd xmm11,qword ptr real_5_atan_2_m_2_53_l[rip] + movlpd xmm12,qword ptr real_5_atan_2_m_2_53[rip] + movlpd xmm13,qword ptr real_m_2_0[rip] + jmp tan_real_2_0_ + +tan_real_2_0: + movlpd xmm1,qword ptr m_atan_2_53[rip] + movlpd xmm10,qword ptr real_49_bits[rip] + + addsd xmm1,xmm0 + andpd xmm10,xmm0 # x_h + + subsd xmm1,qword ptr atan_2_53_l[rip] # y=x-atan 2 + subsd xmm0,xmm10 # x_l + + movlpd xmm11,qword ptr real_n_2_m_5_atan_2_53_l[rip] + movlpd xmm12,qword ptr real_n_2_m_5_atan_2_53[rip] + movlpd xmm13,qword ptr real_2_0[rip] + +tan_real_2_0_: + movsd xmm2,xmm1 # y + mulsd xmm1,xmm1 # y2 + + mulsd xmm10,qword ptr real_5_0[rip] # 5*x_h + + movlpd xmm7,qword ptr tan3_q_1[rip] + movlpd xmm6,qword ptr tan3_q_2[rip] + + movsd xmm3,xmm1 # y2 + mulsd xmm1,xmm1 # y4 + + movlpd xmm9,qword ptr tan3_q_3[rip] + mulsd xmm7,xmm3 # q2 + + movsd xmm4,xmm3 # y2 + mulsd xmm3,xmm1 # y6 + + movlpd xmm8,qword ptr tan3_p_0[rip] + + movsd xmm5,xmm1 # y4 + mulsd xmm1,xmm1 # y8 + + mulsd xmm9,xmm3 # q6 + mulsd xmm3,qword ptr tan3_p_2[rip] # p6 + + mulsd xmm6,xmm5 # q4 + mulsd xmm5,qword ptr tan3_p_1[rip] # p4 + + addsd xmm1,xmm9 # y8+q6 + + mulsd xmm8,xmm4 # p2 + + addsd xmm1,xmm6 # y8+q6+q4 + + mulsd xmm0,qword ptr real_5_0[rip] # 5*x_l + + addsd xmm3,xmm5 # p6+p4 + addsd xmm1,xmm7 # y8+q6+q4+q2 + + mulsd xmm13,xmm2 # 2*y + + addsd xmm3,xmm8 # p6+p4+p2 + addsd xmm1,qword ptr tan3_q_0[rip] # y8+q6+q4+q2+q0 + + movsd xmm4,xmm3 # p + addsd xmm4,xmm1 # p+q + + mulsd xmm4,xmm13 # 2*y*(p+q) + +# mulsd xmm2,qword ptr real_5_0 # 5*y + + addsd xmm3,xmm4 # p+y*(p+q) + subsd xmm1,xmm4 # q-y*(p+q) + + divsd xmm3,xmm1 # (p-y*(p+q))/(q+y*(p+q)) + + addsd xmm0,xmm11 + + mulsd xmm2,xmm3 # y * (p-y*(p+q))/(q+y*(p+q)) + + 
addsd xmm10,xmm12 # 2+5*x_h-5*atan 2=2-5*y + + mulsd xmm2,qword ptr real_5_0[rip] # 5*y*.. + + addsd xmm0,xmm2 + + addsd xmm0,xmm10 # 2+5*y+5*y * .. + + ret + +tan_real_n_2_0_4_p: + ucomisd xmm0,qword ptr real_n_2pi_m_1_04[rip] + subsd xmm1,xmm0 + att_jb tan_real_s_2_p + att_jmp tan_real_2_0_p + +tan_real_n_2_0_4_n: + ucomisd xmm0,qword ptr real_n_pi_p_1_04[rip] + subsd xmm1,xmm0 + att_ja tan_real_s_2_n + att_jmp tan_real_2_0_n + +tan_real_n_2_0_2_n: + ucomisd xmm0,qword ptr real_n_pi_m_1_04[rip] + subsd xmm1,xmm0 + att_jb tan_real_s_2_p + att_jmp tan_real_2_0_p + +tan_real_2_0_2_p: + ucomisd xmm0,qword ptr real_pi_p_1_04[rip] + subsd xmm1,xmm0 + att_jb tan_real_s_2_p + +tan_real_2_0_p: + movlpd xmm0,qword ptr real_h_pi_m_atan_2_52[rip] + movlpd xmm2,qword ptr real_h_pi_m_atan_2_52_l[rip] + movlpd xmm10,qword ptr real_48_bits[rip] + + subsd xmm0,xmm1 # (0.5pi-atan 2)_h-x_h + subsd xmm2,xmm12 # (0.5pi-atan 2)_l-x_l + andpd xmm10,xmm1 # x_h + + movlpd xmm11,qword ptr real_m_5_0[rip] + addsd xmm2,xmm0 # y=0.5pi-atan 2-x + subsd xmm1,xmm10 # x_h_l + mulsd xmm10,xmm11 # -5x_h + + movsd xmm0,xmm2 # y + mulsd xmm2,xmm2 # y2 + + addsd xmm1,xmm12 # x_h_l+x_l + addsd xmm10,qword ptr real_2_p_2_5_pi_m_5_atan_2_53[rip] # -5x+(2+2.5pi-5atan 2)=2+5y + + movlpd xmm12,qword ptr real_2_p_2_5_pi_m_5_atan_2_53_l[rip] + movlpd xmm13,qword ptr real_5_0[rip] + jmp tan_real_2_0_pn + +tan_real_2_0_4_n: + ucomisd xmm0,qword ptr real_2pi_m_1_04[rip] + subsd xmm1,xmm0 + att_ja tan_real_s_2_n + att_jmp tan_real_2_0_n + +tan_real_2_0_2_n: + ucomisd xmm0,qword ptr real_pi_m_1_04[rip] + subsd xmm1,xmm0 + att_ja tan_real_s_2_n + +tan_real_2_0_n: + movlpd xmm0,qword ptr real_h_pi_m_atan_2_52[rip] + movlpd xmm2,qword ptr real_h_pi_m_atan_2_52_l[rip] + movlpd xmm10,qword ptr real_48_bits[rip] + + addsd xmm0,xmm1 # (0.5pi-atan 2)_h+x_h + addsd xmm2,xmm12 # (0.5pi-atan 2)_l+x_l + andpd xmm10,xmm1 # x_h + + movlpd xmm11,qword ptr real_m_5_0[rip] + addsd xmm2,xmm0 # y=0.5pi-atan 2+x + subsd xmm1,xmm10 # 
x_h_l + mulsd xmm10,xmm11 # -5x_h + + movsd xmm0,xmm2 # y + mulsd xmm2,xmm2 # y2 + + addsd xmm1,xmm12 # x_h_l+x_l + subsd xmm10,qword ptr real_2_p_2_5_pi_m_5_atan_2_53[rip] # -5x-(2+2.5pi-5atan 2)=-2-5y + + movlpd xmm12,qword ptr real_5_atan_2_m_2_m_2_5_pi_53_l[rip] + movlpd xmm13,qword ptr real_m_5_0[rip] + +tan_real_2_0_pn: + movlpd xmm7,qword ptr tan3_q_1[rip] + movlpd xmm6,qword ptr tan3_q_2[rip] + + movsd xmm3,xmm2 # y2 + mulsd xmm2,xmm2 # y4 + + movlpd xmm9,qword ptr tan3_q_3[rip] + mulsd xmm7,xmm3 # q2 + + movsd xmm4,xmm3 # y2 + mulsd xmm3,xmm2 # y6 + + movlpd xmm8,qword ptr tan3_p_0[rip] + + movsd xmm5,xmm2 # y4 + mulsd xmm2,xmm2 # y8 + + mulsd xmm9,xmm3 # q6 + mulsd xmm3,qword ptr tan3_p_2[rip] # p6 + + mulsd xmm6,xmm5 # q4 + mulsd xmm5,qword ptr tan3_p_1[rip] # p4 + + addsd xmm2,xmm9 # y8+q6 + + mulsd xmm8,xmm4 # p2 + + addsd xmm2,xmm6 # y8+q6+q4 + + movlpd xmm6,qword ptr real_2_0[rip] + mulsd xmm1,xmm11 # -5x_l + + addsd xmm3,xmm5 # p6+p4 + addsd xmm2,xmm7 # y8+q6+q4+q2 + + mulsd xmm6,xmm0 # 2*y + + addsd xmm3,xmm8 # p6+p4+p2 + addsd xmm2,qword ptr tan3_q_0[rip] # y8+q6+q4+q2+q0 + + movsd xmm4,xmm3 # p + addsd xmm4,xmm2 # p+q + + mulsd xmm4,xmm6 # 2*y*(p+q) + + addsd xmm3,xmm4 # p+y*(p+q) + subsd xmm2,xmm4 # q-y*(p+q) + + divsd xmm3,xmm2 # (p-y*(p+q))/(q+y*(p+q)) + + addsd xmm1,xmm12 + + mulsd xmm0,xmm3 # y * (p-y*(p+q))/(q+y*(p+q)) + + mulsd xmm0,xmm13 # -5*y*.. + + addsd xmm0,xmm1 + + addsd xmm0,xmm10 # -2-5*y-5*y * .. 
+ + ret + +tan_real_s_2_p: + movlpd xmm0,qword ptr real_h_pi_m_atan_s_2_53[rip] + movlpd xmm2,qword ptr real_h_pi_m_atan_s_2_53_l[rip] + movlpd xmm10,qword ptr real_49_bits[rip] + + subsd xmm0,xmm1 # (0.5pi-atan sqrt 2)_h-x_h + subsd xmm2,xmm12 # (0.5pi-atan sqrt 2)_l-x_l + andpd xmm10,xmm1 # x_h + + movlpd xmm11,qword ptr real_m_3_0[rip] + addsd xmm2,xmm0 # y=0.5pi-atan sqrt 2-x + subsd xmm1,xmm10 # x_h_l + mulsd xmm10,xmm11 # -3x_h + + movsd xmm0,xmm2 # y + mulsd xmm2,xmm2 # y2 + + addsd xmm1,xmm12 # x_h_l+x_l + addsd xmm10,qword ptr real_sqrt_2_p_1_5_pi_m_3_atan_sqrt_2_53[rip] # -3x+(sqrt 2+1.5pi-3atan sqrt 2)=sqrt 2+3y + + movlpd xmm12,qword ptr real_sqrt_2_p_1_5_pi_m_3_atan_sqrt_2_53_l[rip] + movlpd xmm13,qword ptr real_3_0[rip] + jmp tan_real_s_2_pn + +tan_real_s_2_n: + movlpd xmm0,qword ptr real_h_pi_m_atan_s_2_53[rip] + movlpd xmm2,qword ptr real_h_pi_m_atan_s_2_53_l[rip] + movlpd xmm10,qword ptr real_49_bits[rip] + + addsd xmm0,xmm1 # (0.5pi-atan sqrt 2)_h+x + addsd xmm2,xmm12 # (0.5pi-atan sqrt 2)_l+x_l + andpd xmm10,xmm1 # x_h + + movlpd xmm11,qword ptr real_m_3_0[rip] + addsd xmm2,xmm0 # y=0.5pi-atan sqrt 2+x + subsd xmm1,xmm10 # x_h_l + mulsd xmm10,xmm11 # -3x_h + + movsd xmm0,xmm2 # y + mulsd xmm2,xmm2 # y2 + + addsd xmm1,xmm12 # x_h_l+x_l + subsd xmm10,qword ptr real_sqrt_2_p_1_5_pi_m_3_atan_sqrt_2_53[rip] # -3x-(sqrt 2+1.5pi-3atan sqrt 2)=-sqrt 2-3y + + movlpd xmm12,qword ptr real_3_atan_sqrt_2_m_sqrt_2_m_1_5_pi_m_53_l[rip] + movlpd xmm13,qword ptr real_m_3_0[rip] + +tan_real_s_2_pn: + movlpd xmm7,qword ptr tan3_q_1[rip] + movlpd xmm6,qword ptr tan3_q_2[rip] + + movsd xmm3,xmm2 # y2 + mulsd xmm2,xmm2 # y4 + + movlpd xmm9,qword ptr tan3_q_3[rip] + mulsd xmm7,xmm3 # q2 + + movsd xmm4,xmm3 # y2 + mulsd xmm3,xmm2 # y6 + + movlpd xmm8,qword ptr tan3_p_0[rip] + + movsd xmm5,xmm2 # y4 + mulsd xmm2,xmm2 # y8 + + mulsd xmm9,xmm3 # q6 + mulsd xmm3,qword ptr tan3_p_2[rip] # p6 + + mulsd xmm6,xmm5 # q4 + mulsd xmm5,qword ptr tan3_p_1[rip] # p4 + + addsd xmm2,xmm9 # y8+q6 + + mulsd xmm8,xmm4 
# p2 + + addsd xmm2,xmm6 # y8+q6+q4 + + movlpd xmm6,qword ptr sqrt_2_0[rip] + mulsd xmm1,xmm11 # -3x_l + + addsd xmm3,xmm5 # p6+p4 + addsd xmm2,xmm7 # y8+q6+q4+q2 + + mulsd xmm6,xmm0 # sqrt 2*y + + addsd xmm3,xmm8 # p6+p4+p2 + addsd xmm2,qword ptr tan3_q_0[rip] # y8+q6+q4+q2+q0 + + movsd xmm4,xmm3 # p + addsd xmm4,xmm2 # p+q + + mulsd xmm4,xmm6 # sqrt 2*y*(p+q) + mulsd xmm0,xmm13 # 3*y | -3*y + + addsd xmm3,xmm4 # p+sqrt 2*y*(p+q) + subsd xmm2,xmm4 # q-sqrt 2*y*(p+q) + + divsd xmm3,xmm2 # (p+sqrt 2*y*(p+q))/(q-sqrt 2*y*(p+q)) + + addsd xmm1,xmm12 + + mulsd xmm0,xmm3 # 3*y | -3*y * (p+sqrt 2*y*(p+q))/(q-sqrt 2*y*(p+q)) + + addsd xmm0,xmm1 + + addsd xmm0,xmm10 # sqrt 2+3*y+3*y * .. | -sqrt 2-3*y-3*y * .. + + ret + +tan_real_1_0_2: + subsd xmm1,xmm0 + +tan_real_1_0_n: + movlpd xmm0,qword ptr real_m_pi_d_4[rip] + movlpd xmm2,qword ptr real_m_pi_d_4_l[rip] + + subsd xmm0,xmm1 # -(pi/4)_h-x_h + subsd xmm2,xmm12 # -(pi/4)_l-x_l + mulsd xmm1,qword ptr real_m_2_0[rip] # -2x + + addsd xmm2,xmm0 # y=-pi/4-x + addsd xmm12,xmm12 # 2x_l + subsd xmm1,qword ptr real_pi_d_2_p_1_0[rip] # -2x-(pi/2+1)=-1+2y + + movlpd xmm10,qword ptr real_pi_d_2_p_1_0_l[rip] + movlpd xmm11,qword ptr real_2_0[rip] + jmp tan_real_1_0_pn + +tan_real_n_1_0_2: + subsd xmm1,xmm0 + +tan_real_1_0_2_p: + movlpd xmm0,qword ptr real_m_pi_d_4[rip] + movlpd xmm2,qword ptr real_m_pi_d_4_l[rip] + movlpd xmm11,qword ptr real_m_2_0[rip] + + addsd xmm0,xmm1 # -(pi/4)_h+x_h + addsd xmm2,xmm12 # -(pi/4)_l+x_l + mulsd xmm1,xmm11 # -2x + + addsd xmm2,xmm0 # y=x-pi/4 + addsd xmm12,xmm12 # 2x_l + addsd xmm1,qword ptr real_pi_d_2_p_1_0[rip] # (pi/2+1)-2x=1-2y + + movlpd xmm10,qword ptr real_m_pi_d_2_p_1_0_l[rip] + att_jmp tan_real_1_0_pn + +tan_real_n_5: + ucomisd xmm0,qword ptr real_n_2pi_m_0_338[rip] + movlpd xmm12,qword ptr real_m_2_pi_l[rip] + movlpd xmm1,qword ptr real_2_pi[rip] + ja tan_real_n_s_0_5_5_p + ucomisd xmm0,qword ptr real_n_2pi_p_0_338[rip] + ja tan_real_5_n + ucomisd xmm0,qword ptr real_n_2pi_p_0_699[rip] + ja tan_real_n_s_0_5_5_n + addsd xmm0,xmm1 + att_jmp 
tan_real_1_0_3_n + +tan_real_5: + ucomisd xmm0,qword ptr real_2pi_m_0_338[rip] + movlpd xmm12,qword ptr real_2_pi_l[rip] + movlpd xmm1,qword ptr real_2_pi[rip] + jb tan_real_s_0_5_5 + ucomisd xmm0,qword ptr real_2pi_p_0_338[rip] + jb tan_real_5_0 + ucomisd xmm0,qword ptr real_2pi_p_0_699[rip] + jb tan_real_s_0_5_5_p + subsd xmm0,xmm1 + att_jmp tan_real_1_0_3 + +tan_real_n_3: + ucomisd xmm0,qword ptr real_n_pi_m_0_338[rip] + movlpd xmm12,qword ptr real_m_pi_l[rip] + movlpd xmm1,qword ptr real_pi[rip] + ja tan_real_n_s_0_5_3_p + ucomisd xmm0,qword ptr real_n_pi_p_0_338[rip] + ja tan_real_3_n + ucomisd xmm0,qword ptr real_n_pi_p_0_699[rip] + ja tan_real_n_s_0_5_3_n + addsd xmm0,xmm1 + att_jmp tan_real_1_0_3_n + +tan_real_3: + ucomisd xmm0,qword ptr real_pi_m_0_338[rip] + movlpd xmm1,qword ptr real_pi[rip] + movlpd xmm12,qword ptr real_pi_l[rip] + jb tan_real_s_0_5_3 + ucomisd xmm0,qword ptr real_pi_p_0_338[rip] + jb tan_real_3_0 + ucomisd xmm0,qword ptr real_pi_p_0_699[rip] + jb tan_real_s_0_5_3_p + subsd xmm0,xmm1 + att_jmp tan_real_1_0_3 + +tan_real_5_n: +tan_real_3_n: + addsd xmm0,xmm1 + att_jmp tan_real_3_ + +tan_real_5_0: +tan_real_3_0: + subsd xmm0,xmm1 + +tan_real_3_: + movlpd xmm2,qword ptr real_26_bits[rip] + + andpd xmm2,xmm0 # y_1_h + movsd xmm1,xmm0 # y_1 + + subsd xmm1,xmm12 # y + movsd xmm8,xmm0 # y_1 + subsd xmm0,xmm2 # y_1-y_1_h + + movsd xmm3,xmm2 # y_1_h + addsd xmm2,xmm1 # y+y_1_h + subsd xmm0,xmm12 # y_1_l + + mulsd xmm3,xmm3 # y_1_h^2 + mulsd xmm0,xmm2 # (y+y_1_h)*y_1_l + + addsd xmm0,xmm3 # y^2 + + subsd xmm8,xmm1 # y_1-y + + movlpd xmm6,qword ptr tan_q_1[rip] + movlpd xmm5,qword ptr tan_q_2[rip] + + subsd xmm8,xmm12 # y_s + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + movlpd xmm7,qword ptr tan_p_1[rip] + mulsd xmm6,xmm2 # q2 + + movsd xmm3,xmm2 # x2 + mulsd xmm2,xmm0 # x6 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + mulsd xmm5,xmm4 # q4 + mulsd xmm2,qword ptr tan_q_3[rip] # q6 + + mulsd xmm4,qword ptr tan_p_2[rip] # p4 + + addsd 
xmm0,xmm2 # x8+q6 + + mulsd xmm7,xmm3 # p2 + + addsd xmm0,xmm5 # x8+q6+q4 + + addsd xmm4,xmm7 # p4+p2 + + addsd xmm0,xmm6 # x8+q6+q4+q2 + + addsd xmm4,qword ptr tan_p_0[rip] # p4+p2+p0 + + addsd xmm0,qword ptr tan_q_0[rip] # x8+q6+q4+q2+q0 + + divsd xmm4,xmm0 # p/q + + mulsd xmm3,xmm1 # x3 + movsd xmm0,xmm1 # x + +# movlpd xmm2,qword ptr real_0_125 + + mulsd xmm3,xmm4 + +# ucomisd xmm3,xmm2 +# jae tan_real_3_1 + + addsd xmm3,xmm8 + + addsd xmm0,xmm3 + + ret + +#tan_real_3_1: +# subsd xmm3,xmm2 +# addsd xmm0,xmm2 + +# addsd xmm3,xmm8 + +# addsd xmm0,xmm3 + +# ret + +tan_real_1_0_3: + movlpd xmm1,qword ptr real_pi_d_4[rip] + movlpd xmm2,qword ptr real_pi_d_4_l[rip] + + subsd xmm1,xmm0 # (pi/4)_h-x + addsd xmm2,xmm12 # (pi/4)_l+n_x_l + mulsd xmm0,qword ptr real_2_0[rip] # 2x + + addsd xmm2,xmm1 # y=pi/4-x + movlpd xmm1,qword ptr real_1_0_m_pi_d_2_52[rip] + addsd xmm12,xmm12 # 2n_x_l + addsd xmm1,xmm0 # 2x+(1-pi/2)=1-2y + + movlpd xmm11,qword ptr real_m_2_0[rip] + movlpd xmm10,qword ptr real_pi_d_2_m_1_0_52_l[rip] + + att_jmp tan_real_1_0_pn + +tan_real_1_0_3_n: + movlpd xmm1,qword ptr real_pi_d_4[rip] + movlpd xmm2,qword ptr real_pi_d_4_l[rip] + + addsd xmm1,xmm0 + subsd xmm2,xmm12 # (pi/4)_l-n_x_l + mulsd xmm0,qword ptr real_2_0[rip] # 2x + + addsd xmm2,xmm1 # y=pi/4+x + movlpd xmm1,qword ptr real_pi_d_2_m_1_0_52[rip] + addsd xmm12,xmm12 # 2n_x_l + addsd xmm1,xmm0 # 2x+(pi/2-1)=-1+2y + + movlpd xmm11,qword ptr real_2_0[rip] + movlpd xmm10,qword ptr real_m_pi_d_2_m_1_0_52_l[rip] + +tan_real_1_0_pn: + movsd xmm0,xmm2 # y + mulsd xmm2,xmm2 # y2 + + movlpd xmm7,qword ptr tan3_q_1[rip] + movlpd xmm6,qword ptr tan3_q_2[rip] + + movsd xmm3,xmm2 # y2 + mulsd xmm2,xmm2 # y4 + + movlpd xmm9,qword ptr tan3_q_3[rip] + mulsd xmm7,xmm3 # q2 + + movsd xmm4,xmm3 # y2 + mulsd xmm3,xmm2 # y6 + + movlpd xmm8,qword ptr tan3_p_0[rip] + + movsd xmm5,xmm2 # y4 + mulsd xmm2,xmm2 # y8 + + mulsd xmm9,xmm3 # q6 + mulsd xmm3,qword ptr tan3_p_2[rip] # p6 + + mulsd xmm6,xmm5 # q4 + mulsd 
xmm5,qword ptr tan3_p_1[rip] # p4 + + addsd xmm2,xmm9 # y8+q6 + + mulsd xmm8,xmm4 # p2 + + addsd xmm2,xmm6 # y8+q6+q4 + + addsd xmm3,xmm5 # p6+p4 + addsd xmm2,xmm7 # y8+q6+q4+q2 + + addsd xmm3,xmm8 # p6+p4+p2 + addsd xmm2,qword ptr tan3_q_0[rip] # y8+q6+q4+q2+q0 + + movsd xmm4,xmm3 # p + addsd xmm4,xmm2 # p+q + + mulsd xmm4,xmm0 # y*(p+q) + mulsd xmm0,xmm11 # -2y | 2y + + subsd xmm3,xmm4 # p-y*(p+q) + addsd xmm2,xmm4 # q+y*(p+q) + + divsd xmm3,xmm2 # (p-y*(p+q))/(q+y*(p+q)) + + addsd xmm12,xmm10 + + mulsd xmm0,xmm3 # -2y * (p-y*(p+q))/(q+y*(p+q)) | 2y * (p-y*(p+q))/(q+y*(p+q)) + + subsd xmm0,xmm12 + + addsd xmm0,xmm1 # 1-2y-2y * .. | -1+2y+2y * .. + + ret + +tan_real_n_s_0_5_5_p: + ucomisd xmm0,qword ptr real_n_2pi_m_0_542[rip] + addsd xmm0,xmm1 + att_jb tan_real_0_5_p + att_jmp tan_real_s_0_5_p + +tan_real_s_0_5_5_p: + ucomisd xmm0,qword ptr real_2pi_p_0_542[rip] + subsd xmm0,xmm1 + att_jb tan_real_0_5_p + att_jmp tan_real_s_0_5_p + +tan_real_n_s_0_5_3_p: + ucomisd xmm0,qword ptr real_n_pi_m_0_542[rip] + addsd xmm0,xmm1 + att_jb tan_real_0_5_p + att_jmp tan_real_s_0_5_p + +tan_real_s_0_5_3_p: + ucomisd xmm0,qword ptr real_pi_p_0_542[rip] + subsd xmm0,xmm1 + att_jb tan_real_0_5_p + +tan_real_s_0_5_p: + movlpd xmm1,qword ptr atan_sqrt_0_5[rip] + movlpd xmm2,qword ptr atan_sqrt_0_5_l[rip] + movlpd xmm10,qword ptr real_51_bits[rip] + + subsd xmm1,xmm0 # atan sqrt 0.5_h-x + addsd xmm2,xmm12 # atan sqrt 0.5_l+n_x_l + andpd xmm10,xmm0 # x_h + + movlpd xmm11,qword ptr real_1_5[rip] + addsd xmm2,xmm1 # y=atan sqrt 0.5-x + subsd xmm0,xmm10 # x_h_l + mulsd xmm10,xmm11 # 1.5x_h + + movsd xmm1,xmm2 # y + mulsd xmm2,xmm2 # y2 + + subsd xmm0,xmm12 # x_h_l-n_x_l + addsd xmm10,qword ptr sqrtn_0_5_m_1_5_atann_sqrt_0_5[rip] # 1.5x+(sqrt 0.5-1.5atan sqrt 0.5)=sqrt 0.5+1.5y + + movlpd xmm12,qword ptr sqrtn_0_5_m_1_5_atann_sqrt_0_5_l[rip] + movlpd xmm13,qword ptr real_m_1_5[rip] + movlpd xmm14,qword ptr sqrt_0_5[rip] + jmp tan_real_0_5_a_s_0_5_3_pn + +tan_real_n_s_0_5_5_n: + ucomisd 
xmm0,qword ptr real_n_2pi_p_0_542[rip] + addsd xmm0,xmm1 + att_ja tan_real_0_5_n + att_jmp tan_real_s_0_5_n + +tan_real_s_0_5_5: + ucomisd xmm0,qword ptr real_2pi_m_0_542[rip] + subsd xmm0,xmm1 + att_ja tan_real_0_5_n + att_jmp tan_real_s_0_5_n + +tan_real_n_s_0_5_3_n: + ucomisd xmm0,qword ptr real_n_pi_p_0_542[rip] + addsd xmm0,xmm1 + att_ja tan_real_0_5_n + att_jmp tan_real_s_0_5_n + +tan_real_s_0_5_3: + ucomisd xmm0,qword ptr real_pi_m_0_542[rip] + subsd xmm0,xmm1 + att_ja tan_real_0_5_n + +tan_real_s_0_5_n: + movlpd xmm1,qword ptr atan_sqrt_0_5[rip] + movlpd xmm2,qword ptr atan_sqrt_0_5_l[rip] + movlpd xmm10,qword ptr real_51_bits[rip] + + addsd xmm1,xmm0 # x_h+atan sqrt 0.5_h + subsd xmm2,xmm12 # -n_x_l+atan sqrt 0.5_l + andpd xmm10,xmm0 # x_h + + movlpd xmm11,qword ptr real_1_5[rip] + addsd xmm2,xmm1 # y=x+atan sqrt 0.5 + subsd xmm0,xmm10 # x_h_l + mulsd xmm10,xmm11 # 1.5x_h + + movsd xmm1,xmm2 # y + mulsd xmm2,xmm2 # y2 + + subsd xmm0,xmm12 # x_h_l-n_x_l + subsd xmm10,qword ptr sqrtn_0_5_m_1_5_atann_sqrt_0_5[rip] # 1.5x-(sqrt 0.5-1.5atan sqrt 0.5)=-sqrt 0.5+1.5y + + movlpd xmm12,qword ptr n_sqrtn_0_5_m_1_5_atann_sqrt_0_5_l[rip] + movlpd xmm13,qword ptr real_1_5[rip] + movlpd xmm14,qword ptr sqrt_0_5[rip] + +tan_real_0_5_a_s_0_5_3_pn: + movlpd xmm7,qword ptr tan3_q_1[rip] + movlpd xmm6,qword ptr tan3_q_2[rip] + + movsd xmm3,xmm2 # y2 + mulsd xmm2,xmm2 # y4 + + movlpd xmm9,qword ptr tan3_q_3[rip] + mulsd xmm7,xmm3 # q2 + + movsd xmm4,xmm3 # y2 + mulsd xmm3,xmm2 # y6 + + movlpd xmm8,qword ptr tan3_p_0[rip] + + movsd xmm5,xmm2 # y4 + mulsd xmm2,xmm2 # y8 + + mulsd xmm9,xmm3 # q6 + mulsd xmm3,qword ptr tan3_p_2[rip] # p6 + + mulsd xmm6,xmm5 # q4 + mulsd xmm5,qword ptr tan3_p_1[rip] # p4 + + addsd xmm2,xmm9 # y8+q6 + + mulsd xmm8,xmm4 # p2 + + addsd xmm2,xmm6 # y8+q6+q4 + + mulsd xmm0,xmm11 # 1.25|1.5x_l + + addsd xmm3,xmm5 # p6+p4 + addsd xmm2,xmm7 # y8+q6+q4+q2 + + mulsd xmm14,xmm1 # 0.5|sqrt 0.5*y + + addsd xmm3,xmm8 # p6+p4+p2 + addsd xmm2,qword ptr 
tan3_q_0[rip] # y8+q6+q4+q2+q0 + + movsd xmm4,xmm3 # p + addsd xmm4,xmm2 # p+q + + mulsd xmm4,xmm14 # 0.5|sqrt 0.5*y*(p+q) + + subsd xmm3,xmm4 # p-sqrt 0.5*y*(p+q) + addsd xmm2,xmm4 # q+sqrt 0.5*y*(p+q) + + divsd xmm3,xmm2 # (p-y*(p+q))/(q+y*(p+q)) + + addsd xmm0,xmm12 + + mulsd xmm1,xmm3 # y * (p-y*(p+q))/(q+y*(p+q)) + + mulsd xmm1,xmm13 # 1.25|1.5*y*.. + + addsd xmm0,xmm1 + + addsd xmm0,xmm10 # 0.5|sqrt 0.5+1.25|1.5*y+1.25|1.5*y * .. + + ret + +tan_real_0_5_p: + movlpd xmm1,qword ptr atan_0_5_53[rip] + movlpd xmm2,qword ptr atan_0_5_53_l[rip] + movlpd xmm10,qword ptr real_50_bits[rip] + + subsd xmm1,xmm0 # atan 0.5_h-x + addsd xmm2,xmm12 # atan 0.5_l-n_x_l + andpd xmm10,xmm0 # x_h + + movlpd xmm11,qword ptr real_1_25[rip] + addsd xmm2,xmm1 # y=x+atan 0.5 + subsd xmm0,xmm10 # x_h_l + mulsd xmm10,xmm11 # 1.25x_h + + movsd xmm1,xmm2 # y + mulsd xmm2,xmm2 # y2 + + subsd xmm0,xmm12 # x_h_l-n_x_l + addsd xmm10,qword ptr real_0_5_m_1_25_atan_0_5_49[rip] # 1.25x+(0.5-1.25atan 0.5)=0.5+1.25y + + movlpd xmm12,qword ptr real_0_5_m_1_25_atan_0_5_49_l[rip] + movlpd xmm13,qword ptr real_m_1_25[rip] + movlpd xmm14,qword ptr real_0_5[rip] + att_jmp tan_real_0_5_a_s_0_5_3_pn + +tan_real_0_5_n: + movlpd xmm1,qword ptr atan_0_5_53[rip] + movlpd xmm2,qword ptr atan_0_5_53_l[rip] + movlpd xmm10,qword ptr real_50_bits[rip] + + addsd xmm1,xmm0 # x_h+atan 0.5_h + subsd xmm2,xmm12 # -n_x_l+atan 0.5_l + andpd xmm10,xmm0 # x_h + + movlpd xmm11,qword ptr real_1_25[rip] + addsd xmm2,xmm1 # y=x+atan 0.5 + subsd xmm0,xmm10 # x_h_l + mulsd xmm10,xmm11 # 1.25x_h + + movsd xmm1,xmm2 # y + mulsd xmm2,xmm2 # y2 + + subsd xmm0,xmm12 # x_h_l-n_x_l + subsd xmm10,qword ptr real_0_5_m_1_25_atan_0_5_49[rip] # 1.25x-(0.5-1.25atan 0.5)=-0.5+1.25y + + movlpd xmm12,qword ptr real_1_25_atan_0_5_m_0_5_49_l[rip] + movlpd xmm13,qword ptr real_1_25[rip] + movlpd xmm14,qword ptr real_0_5[rip] + att_jmp tan_real_0_5_a_s_0_5_3_pn + + +sin_cos_or_tan_real_too_large: +sin_cos_or_tan_real_too_small_or_nan: + subsd 
xmm0,xmm0	# completes the `subsd` that ends the previous physical line
	ret

# ----------------------------------------------------------------------
# rem_36825084_pi
# Argument-reduction helper; thresholds are +3.0 / +1.0, so presumably the
# variant for x >= 0 (rem_n_36825084_pi below mirrors it with negative
# thresholds).  NOTE(review): callers are outside this chunk - confirm.
#
# In:   xmm0 = x
# Out:  xmm0 = x - q*C, q = masked quotient x/real_36825084_pi, C applied
#              as three parts (.._27, .._27_27, .._54_l)
#       xmm7 = rounding error of the last subtraction (low part of xmm0)
#       xmm1 = real_0_0, real_2_0, or masked (xmm0*4/pi + 1)   (three exits)
#       xmm5, xmm6 = real_0_0 / real_2_0 or shifted bit masks, per exit
# Clobbers: xmm2, xmm3, xmm4, flags
# ----------------------------------------------------------------------
rem_36825084_pi:
	movsd	xmm1,xmm0	# keep x
	divsd	xmm0,qword ptr real_36825084_pi[rip]

	# Build a mask from the quotient's sign/exponent field and clear its low
	# mantissa bits - presumably truncating q to an integer (depends on
	# round_c, defined elsewhere).
	movq	xmm4,qword ptr round_c[rip]
	movq	xmm2,xmm0	# q
	psrlq	xmm0,52	# sign+exponent bits of q
	psubq	xmm4,xmm0	# shift count
	movq	xmm5,qword ptr mask_all_one[rip]
	psllq	xmm5,xmm4
	andpd	xmm5,xmm2	# masked q

	movlpd	xmm2,qword ptr real_36825084_pi_27[rip]
	movlpd	xmm3,qword ptr real_36825084_pi_27_27[rip]
	movlpd	xmm4,qword ptr real_36825084_pi_54_l[rip]

	movsd	xmm0,xmm1	# x

	mulsd	xmm2,xmm5	# q*C part 1
	mulsd	xmm3,xmm5	# q*C part 2
	mulsd	xmm4,xmm5	# q*C part 3

	subsd	xmm0,xmm2
	subsd	xmm0,xmm3
	movsd	xmm7,xmm0	# t = x - q*C1 - q*C2
	subsd	xmm0,xmm4	# r = t - q*C3

	subsd	xmm7,xmm0	# t - r
	subsd	xmm7,xmm4	# (t - r) - q*C3 : rounding error of r

	movlpd	xmm1,qword ptr real_4_d_pi[rip]

	mulsd	xmm1,xmm0	# x*4/pi

	ucomisd	xmm1,qword ptr real_3_0[rip]
	jae	rem_36825084_pi_g3

	ucomisd	xmm1,qword ptr real_1_0[rip]
	jae	rem_36825084_pi_g1

	movlpd	xmm1,qword ptr real_0_0[rip]
	movsd	xmm5,xmm1
	movsd	xmm6,xmm1
	ret

rem_36825084_pi_g1:
	movlpd	xmm1,qword ptr real_2_0[rip]
	movlpd	xmm5,qword ptr real_0_0[rip]
	movsd	xmm6,xmm1
	ret

rem_36825084_pi_g3:
	addsd	xmm1,qword ptr real_1_0[rip]	# x*4/pi+1

	movq	xmm4,qword ptr round_even_c[rip]
	movq	xmm2,xmm1
	psrlq	xmm1,52
	psubq	xmm4,xmm1	# shift count
	movq	xmm1,qword ptr mask_all_one[rip]
	psllq	xmm1,xmm4
	andpd	xmm1,xmm2	# round_even (x*4/pi+1)

	movq	xmm5,qword ptr mask_all_one_except_last[rip]
	movq	xmm6,qword ptr mask_all_one_except_second_last[rip]
	psllq	xmm5,xmm4
	psllq	xmm6,xmm4
	ret

# ----------------------------------------------------------------------
# rem_n_36825084_pi
# Mirror of rem_36825084_pi: uses m_round_c / m_round_even_c, compares
# against real_m_3_0 / real_m_1_0 with jbe, and returns real_m_2_0 in the
# middle case.  Same register contract (in xmm0; out xmm0, xmm1, xmm5,
# xmm6, xmm7; clobbers xmm2-xmm4 and flags).
# ----------------------------------------------------------------------
rem_n_36825084_pi:
	movsd	xmm1,xmm0	# keep x
	divsd	xmm0,qword ptr real_36825084_pi[rip]

	movq	xmm4,qword ptr m_round_c[rip]
	movq	xmm2,xmm0	# q
	psrlq	xmm0,52	# sign+exponent bits of q
	psubq	xmm4,xmm0	# shift count
	movq	xmm5,qword ptr mask_all_one[rip]
	psllq	xmm5,xmm4
	andpd	xmm5,xmm2	# masked q

	movlpd	xmm2,qword ptr real_36825084_pi_27[rip]
	movlpd	xmm3,qword ptr real_36825084_pi_27_27[rip]
	movlpd	xmm4,qword ptr real_36825084_pi_54_l[rip]

	movsd	xmm0,xmm1	# x

	mulsd	xmm2,xmm5
	mulsd	xmm3,xmm5
	mulsd	xmm4,xmm5

	subsd	xmm0,xmm2
	subsd	xmm0,xmm3
	movsd	xmm7,xmm0	# t
	subsd	xmm0,xmm4	# r = t - q*C3

	subsd	xmm7,xmm0
	subsd	xmm7,xmm4	# rounding error of r

	movlpd	xmm1,qword ptr real_4_d_pi[rip]

	mulsd	xmm1,xmm0	# x*4/pi

	ucomisd	xmm1,qword ptr real_m_3_0[rip]
	jbe	rem_n_36825084_pi_g3

	ucomisd	xmm1,qword ptr real_m_1_0[rip]
	jbe	rem_n_36825084_pi_g1

	movlpd	xmm1,qword ptr real_0_0[rip]
	movsd	xmm5,xmm1
	movsd	xmm6,xmm1
	ret

rem_n_36825084_pi_g1:
	movlpd	xmm1,qword ptr real_m_2_0[rip]
	movlpd	xmm5,qword ptr real_0_0[rip]
	movsd	xmm6,xmm1
	ret

rem_n_36825084_pi_g3:
	subsd	xmm1,qword ptr real_1_0[rip]	# x*4/pi-1

	movq	xmm4,qword ptr m_round_even_c[rip]
	movq	xmm2,xmm1
	psrlq	xmm1,52
	psubq	xmm4,xmm1	# shift count
	movq	xmm1,qword ptr mask_all_one[rip]
	psllq	xmm1,xmm4
	andpd	xmm1,xmm2	# round_even (x*4/pi-1)

	movq	xmm5,qword ptr mask_all_one_except_last[rip]
	movq	xmm6,qword ptr mask_all_one_except_second_last[rip]
	psllq	xmm5,xmm4
	psllq	xmm6,xmm4
	ret


	.globl	asin_real

# ----------------------------------------------------------------------
# asin_real
# In:   xmm0 = x            Out: xmm0 = result
# Uses: rcx (coefficient table base), rdx (asin_real_*_0_54 paths),
#       xmm1-xmm15, flags
# Dispatch on x:
#   real_m_0_4 <= x <= real_0_4 : x + x^3 * P(x^2)/Q(x^2)        (below)
#   x > real_0_4                : asin_real_2
#   x < real_m_0_4 or NaN       : asin_real_3
# Out-of-range input ends in asin_real_e, which returns 0/0 (NaN).
# ----------------------------------------------------------------------
asin_real:
	ucomisd	xmm0,qword ptr real_0_4[rip]

	lea	rcx,asin_c[rip]	# lea leaves the flags of the compare intact

	ja	asin_real_2

	ucomisd	xmm0,qword ptr real_m_0_4[rip]
	jb	asin_real_3	# x < real_m_0_4 | NAN

	movsd	xmm1,xmm0	# x
	mulsd	xmm0,xmm0	# x2

	movlpd	xmm6,qword ptr (asin_q_4-asin_c)[rcx]
	movlpd	xmm7,qword ptr (asin_q_3-asin_c)[rcx]

	movsd	xmm2,xmm0	# x2
	mulsd	xmm0,xmm0	# x4

	movlpd	xmm8,qword ptr (asin_q_2-asin_c)[rcx]
	movlpd	xmm9,qword ptr (asin_q_1-asin_c)[rcx]

	movsd	xmm4,xmm0	# x4
	mulsd	xmm0,xmm0	# x8

	movlpd	xmm10,qword ptr (asin_p_1-asin_c)[rcx]

	movsd	xmm3,xmm2	# x2
	mulsd	xmm3,xmm4	# x6

	mulsd	xmm6,xmm0	# q8
	movsd	xmm5,xmm0	# x8
	mulsd	xmm0,xmm2	# x10

	mulsd	xmm5,qword ptr (asin_p_4-asin_c)[rcx]	# p8

	mulsd	xmm7,xmm3	# q6
	mulsd	xmm3,qword ptr (asin_p_3-asin_c)[rcx]	# p6

	addsd	xmm0,xmm6	# x10+q8

	mulsd	xmm8,xmm4	# q4
	mulsd	xmm4,qword ptr (asin_p_2-asin_c)[rcx]	# p4

	addsd	xmm0,xmm7	# x10+q8+q6
	addsd	xmm3,xmm5	# p8+p6

	mulsd	xmm9,xmm2	# q2
	mulsd	xmm10,xmm2	# p2

	addsd	xmm0,xmm8	# x10+q8+q6+q4
	addsd	xmm3,xmm4	# p8+p6+p4

	addsd	xmm0,xmm9	# x10+q8+q6+q4+q2
	addsd	xmm3,xmm10	# p8+p6+p4+p2

	addsd	xmm0,qword ptr (asin_q_0-asin_c)[rcx]	# x10+q8+q6+q4+q2+q0
	addsd	xmm3,qword ptr (asin_p_0-asin_c)[rcx]	# p8+p6+p4+p2+p0

	divsd	xmm3,xmm0	# p/q

	mulsd	xmm2,xmm1	# x3
	movsd	xmm0,xmm1	# x

	mulsd	xmm2,xmm3	# x3*p/q
	addsd	xmm0,xmm2	# x+x3*p/q

	ret

# x > real_0_4.  Below real_0_675 the shifted polynomial is used; otherwise
# the computation works on t = real_1_0 - x and s = sqrt(2t).
asin_real_2:
	ucomisd	xmm0,qword ptr real_0_675[rip]
	jb	asin_real_0_54

	movlpd	xmm1,qword ptr real_1_0[rip]

	ucomisd	xmm0,xmm1
	subsd	xmm1,xmm0	# t = 1-x (subsd does not touch the flags)

	jae	asin_real_1_or_e	# x>=1

	movlpd	xmm11,qword ptr real_2_0[rip]

	# From here on "x" in the comments denotes t = 1-x.
	movsd	xmm0,xmm1	# x
	mulsd	xmm1,xmm1	# x2

	mulsd	xmm11,xmm0	# 2x

	movlpd	xmm7,qword ptr (asin2_q_3-asin_c)[rcx]

	movsd	xmm2,xmm1	# x2
	mulsd	xmm1,xmm1	# x4

	sqrtsd	xmm12,xmm11	# sqrt 2x

	movlpd	xmm5,qword ptr (asin2_p_3-asin_c)[rcx]
	movlpd	xmm8,qword ptr (asin2_q_2-asin_c)[rcx]

	movsd	xmm4,xmm1	# x4

	movlpd	xmm9,qword ptr (asin2_q_1-asin_c)[rcx]
	movlpd	xmm10,qword ptr (asin2_p_1-asin_c)[rcx]

	movsd	xmm3,xmm2	# x2
	mulsd	xmm3,xmm0	# x3

	mulsd	xmm4,qword ptr (asin2_p_4-asin_c)[rcx]	# p4

	mulsd	xmm7,xmm3	# q3
	mulsd	xmm5,xmm3	# p3

	mulsd	xmm8,xmm2	# q2
	mulsd	xmm2,qword ptr (asin2_p_2-asin_c)[rcx]	# p2

	addsd	xmm1,xmm7	# x4+q3
	addsd	xmm4,xmm5	# p4+p3

	mulsd	xmm9,xmm0	# q1
	mulsd	xmm10,xmm0	# p1

	addsd	xmm1,xmm8	# x4+q3+q2
	addsd	xmm4,xmm2	# p4+p3+p2

	addsd	xmm1,xmm9	# x4+q3+q2+q1
	addsd	xmm4,xmm10	# p4+p3+p2+p1

	movlpd	xmm13,qword ptr real_21_bits[rip]

	addsd	xmm1,qword ptr (asin2_q_0-asin_c)[rcx]	# x4+q3+q2+q1+q0
	addsd	xmm4,qword ptr (asin2_p_0-asin_c)[rcx]	# p4+p3+p2+p1+p0

	andpd	xmm13,xmm12	# s21

	divsd	xmm4,xmm1	# p/q

	movsd	xmm14,xmm13	# s21
	movsd	xmm15,xmm12	# s
	addsd	xmm12,xmm13	# s+s21

	mulsd	xmm13,xmm13	# s21*s21
	subsd	xmm11,xmm13	# 2x-s21*s21
	divsd	xmm11,xmm12	# sl

	addsd	xmm14,qword ptr real_m_pi_d_2[rip]	# s21-pi_d_2
	mulsd	xmm4,xmm0	# x*p/q

	movlpd	xmm0,qword ptr real_pi_d_2_l[rip]
	subsd	xmm0,xmm11	# pi_d_2_l-sl

	mulsd	xmm4,xmm15	# s*x*p/q

	subsd	xmm0,xmm4	# pi_d_2_l-sl-s*x*p/q
	subsd	xmm0,xmm14	# pi_d_2_l-sl-s*x*p/q-s21+pi_d_2

	ret

# x < real_m_0_4 or NaN.  Above real_m_0_675 the shifted polynomial is used;
# otherwise the computation works on t = x - real_m_1_0 and s = sqrt(2t).
asin_real_3:
	ucomisd	xmm0,qword ptr real_m_0_675[rip]
	ja	asin_real_m_0_54

	movlpd	xmm1,qword ptr real_m_1_0[rip]

	ucomisd	xmm0,xmm1
	jbe	asin_real_m_1_or_e	# x<=-1 | NAN

	subsd	xmm0,xmm1	# t = x-(-1)

	movlpd	xmm11,qword ptr real_2_0[rip]

	# From here on "x" in the comments denotes t = x+1.
	movsd	xmm1,xmm0	# x
	mulsd	xmm0,xmm0	# x2

	mulsd	xmm11,xmm1	# 2x

	movlpd	xmm7,qword ptr (asin2_q_3-asin_c)[rcx]

	movsd	xmm2,xmm0	# x2
	mulsd	xmm0,xmm0	# x4

	sqrtsd	xmm12,xmm11	# sqrt 2x

	movlpd	xmm5,qword ptr (asin2_p_3-asin_c)[rcx]
	movlpd	xmm8,qword ptr (asin2_q_2-asin_c)[rcx]

	movsd	xmm4,xmm0	# x4

	movlpd	xmm9,qword ptr (asin2_q_1-asin_c)[rcx]
	movlpd	xmm10,qword ptr (asin2_p_1-asin_c)[rcx]

	movsd	xmm3,xmm2	# x2
	mulsd	xmm3,xmm1	# x3

	mulsd	xmm4,qword ptr (asin2_p_4-asin_c)[rcx]	# p4

	mulsd	xmm7,xmm3	# q3
	mulsd	xmm5,xmm3	# p3

	mulsd	xmm8,xmm2	# q2
	mulsd	xmm2,qword ptr (asin2_p_2-asin_c)[rcx]	# p2

	addsd	xmm0,xmm7	# x4+q3
	addsd	xmm4,xmm5	# p4+p3

	mulsd	xmm9,xmm1	# q1
	mulsd	xmm10,xmm1	# p1

	addsd	xmm0,xmm8	# x4+q3+q2
	addsd	xmm4,xmm2	# p4+p3+p2

	addsd	xmm0,xmm9	# x4+q3+q2+q1
	addsd	xmm4,xmm10	# p4+p3+p2+p1

	movlpd	xmm13,qword ptr real_21_bits[rip]

	addsd	xmm0,qword ptr (asin2_q_0-asin_c)[rcx]	# x4+q3+q2+q1+q0
	addsd	xmm4,qword ptr (asin2_p_0-asin_c)[rcx]	# p4+p3+p2+p1+p0

	andpd	xmm13,xmm12	# s21

	divsd	xmm4,xmm0	# p/q

	movsd	xmm14,xmm13	# s21
	movsd	xmm15,xmm12	# s
	addsd	xmm12,xmm13	# s+s21
	mulsd	xmm13,xmm13	# s21*s21
	subsd	xmm11,xmm13	# 2x-s21*s21
	divsd	xmm11,xmm12	# sl

	addsd	xmm14,qword ptr real_m_pi_d_2[rip]	# s21-pi_d_2
	mulsd	xmm4,xmm1	# x*p/q

	movlpd	xmm0,qword ptr real_m_pi_d_2_l[rip]
	addsd	xmm0,xmm11	# -pi_d_2_l+sl

	mulsd	xmm4,xmm15	# s*x*p/q

	addsd	xmm0,xmm4	# -pi_d_2_l+sl+s*x*p/q
	addsd	xmm0,xmm14	# -pi_d_2_l+sl+s*x*p/q+s21-pi_d_2

	ret

# Shifted-polynomial entry for negative x: subtract the centre constant and
# pick asin_p4_c when x is above it, asin_p3_c otherwise.
asin_real_m_0_54:
	movlpd	xmm1,qword ptr real_m_0_54000000017867999524[rip]

	ucomisd	xmm0,xmm1
	subsd	xmm0,xmm1	# x - centre (flags from ucomisd are preserved)

	lea	rcx,asin_p3_c[rip]
	lea	rdx,asin_p4_c[rip]

	cmova	rcx,rdx

	jmp	asin_real_pm_0_54

# Shifted-polynomial entry for positive x: subtract the centre constant and
# pick asin_p2_c when x is below it, asin_p1_c otherwise.
asin_real_0_54:
	movlpd	xmm1,qword ptr real_0_54000000017867999524[rip]

	ucomisd	xmm0,xmm1
	subsd	xmm0,xmm1	# x - centre (flags from ucomisd are preserved)

	lea	rcx,asin_p1_c[rip]
	lea	rdx,asin_p2_c[rip]

	cmovb	rcx,rdx

# Shared polynomial evaluator (also entered from acos_real_0_65).
# In:  xmm0 = x - centre, rcx = coefficient table.  All offsets are taken
#      relative to asin_p1_c, so every table passed in rcx is assumed to
#      share asin_p1_c's layout.
# Out: xmm0 = c0h + c1*x + p2..p14 terms, with c1*x evaluated as a
#      high/low split to recover its rounding error.
asin_real_pm_0_54:
	movsd	xmm1,xmm0	# x
	mulsd	xmm0,xmm0	# x2

	movlpd	xmm7,qword ptr (asin_p1_4-asin_p1_c)[rcx]
	movlpd	xmm8,qword ptr (asin_p1_3-asin_p1_c)[rcx]

	movsd	xmm2,xmm0	# x2
	mulsd	xmm0,xmm0	# x4

	mulsd	xmm7,xmm2	# p4
	mulsd	xmm8,xmm2	# p3

	movlpd	xmm9,qword ptr (asin_p1_6-asin_p1_c)[rcx]
	movlpd	xmm10,qword ptr (asin_p1_5-asin_p1_c)[rcx]

	movsd	xmm3,xmm2	# x2
	mulsd	xmm2,xmm0	# x6

	movsd	xmm4,xmm0	# x4
	mulsd	xmm0,xmm0	# x8

	mulsd	xmm9,xmm4	# p6
	mulsd	xmm10,xmm4	# p5

	movlpd	xmm11,qword ptr (asin_p1_8-asin_p1_c)[rcx]
	movlpd	xmm12,qword ptr (asin_p1_10-asin_p1_c)[rcx]

	mulsd	xmm4,xmm2	# x10

	movlpd	xmm13,qword ptr (asin_p1_12-asin_p1_c)[rcx]
	movlpd	xmm14,qword ptr (asin_p1_14-asin_p1_c)[rcx]

	movsd	xmm6,xmm2	# x6
	mulsd	xmm2,xmm2	# x12

	mulsd	xmm11,xmm6	# p8
	mulsd	xmm6,qword ptr (asin_p1_7-asin_p1_c)[rcx]	# p7

	mulsd	xmm12,xmm0	# p10
	mulsd	xmm0,qword ptr (asin_p1_9-asin_p1_c)[rcx]	# p9

	mulsd	xmm13,xmm4	# p12
	mulsd	xmm4,qword ptr (asin_p1_11-asin_p1_c)[rcx]	# p11

	mulsd	xmm14,xmm2	# p14
	mulsd	xmm2,qword ptr (asin_p1_13-asin_p1_c)[rcx]	# p13

	addsd	xmm13,xmm14	# p14+p12
	addsd	xmm2,xmm4	# p13+p11

	movlpd	xmm15,qword ptr (asin_p1_2-asin_p1_c)[rcx]

	addsd	xmm12,xmm13	# p14+p12+p10
	addsd	xmm0,xmm2	# p13+p11+p9

	mulsd	xmm15,xmm3	# p2

	addsd	xmm11,xmm12	# p14+p12+p10+p8
	addsd	xmm0,xmm6	# p13+p11+p9+p7

	movlpd	xmm2,qword ptr real_26_bits[rip]
	movsd	xmm4,xmm1	# x
	movlpd	xmm6,qword ptr (asin_p1_1l-asin_p1_c)[rcx]
	movlpd	xmm5,qword ptr (asin_p1_1h-asin_p1_c)[rcx]

	addsd	xmm9,xmm11	# p14+p12+p10+p8+p6
	addsd	xmm0,xmm10	# p13+p11+p9+p7+p5

	andpd	xmm2,xmm1	# x_h
	mulsd	xmm6,xmm1	# x*c1l
	movlpd	xmm13,qword ptr (asin_p1_0h-asin_p1_c)[rcx]

	addsd	xmm7,xmm9	# p14+p12+p10+p8+p6+p4
	addsd	xmm0,xmm8	# p13+p11+p9+p7+p5+p3

	subsd	xmm4,xmm2	# x_l
	mulsd	xmm2,xmm5	# x_h*c1h

	mulsd	xmm7,xmm3	# (p14+p12+p10+p8+p6+p4)(*x2)
	mulsd	xmm0,xmm1	# (p13+p11+p9+p7+p5+p3)(*x)

	movsd	xmm14,xmm13	# c0h
	addsd	xmm13,xmm2	# x_h*c1h+c0h

	addsd	xmm0,xmm7	# p14+p13+..+p3

	subsd	xmm14,xmm13	# c0h-(x_h*c1h+c0h)
	mulsd	xmm4,xmm5	# x_l*c1h

	addsd	xmm0,xmm15	# p14+p13+..+p3+p2
#	addsd	xmm0,qword ptr (asin_p1_0l-asin_p1_c)[rcx]

	addsd	xmm14,xmm2	# (c0h-(x_h*c1h+c0h))+x_h*c1h
	addsd	xmm4,xmm6	# x_l*c1h+x*c1l

	addsd	xmm4,xmm14	# (c0h-(x_h*c1h+c0h))+x_h*c1h+x_l*c1h+x*c1l

	addsd	xmm0,xmm4
	addsd	xmm0,xmm13
	ret

# Reached with the flags of `ucomisd x,real_1_0` and x >= 1:
# x == 1 returns real_pi_d_2, x > 1 is a domain error.
asin_real_1_or_e:
	jne	asin_real_e

	movlpd	xmm0,qword ptr real_pi_d_2[rip]
	ret

# Reached with the flags of `ucomisd x,real_m_1_0` and x <= -1 or NaN:
# x == -1 returns real_m_pi_d_2; below (CF set, also for NaN) is an error.
asin_real_m_1_or_e:
	att_jb	asin_real_e	# x<-1 | NAN

	movlpd	xmm0,qword ptr real_m_pi_d_2[rip]
	ret

# Domain error: return 0/0 (NaN).
asin_real_e:
	subsd	xmm0,xmm0
	divsd	xmm0,xmm0
	ret


	.globl	acos_real

# ----------------------------------------------------------------------
# acos_real
# In:   xmm0 = x            Out: xmm0 = result
# Uses: rcx, rdx (acos_real_0_65 path), xmm1-xmm15, flags
# Dispatch on x:
#   real_m_0_58 <= x <= real_0_5 : pi/2 - (x + x^3 * P(x^2)/Q(x^2))  (below)
#   x > real_0_5                 : acos_real_2
#   x < real_m_0_58 or NaN       : acos_real_3
# Out-of-range input ends in acos_real_e, which returns 0/0 (NaN).
# ----------------------------------------------------------------------
acos_real:
#	ucomisd	xmm0,qword ptr real_0_58
	ucomisd	xmm0,qword ptr real_0_5[rip]
	ja	acos_real_2
	ucomisd	xmm0,qword ptr real_m_0_58[rip]
	jb	acos_real_3	# x < real_m_0_58 | NAN

	movsd	xmm1,xmm0	# x
	mulsd	xmm0,xmm0	# x2

	movlpd	xmm6,qword ptr acos_q_4[rip]
	movlpd	xmm7,qword ptr acos_q_3[rip]

	movsd	xmm2,xmm0	# x2
	mulsd	xmm0,xmm0	# x4

	movlpd	xmm8,qword ptr acos_q_2[rip]
	movlpd	xmm9,qword ptr acos_q_1[rip]

	movsd	xmm4,xmm0	# x4
	mulsd	xmm0,xmm0	# x8

	movlpd	xmm10,qword ptr acos_p_1[rip]

	movsd	xmm3,xmm2	# x2
	mulsd	xmm3,xmm4	# x6

	mulsd	xmm6,xmm0	# q8
	movsd	xmm5,xmm0	# x8
	mulsd	xmm0,xmm2	# x10

	mulsd	xmm5,qword ptr acos_p_4[rip]	# p8

	mulsd	xmm7,xmm3	# q6
	mulsd	xmm3,qword ptr acos_p_3[rip]	# p6

	addsd	xmm0,xmm6	# x10+q8

	mulsd	xmm8,xmm4	# q4
	mulsd	xmm4,qword ptr acos_p_2[rip]	# p4

	addsd	xmm0,xmm7	# x10+q8+q6
	addsd	xmm3,xmm5	# p8+p6

	mulsd	xmm9,xmm2	# q2
	mulsd	xmm10,xmm2	# p2

	addsd	xmm0,xmm8	# x10+q8+q6+q4
	addsd	xmm3,xmm4	# p8+p6+p4

	movlpd	xmm11,qword ptr real_21_bits[rip]

	addsd	xmm0,xmm9	# x10+q8+q6+q4+q2
	addsd	xmm3,xmm10	# p8+p6+p4+p2

	andpd	xmm11,xmm1	# x21

	addsd	xmm0,qword ptr acos_q_0[rip]	# x10+q8+q6+q4+q2+q0
	addsd	xmm3,qword ptr acos_p_0[rip]	# p8+p6+p4+p2+p0

	mulsd	xmm2,xmm1	# x3

	divsd	xmm3,xmm0	# p/q

#	ucomisd	xmm1,qword ptr real_0_54
#	ja	acos_real_1

	movsd	xmm0,qword ptr real_pi_d_2[rip]	# pi_d_2

	subsd	xmm1,xmm11	# x21_l
	subsd	xmm0,xmm11	# pi_d_2-x21

	mulsd	xmm2,xmm3	# x3*(p/q)

	addsd	xmm2,qword ptr real_m_pi_d_2_l[rip]	# x3*(p/q)-pi_d_2_l

	addsd	xmm1,xmm2	# x21_l+x3*(p/q)-pi_d_2_l

	subsd	xmm0,xmm1	# pi_d_2-x21-x21_l-x3*(p/q)+pi_d_2_l

	ret

#acos_real_1:
#	movlpd	xmm0,qword ptr real_pi_d_2_m_0_03125

#	subsd	xmm1,xmm11	# x21_l
#	subsd	xmm0,xmm11	# pi_d_2-0.03125-x21

#	mulsd	xmm2,xmm3	# x3*(p/q)

#	addsd	xmm1,qword ptr real_m_pi_d_2_l	# x21_l-pi_d_2_l

#	subsd	xmm2,qword ptr real_0_03125	# x3*(p/q)-0.03125

#	addsd	xmm1,xmm2	# x21_l+x3*(p/q)-0.03125-pi_d_2_l

#	subsd	xmm0,xmm1	# pi_d_2-x21-x21_l-x3*(p/q)+pi_d_2_l

#	ret

# x > real_0_5.  Below real_0_75 the shifted polynomial is used; otherwise
# the computation works on t = real_1_0 - x and s = sqrt(2t).
acos_real_2:
	ucomisd	xmm0,qword ptr real_0_75[rip]
	jb	acos_real_0_65

	movlpd	xmm1,qword ptr real_1_0[rip]

	ucomisd	xmm0,xmm1

	subsd	xmm1,xmm0	# t = 1-x (subsd does not touch the flags)

	jae	acos_real_1_or_e	# x>=1

	movlpd	xmm11,qword ptr real_2_0[rip]

	# From here on "x" in the comments denotes t = 1-x.
	movsd	xmm0,xmm1	# x
	mulsd	xmm1,xmm1	# x2

	mulsd	xmm11,xmm0	# 2x

	movlpd	xmm7,qword ptr acos2_q_3[rip]

	movsd	xmm2,xmm1	# x2
	mulsd	xmm1,xmm1	# x4

	sqrtsd	xmm12,xmm11	# sqrt 2x

	movlpd	xmm5,qword ptr acos2_p_3[rip]
	movlpd	xmm8,qword ptr acos2_q_2[rip]

	movsd	xmm4,xmm1	# x4

	movlpd	xmm9,qword ptr acos2_q_1[rip]
	movlpd	xmm10,qword ptr acos2_p_1[rip]

	movsd	xmm3,xmm2	# x2
	mulsd	xmm3,xmm0	# x3

	mulsd	xmm1,qword ptr acos2_p_4[rip]	# p4

	mulsd	xmm7,xmm3	# q3
	mulsd	xmm5,xmm3	# p3

	mulsd	xmm8,xmm2	# q2
	mulsd	xmm2,qword ptr acos2_p_2[rip]	# p2

	addsd	xmm4,xmm7	# x4+q3
	addsd	xmm1,xmm5	# p4+p3

	mulsd	xmm9,xmm0	# q1
	mulsd	xmm10,xmm0	# p1

	addsd	xmm4,xmm8	# x4+q3+q2
	addsd	xmm1,xmm2	# p4+p3+p2

	addsd	xmm4,xmm9	# x4+q3+q2+q1
	addsd	xmm1,xmm10	# p4+p3+p2+p1

	movlpd	xmm13,qword ptr real_21_bits[rip]

	addsd	xmm4,qword ptr acos2_q_0[rip]	# x4+q3+q2+q1+q0
	addsd	xmm1,qword ptr acos2_p_0[rip]	# p4+p3+p2+p1+p0

	andpd	xmm13,xmm12	# s21

	divsd	xmm1,xmm4	# p/q

	movsd	xmm14,xmm13	# s21
	movsd	xmm15,xmm12	# s
	addsd	xmm12,xmm13	# s+s21
	mulsd	xmm13,xmm13	# s21*s21
	subsd	xmm11,xmm13	# 2x-s21*s21
	divsd	xmm11,xmm12	# sl

	mulsd	xmm0,xmm1	# x*p/q

	mulsd	xmm0,xmm15	# s*x*p/q
	addsd	xmm0,xmm11	# s*x*p/q+sl
	addsd	xmm0,xmm14	# s*x*p/q+sl+s21

	ret

# Shifted-polynomial entry: subtract the centre constant, pick acos_p2_c
# when x is below it (acos_p1_c otherwise) and reuse the evaluator in
# asin_real_pm_0_54.
acos_real_0_65:
	movlpd	xmm1,qword ptr real_0_65000000004061742054[rip]

	ucomisd	xmm0,xmm1
	subsd	xmm0,xmm1	# x - centre (flags from ucomisd are preserved)

	lea	rcx,acos_p1_c[rip]
	lea	rdx,acos_p2_c[rip]

	cmovb	rcx,rdx

	att_jmp	asin_real_pm_0_54

# x < real_m_0_58 or NaN: works on t = x - real_m_1_0 and s = sqrt(2t).
acos_real_3:
	movlpd	xmm1,qword ptr real_m_1_0[rip]

	ucomisd	xmm0,xmm1

	subsd	xmm0,xmm1	# t = x-(-1) (subsd does not touch the flags)

	jbe	acos_real_m_1_or_e	# x<=-1 | NAN

	movlpd	xmm11,qword ptr real_2_0[rip]

	# From here on "x" in the comments denotes t = x+1.
	movsd	xmm1,xmm0	# x
	mulsd	xmm0,xmm0	# x2

	mulsd	xmm11,xmm1	# 2x

	movlpd	xmm7,qword ptr acos2_q_3[rip]

	movsd	xmm2,xmm0	# x2
	mulsd	xmm0,xmm0	# x4

	sqrtsd	xmm12,xmm11	# sqrt 2x

	movlpd	xmm5,qword ptr acos2_p_3[rip]
	movlpd	xmm8,qword ptr acos2_q_2[rip]

	movsd	xmm4,xmm0	# x4

	movlpd	xmm9,qword ptr acos2_q_1[rip]
	movlpd	xmm10,qword ptr acos2_p_1[rip]

	movsd	xmm3,xmm2	# x2
	mulsd	xmm3,xmm1	# x3

	mulsd	xmm0,qword ptr acos2_p_4[rip]	# p4

	mulsd	xmm7,xmm3	# q3
	mulsd	xmm5,xmm3	# p3

	mulsd	xmm8,xmm2	# q2
	mulsd	xmm2,qword ptr acos2_p_2[rip]	# p2

	addsd	xmm4,xmm7	# x4+q3
	addsd	xmm0,xmm5	# p4+p3

	mulsd	xmm9,xmm1	# q1
	mulsd	xmm10,xmm1	# p1

	addsd	xmm4,xmm8	# x4+q3+q2
	addsd	xmm0,xmm2	# p4+p3+p2

	addsd	xmm4,xmm9	# x4+q3+q2+q1
	addsd	xmm0,xmm10	# p4+p3+p2+p1

	movlpd	xmm13,qword ptr real_21_bits[rip]

	addsd	xmm4,qword ptr acos2_q_0[rip]	# x4+q3+q2+q1+q0
	addsd	xmm0,qword ptr acos2_p_0[rip]	# p4+p3+p2+p1+p0

	andpd	xmm13,xmm12	# s21

	divsd	xmm0,xmm4	# p/q

	movsd	xmm14,xmm13	# s21
	movsd	xmm15,xmm12	# s
	addsd	xmm12,xmm13	# s+s21
	mulsd	xmm13,xmm13	# s21*s21
	subsd	xmm11,xmm13	# 2x-s21*s21
	divsd	xmm11,xmm12	# sl

	mulsd	xmm1,xmm0	# x*p/q

	mulsd	xmm1,xmm15	# s*x*p/q
	movlpd	xmm0,qword ptr real_pi[rip]

	subsd	xmm11,qword ptr real_pi_l[rip]	# sl-pi_l
	addsd	xmm1,xmm11	# s*x*p/q+sl-pi_l

	subsd	xmm0,xmm14	# pi-s21
	subsd	xmm0,xmm1	# pi-s21-(s*x*p/q+sl-pi_l)

#	addsd	xmm1,xmm14	# s*x*p/q+sl+s21-pi_l
#	subsd	xmm0,xmm1	# pi-(s*x*p/q+sl+s21)

	ret

# Reached with the flags of `ucomisd x,real_1_0` and x >= 1:
# x == 1 returns real_0_0, x > 1 is a domain error.
acos_real_1_or_e:
	jne	acos_real_e

	movlpd	xmm0,qword ptr real_0_0[rip]
	ret

# Reached with the flags of `ucomisd x,real_m_1_0` and x <= -1 or NaN:
# x == -1 returns real_pi; below (CF set, also for NaN) is an error.
acos_real_m_1_or_e:
	att_jb	acos_real_e	# x<-1 | NAN

	movlpd	xmm0,qword ptr real_pi[rip]
	ret

# Domain error: return 0/0 (NaN).
acos_real_e:
	subsd	xmm0,xmm0
	divsd	xmm0,xmm0
	ret

	.globl	atan_real

atan_real:
	ucomisd	xmm0,qword ptr real_m_0_26[rip]

	movsd	xmm1,xmm0	# x

	jb	atan_real_n	# x<-0.26 | NAN

	ucomisd	xmm0,qword ptr real_1_0[rip]
	ja	atan_real_g1	# x>1.0

	ucomisd	xmm0,qword ptr real_0_70[rip]
	ja	atan_real_2

	ucomisd	xmm0,qword ptr real_0_26[rip]
	ja	atan_real_1

	ucomisd	xmm0,qword ptr real_0_0[rip]
	je	atan_real_0_0	# atan -0.0 = -0.0

	mulsd	xmm0,xmm0	# x2

	movlpd	xmm5,qword ptr atan_p_9_1[rip]

	movsd	xmm6,xmm0	# x2
	mulsd	xmm0,xmm0	# x4

	movsd	xmm14,xmm6	# x2
	mulsd	xmm5,xmm6	# p2

	mulsd	xmm6,xmm0	# x6

	movlpd	xmm9,qword ptr atan_p_9_2[rip]

	movsd	xmm10,xmm0	# x4
	mulsd	xmm0,xmm0	# x8

	mulsd	xmm9,xmm10	# p4

	movlpd	xmm7,qword ptr atan_p_9_3[rip]
	movlpd	xmm12,qword ptr atan_p_9_4[rip]

	mulsd	xmm10,xmm6	# x10

	movsd	xmm2,xmm6	# x6
	mulsd	xmm7,xmm6	# p6
	mulsd	xmm6,xmm6	# x12

	mulsd	xmm2,xmm0	# x14
	mulsd	xmm12,xmm0	# p8
	mulsd	xmm0,xmm0	# x16

	mulsd	xmm10,qword ptr atan_p_9_5[rip]	# p10
	mulsd	xmm6,qword ptr atan_p_9_6[rip]	# p12
	mulsd	xmm2,qword ptr atan_p_9_7[rip]	# p14
# The operands of the next mulsd are at the start of the following physical line.
	mulsd 
xmm0,qword ptr atan_p_9_8[rip] # p16 + + addsd xmm0,xmm2 # p16+p14 + addsd xmm0,xmm6 # p16+p14+p12 + addsd xmm0,xmm10 # p16+p14+p12+p10 + addsd xmm0,xmm12 # p16+p14+p12+p10+p8 + addsd xmm0,xmm7 # p16+p14+p12+p10+p8+p6 + + mulsd xmm14,xmm1 # x3 + + addsd xmm0,xmm9 # p16+p14+p12+p10+p8+p6+p4 + addsd xmm0,xmm5 # p16+p14+p12+p10+p8+p6+p4+p2 + addsd xmm0,qword ptr atan_p_9_0[rip] # p16+p14+p12+p10+p8+p6+p4+p2+p0 + + mulsd xmm0,xmm14 # x3*p + + addsd xmm0,xmm1 # x+x3*p + +atan_real_0_0: + ret + +atan_real_n: + ucomisd xmm0,qword ptr real_m_1_0[rip] + jb atan_real_l_m1 # x<-1.0 | NAN + + ucomisd xmm0,qword ptr real_m_0_70[rip] + jb atan_real_2n + + movlpd xmm2,qword ptr real_m_2_0[rip] + movlpd xmm3,qword ptr real_m_0_5[rip] + + lea rdx,n_atan_0_5_52[rip] + jmp atan_real_1pn + +atan_real_1: + movlpd xmm2,qword ptr real_2_0[rip] + movlpd xmm3,qword ptr real_0_5[rip] + + lea rdx,atan_0_5_52[rip] + +atan_real_1pn: + addsd xmm0,xmm2 # -2+x + subsd xmm3,xmm1 # -0.5-x + + divsd xmm2,xmm0 # 2/(2+x) = 1/(1+0.5x) + + movlpd xmm15,qword ptr real_26_bits[rip] + movsd xmm0,xmm3 # 0.5-x + + lea rcx,atan_n_0_5_8_c[rip] + movsd xmm11,xmm3 # (0.5-x) + andpd xmm15,xmm3 # (0.5-x)_h + + subsd xmm11,xmm15 # (0.5-x)_l + + mulsd xmm0,xmm2 # y=(0.5-x)*(1/(1+0.5x)) + + movsd xmm4,xmm0 # y + mulsd xmm0,xmm0 # y2 + + movlpd xmm5,qword ptr (atan_n_0_5_8_1-atan_n_0_5_8_c)[rcx] + + movsd xmm6,xmm0 # y2 + mulsd xmm0,xmm0 # y4 + + movsd xmm14,xmm6 # y2 + mulsd xmm5,xmm6 # p2 + + movlpd xmm8,qword ptr real_27_bits[rip] + + mulsd xmm6,xmm0 # y6 + + movlpd xmm9,qword ptr (atan_n_0_5_8_2-atan_n_0_5_8_c)[rcx] + + movsd xmm10,xmm0 # y4 + mulsd xmm0,xmm0 # y8 + + mulsd xmm9,xmm10 # p4 + + movlpd xmm12,qword ptr (atan_n_0_5_8_4-atan_n_0_5_8_c)[rcx] + + mulsd xmm10,xmm6 # y10 + + movsd xmm7,xmm6 # x6 + mulsd xmm6,xmm6 # x12 + + mulsd xmm12,xmm0 # p8 + mulsd xmm0,xmm7 # x14 + + andpd xmm8,xmm4 # y_h + + mulsd xmm7,qword ptr (atan_n_0_5_8_3-atan_n_0_5_8_c)[rcx] # p6 + mulsd xmm10,qword ptr 
(atan_n_0_5_8_5-atan_n_0_5_8_c)[rcx] # p10 + mulsd xmm6,qword ptr (atan_n_0_5_8_6-atan_n_0_5_8_c)[rcx] # p12 + mulsd xmm0,qword ptr (atan_n_0_5_8_7-atan_n_0_5_8_c)[rcx] # p14 + + movlpd xmm13,qword ptr real_0_25[rip] + movlpd xmm1,qword ptr (atan_0_5_real_0_5-atan_0_5_52)[rdx] + mulsd xmm13,xmm8 # 0.25*y_h + mulsd xmm1,xmm8 # 0.5*y_h + subsd xmm3,xmm8 # (0.5-x)-y_h + + addsd xmm0,xmm6 # p14+p12 + + subsd xmm3,xmm13 # (0.5-x)-1.25*y_h + mulsd xmm15,xmm1 # (0.5-x)_h*0.5*y_h + mulsd xmm11,xmm1 # (0.5-x)_l*0.5*y_h + + addsd xmm0,xmm10 # p14+p12+p10 + + addsd xmm3,xmm15 # (0.5-x)-1.25*y_h+(0.5-x)_h*0.5*y_h + + addsd xmm0,xmm12 # p14+p12+p10+p8 + + addsd xmm3,xmm11 # (0.5-x)-1.25*y_h+(0.5-x)_h*0.5*y_h+(0.5-x)_l*0.5*y_h + + addsd xmm0,xmm7 # p14+p12+p10+p8+p6 + + mulsd xmm14,xmm4 # y3 + + mulsd xmm3,xmm2 # c=((0.5-x)-1.25*y_h-(0.5-x)_h*0.5*y_h-(0.5-x)_l*0.5*y_h)*(1/(1+0.5x)) + + addsd xmm0,xmm9 # p14+p12+p10+p8+p6+p4 + + movsd xmm11,xmm8 # y_h + addsd xmm8,xmm3 # y_h+c + + addsd xmm0,xmm5 # p14+p12+p10+p8+p6+p4+p2 + + movlpd xmm7,qword ptr real_40_bits[rip] + movlpd xmm12,qword ptr real_abs_40_bits[rip] + andpd xmm7,xmm8 # (y_h+c)_h + andpd xmm12,xmm8 + + addsd xmm0,qword ptr (atan_n_0_5_8_0-atan_n_0_5_8_c)[rcx] # p14+p12+p10+p8+p6+p4+p2+p0 + + subsd xmm11,xmm7 # y_h-(y_h+c)_h + addsd xmm3,xmm11 # (y_h-(y_h+c)_h)+c + + mulsd xmm0,xmm14 # y3*p/q + + movlpd xmm2,qword ptr (atan_0_5_52_l-atan_0_5_52)[rdx] + subsd xmm2,xmm0 # atan_0_5_52_l-y3*p/q + movlpd xmm0,qword ptr (atan_0_5_52-atan_0_5_52)[rdx] + + ucomisd xmm12,qword ptr real_2_p_m_13[rip] + jb atan_real_1_s + + subsd xmm0,xmm7 # atan_0_5_52-(y_h+c)_h + subsd xmm2,xmm3 # atan_0_5_52_l-y3*p/q-(y_h+c)_l + addsd xmm0,xmm2 # atan 0_5-y3*p/q-(0.5-x)/(1+x) + + ret + +atan_real_1_s: + subsd xmm2,xmm3 # atan_0_5_52_l-y3*p/q-(y_h+c)_l + subsd xmm2,xmm7 # atan_0_5_52_l-y3*p/q-(y_h+c)_l-(y_h+c)_h + addsd xmm0,xmm2 # atan 0_5-y3*p/q-(1-x)/(1+x) + ret + +atan_real_2n: + lea rcx,atan_n_8_c[rip] + +atan_real_2n_: + movlpd xmm2,qword 
ptr real_m_1_0[rip] + movlpd xmm3,qword ptr real_m_1_0[rip] + + addsd xmm0,xmm2 # 1+x + subsd xmm3,xmm1 # 1-x + + lea rdx,n_atan_1_53[rip] + + divsd xmm2,xmm0 # 1/(1+x) + + movlpd xmm15,qword ptr real_26_bits[rip] + movsd xmm0,xmm3 # 1-x + + movsd xmm11,xmm3 # (1-x) + andpd xmm15,xmm3 # (1-x)_h + + subsd xmm11,xmm15 # (1-x)_l + + mulsd xmm0,xmm2 # y=(1-x)*(1/(1+x)) + + movsd xmm4,xmm0 # y + mulsd xmm0,xmm0 # y2 + + movlpd xmm5,qword ptr (atan_n_8_1-atan_n_8_c)[rcx] + + movsd xmm6,xmm0 # y2 + mulsd xmm0,xmm0 # y4 + + movsd xmm14,xmm6 # y2 + mulsd xmm5,xmm6 # p2 + + movlpd xmm8,qword ptr real_27_bits[rip] + + mulsd xmm6,xmm0 # y6 + + movlpd xmm9,qword ptr (atan_n_8_2-atan_n_8_c)[rcx] + + movsd xmm10,xmm0 # y4 + mulsd xmm0,xmm0 # y8 + + mulsd xmm9,xmm10 # p4 + + movlpd xmm12,qword ptr (atan_n_8_4-atan_n_8_c)[rcx] + + mulsd xmm10,xmm6 # y10 + + movsd xmm7,xmm6 # x6 + mulsd xmm6,xmm6 # x12 + + mulsd xmm12,xmm0 # p8 + mulsd xmm0,xmm7 # x14 + + andpd xmm8,xmm4 # y_h + + mulsd xmm7,qword ptr (atan_n_8_3-atan_n_8_c)[rcx] # p6 + mulsd xmm10,qword ptr (atan_n_8_5-atan_n_8_c)[rcx] # p10 + mulsd xmm6,qword ptr (atan_n_8_6-atan_n_8_c)[rcx] # p12 + mulsd xmm0,qword ptr (atan_n_8_7-atan_n_8_c)[rcx] # p14 + + movlpd xmm13,qword ptr real_2_0[rip] + mulsd xmm13,xmm8 # 2*y_h + mulsd xmm15,xmm8 # (1-x)_h*y_h + + addsd xmm0,xmm6 # p14+p12 + + subsd xmm3,xmm13 # (1-x)-2*y_h + mulsd xmm11,xmm8 # (1-x)_l*y_h + + addsd xmm0,xmm10 # p14+p12+p10 + + subsd xmm3,xmm15 # (1-x)-2*y_h-(1-x)_h*y_h + + addsd xmm0,xmm12 # p14+p12+p10+p8 + + subsd xmm3,xmm11 # (1-x)-2*y_h-(1-x)_h*y_h-(1-x)_h*y_l + jmp atan_real_2pn_ + +atan_real_2: + lea rcx,atan_n_8_c[rip] + +atan_real_2p_: + movlpd xmm2,qword ptr real_1_0[rip] + movlpd xmm3,qword ptr real_1_0[rip] + + addsd xmm0,xmm2 # 1+x + subsd xmm3,xmm1 # 1-x + + lea rdx,atan_1_53[rip] + + divsd xmm2,xmm0 # 1/(1+x) + + movlpd xmm15,qword ptr real_26_bits[rip] + movsd xmm0,xmm3 # 1-x + + movsd xmm11,xmm3 # (1-x) + andpd xmm15,xmm3 # (1-x)_h + + subsd xmm11,xmm15 
# (1-x)_l + + mulsd xmm0,xmm2 # y=(1-x)*(1/(1+x)) + + movsd xmm4,xmm0 # y + mulsd xmm0,xmm0 # y2 + + movlpd xmm5,qword ptr (atan_n_8_1-atan_n_8_c)[rcx] + + movsd xmm6,xmm0 # y2 + mulsd xmm0,xmm0 # y4 + + movsd xmm14,xmm6 # y2 + mulsd xmm5,xmm6 # p2 + + movlpd xmm8,qword ptr real_27_bits[rip] + + mulsd xmm6,xmm0 # y6 + + movlpd xmm9,qword ptr (atan_n_8_2-atan_n_8_c)[rcx] + + movsd xmm10,xmm0 # y4 + mulsd xmm0,xmm0 # y8 + + mulsd xmm9,xmm10 # p4 + + movlpd xmm12,qword ptr (atan_n_8_4-atan_n_8_c)[rcx] + + mulsd xmm10,xmm6 # y10 + + movsd xmm7,xmm6 # x6 + mulsd xmm6,xmm6 # x12 + + mulsd xmm12,xmm0 # p8 + mulsd xmm0,xmm7 # x14 + + andpd xmm8,xmm4 # y_h + + mulsd xmm7,qword ptr (atan_n_8_3-atan_n_8_c)[rcx] # p6 + mulsd xmm10,qword ptr (atan_n_8_5-atan_n_8_c)[rcx] # p10 + mulsd xmm6,qword ptr (atan_n_8_6-atan_n_8_c)[rcx] # p12 + mulsd xmm0,qword ptr (atan_n_8_7-atan_n_8_c)[rcx] # p14 + + movlpd xmm13,qword ptr real_2_0[rip] + mulsd xmm13,xmm8 # 2*y_h + mulsd xmm15,xmm8 # (1-x)_h*y_h + + addsd xmm0,xmm6 # p14+p12 + + subsd xmm3,xmm13 # (1-x)-2*y_h + mulsd xmm11,xmm8 # (1-x)_l*y_h + + addsd xmm0,xmm10 # p14+p12+p10 + + addsd xmm3,xmm15 # (1-x)-2*y_h+(1-x)_h*y_h + + addsd xmm0,xmm12 # p14+p12+p10+p8 + + addsd xmm3,xmm11 # (1-x)-2*y_h+(1-x)_h*y_h+(1-x)_h*y_l + +atan_real_2pn_: + addsd xmm0,xmm7 # p14+p12+p10+p8+p6 + + mulsd xmm14,xmm4 # y3 + + mulsd xmm3,xmm2 # c=((1-x)-2*y_h-(1-x)_h*y_h-(1-x)_h*y_l)*(1/(1+x)) + + addsd xmm0,xmm9 # p14+p12+p10+p8+p6+p4 + + movsd xmm11,xmm8 # y_h + addsd xmm8,xmm3 # y_h+c + + addsd xmm0,xmm5 # p14+p12+p10+p8+p6+p4+p2 + + movlpd xmm7,qword ptr (atan_n_8_real_40_bits-atan_n_8_c)[rcx] + movlpd xmm12,qword ptr (atan_n_8_real_abs_40_bits-atan_n_8_c)[rcx] + andpd xmm7,xmm8 # (y_h+c)_h + andpd xmm12,xmm8 + + addsd xmm0,qword ptr (atan_n_8_0-atan_n_8_c)[rcx] # p14+p12+p10+p8+p6+p4+p2+p0 + + subsd xmm11,xmm7 # y_h-(y_h+c)_h + addsd xmm3,xmm11 # (y_h-(y_h+c)_h)+c + + mulsd xmm0,xmm14 # y3*p/q + + movlpd xmm2,qword ptr (atan_1_53_l-atan_1_53)[rdx] + 
subsd xmm2,xmm0 # pi_d_4_l-y3*p/q + movlpd xmm0,qword ptr (atan_1_53-atan_1_53)[rdx] + + ucomisd xmm12,qword ptr (atan_n_8_real_2_p_m_12-atan_n_8_c)[rcx] + jb atan_real_2_s + + subsd xmm0,xmm7 # pi_d_4-(y_h+c)_h + subsd xmm2,xmm3 # pi_d_4_l-y3*p/q-(y_h+c)_l + addsd xmm0,xmm2 # pi/4-y3*p/q-(1-x)/(1+x) + + ret + +atan_real_2_s: + subsd xmm2,xmm3 # pi_d_4_l-y3*p/q-(y_h+c)_l + subsd xmm2,xmm7 # pi_d_4_l-y3*p/q-(y_h+c)_l-(y_h+c)_h + addsd xmm0,xmm2 # pi/4-y3*p/q-(1-x)/(1+x) + ret + +atan_real_l_m1: + ucomisd xmm0,qword ptr real_m_4_0[rip] + jb atan_real_4n # x<4.0 | NAN + + ucomisd xmm0,qword ptr real_m_1_4[rip] + jb atan_real_3n + + lea rcx,atan_p_8_c[rip] + + att_jmp atan_real_2n_ + +atan_real_g1: + ucomisd xmm0,qword ptr real_4_0[rip] + ja atan_real_4 # x>4.0 + + ucomisd xmm0,qword ptr real_1_4[rip] + ja atan_real_3 + + lea rcx,atan_p_8_c[rip] + + att_jmp atan_real_2p_ + +atan_real_3n: + movlpd xmm2,qword ptr real_m_0_5[rip] + movlpd xmm3,qword ptr real_m_2_0[rip] + + lea rdx,n_atan_2_0[rip] + jmp atan_real_3pn + +atan_real_3: + movlpd xmm2,qword ptr real_0_5[rip] + movlpd xmm3,qword ptr real_2_0[rip] + + lea rdx,atan_2_0[rip] + +atan_real_3pn: + addsd xmm0,xmm2 # 0.5+x + subsd xmm3,xmm1 # 2-x + + divsd xmm2,xmm0 # 0.5/(0.5+x) = 1/(1+2*x) + + movlpd xmm15,qword ptr real_26_bits[rip] + movsd xmm0,xmm3 # 2-x + + movsd xmm11,xmm3 # (2-x) + andpd xmm15,xmm3 # (2-x)_h + + lea rcx,atan_n_2_8_c[rip] + subsd xmm11,xmm15 # (2-x)_l + + mulsd xmm0,xmm2 # y=(2-x)*(1/(1+0.5x)) + + movsd xmm4,xmm0 # y + mulsd xmm0,xmm0 # y2 + + movlpd xmm5,qword ptr (atan_n_2_8_1-atan_n_2_8_c)[rcx] + + movsd xmm6,xmm0 # y2 + mulsd xmm0,xmm0 # y4 + + movsd xmm14,xmm6 # y2 + mulsd xmm5,xmm6 # p2 + + movlpd xmm8,qword ptr real_27_bits[rip] + + mulsd xmm6,xmm0 # y6 + + movlpd xmm9,qword ptr (atan_n_2_8_2-atan_n_2_8_c)[rcx] + + movsd xmm10,xmm0 # y4 + mulsd xmm0,xmm0 # y8 + + mulsd xmm9,xmm10 # p4 + + movlpd xmm12,qword ptr (atan_n_2_8_4-atan_n_2_8_c)[rcx] + + mulsd xmm10,xmm6 # y10 + + movsd xmm7,xmm6 
# x6 + mulsd xmm6,xmm6 # x12 + + mulsd xmm12,xmm0 # p8 + mulsd xmm0,xmm7 # x14 + + andpd xmm8,xmm4 # y_h + + mulsd xmm7,qword ptr (atan_n_2_8_3-atan_n_2_8_c)[rcx] # p6 + mulsd xmm10,qword ptr (atan_n_2_8_5-atan_n_2_8_c)[rcx] # p10 + mulsd xmm6,qword ptr (atan_n_2_8_6-atan_n_2_8_c)[rcx] # p12 + mulsd xmm0,qword ptr (atan_n_2_8_7-atan_n_2_8_c)[rcx] # p14 + + movlpd xmm13,qword ptr real_4_0[rip] + movlpd xmm1,qword ptr (atan_2_0_real_2_0-atan_2_0)[rdx] + mulsd xmm13,xmm8 # 4*y_h + mulsd xmm1,xmm8 # 2*y_h + + addsd xmm0,xmm6 # p14+p12 + + subsd xmm3,xmm13 # (2-x)-4*y_h + mulsd xmm15,xmm1 # (2-x)_h*2*y_h + mulsd xmm11,xmm1 # (2-x)_l*2*y_h + subsd xmm3,xmm8 # (2-x)-y_h + + addsd xmm0,xmm10 # p14+p12+p10 + + addsd xmm3,xmm15 # (2-x)-5*y_h+(2-x)_h*2*y_h + + addsd xmm0,xmm12 # p14+p12+p10+p8 + + addsd xmm3,xmm11 # (2-x)-5*y_h+(2-x)_h*2*y_h+(2-x)_l*2*y_h + + addsd xmm0,xmm7 # p14+p12+p10+p8+p6 + + mulsd xmm14,xmm4 # y3 + + mulsd xmm3,xmm2 # c=((2-x)-5*y_h-(2-x)_h*2*y_h-(2-x)_l*2*y_h)*(1/(1+0.5x)) + + addsd xmm0,xmm9 # p14+p12+p10+p8+p6+p4 + + movsd xmm11,xmm8 # y_h + addsd xmm8,xmm3 # y_h+c + + addsd xmm0,xmm5 # p14+p12+p10+p8+p6+p4+p2 + + movlpd xmm7,qword ptr real_39_bits[rip] + movlpd xmm12,qword ptr real_abs_39_bits[rip] + andpd xmm7,xmm8 + andpd xmm12,xmm8 # (y_h+c)_h + + addsd xmm0,qword ptr (atan_n_2_8_0-atan_n_2_8_c)[rcx] # p14+p12+p10+p8+p6+p4+p2+p0 + + subsd xmm11,xmm7 # y_h-(y_h+c)_h + addsd xmm3,xmm11 # (y_h-(y_h+c)_h)+c + + mulsd xmm0,xmm14 # y3*p/q + + movlpd xmm2,qword ptr (atan_2_0_l-atan_2_0)[rdx] + subsd xmm2,xmm0 # atan_2_0_l-y3*p/q + movlpd xmm0,qword ptr (atan_2_0-atan_2_0)[rdx] + + ucomisd xmm12,qword ptr real_2_p_m_11[rip] + jb atan_real_3_s + + subsd xmm0,xmm7 # atan_2_0-(y_h+c)_h + subsd xmm2,xmm3 # atan_2_0_l-y3*p/q-(y_h+c)_l + addsd xmm0,xmm2 # atan 2_0-y3*p/q-(2-x)/(1+x) + + ret + +atan_real_3_s: + subsd xmm2,xmm3 # atan_2_0_l-y3*p/q-(y_h+c)_l + subsd xmm2,xmm7 # atan_2_0_l-y3*p/q-(y_h+c)_l-(y_h+c)_h + addsd xmm0,xmm2 # atan 2_0-y3*p/q-(1-x)/(1+x) 
+ ret + +atan_real_4n: + ucomisd xmm0,qword ptr real_atan_m_large[rip] + jb atan_real_m_large_or_nan # x<-5805358775541310.0840 | NAN + + movlpd xmm2,qword ptr real_1_0[rip] + mulsd xmm0,xmm0 # x2 + + lea rdx,real_m_pi_d_2[rip] + + jmp atan_real_4pn + +atan_real_4: + ucomisd xmm0,qword ptr real_atan_large[rip] + ja atan_real_large # x>5805358775541310.0840 + + movlpd xmm2,qword ptr real_1_0[rip] + mulsd xmm0,xmm0 # x2 + + lea rdx,real_pi_d_2[rip] + +atan_real_4pn: + divsd xmm2,xmm1 # 1/x + + lea rcx,atan4_p_c[rip] + + movlpd xmm4,qword ptr (atan4_q_4-atan4_p_c)[rcx] + movlpd xmm3,qword ptr (atan4_p_3-atan4_p_c)[rcx] + + movsd xmm5,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm4,xmm5 # q2 + mulsd xmm3,xmm5 # p2 + + movlpd xmm9,qword ptr (atan4_q_3-atan4_p_c)[rcx] + movlpd xmm8,qword ptr (atan4_p_2-atan4_p_c)[rcx] + + movsd xmm6,xmm0 # x4 + mulsd xmm0,xmm5 # x6 + + mulsd xmm9,xmm6 # q4 + mulsd xmm8,xmm6 # p4 + + movlpd xmm11,qword ptr (atan4_q_2-atan4_p_c)[rcx] + movlpd xmm10,qword ptr (atan4_p_1-atan4_p_c)[rcx] + + addsd xmm4,qword ptr real_1_0[rip] # 1+q2 + addsd xmm3,qword ptr (atan4_p_4-atan4_p_c)[rcx] # p0+p2 + + movsd xmm7,xmm6 # x4 + mulsd xmm6,xmm6 # x8 + + mulsd xmm11,xmm0 # q6 + mulsd xmm10,xmm0 # p6 + + addsd xmm4,xmm9 # 1+q2+q4 + addsd xmm3,xmm8 # p0+p2+p4 + + mulsd xmm0,xmm7 # x10 + + movlpd xmm9,qword ptr (atan4_q_1-atan4_p_c)[rcx] + movlpd xmm8,qword ptr (atan4_p_0-atan4_p_c)[rcx] + + addsd xmm4,xmm11 # 1+q2+q4+q6 + addsd xmm3,xmm10 # p0+p2+p4+p6 + + mulsd xmm9,xmm6 # q8 + mulsd xmm8,xmm6 # p8 + + mulsd xmm0,qword ptr (atan4_q_0-atan4_p_c)[rcx]# q10 + + movlpd xmm5,qword ptr real_26_bits[rip] + movsd xmm6,xmm1 # x + movlpd xmm7,qword ptr real_26_bits[rip] + andpd xmm5,xmm1 # x_h + subsd xmm6,xmm5 # x_l + andpd xmm7,xmm2 # (1/x)_h + + addsd xmm4,xmm9 # 1+q2+q4+q6+q8 + addsd xmm3,xmm8 # p0+p2+p4+p6+p8 + + movlpd xmm8,qword ptr real_1_0[rip] + mulsd xmm5,xmm7 # x_h*(1/x)_h + mulsd xmm6,xmm7 # x_l*(1/x)_h + + addsd xmm4,xmm0 # 1+q2+q4+q6+q8+q10 + + subsd 
xmm8,xmm5 # 1-x_h*(1/x)_h + + subsd xmm8,xmm6 # 1-x_h*(1/x)_h-x_l*(1/x)_h + + mulsd xmm8,xmm2 # (1-x_h*(1/x)_h-x_l*(1/x)_h)*(1/x) + + divsd xmm3,xmm4 # p/q + + movlpd xmm1,qword ptr (real_pi_d_2_l-real_pi_d_2)[rdx] + movlpd xmm0,qword ptr (real_pi_d_2-real_pi_d_2)[rdx] + + mulsd xmm3,xmm2 # (p/q)*(1/x) + + subsd xmm1,xmm3 # pi_d_2_l-(p/q)*(1/x) + subsd xmm0,xmm7 # pi_d_2-(1/x)_h + subsd xmm1,xmm8 # pi_d_2_l-(p/q)*(1/x)-(1/x)_l + addsd xmm0,xmm1 + ret + +atan_real_4poly: + movlpd xmm3,qword ptr real_1_0[rip] + divsd xmm3,xmm0 # 1/x + + movlpd xmm5,qword ptr real_26_bits[rip] + movsd xmm6,xmm0 # x + movlpd xmm7,qword ptr real_26_bits[rip] + andpd xmm5,xmm0 # x_h + + subsd xmm6,xmm5 # x_l + + movsd xmm15,xmm3 # x + mulsd xmm3,xmm3 # x2 + + movlpd xmm1,qword ptr atan_p_9_1[rip] + + movsd xmm11,xmm3 # x2 + mulsd xmm3,xmm3 # x4 + + movsd xmm14,xmm11 # x2 + mulsd xmm1,xmm11 # p2 + + mulsd xmm11,xmm3 # x6 + + movlpd xmm9,qword ptr atan_p_9_2[rip] + + movsd xmm10,xmm3 # x4 + mulsd xmm3,xmm3 # x8 + + mulsd xmm9,xmm10 # p4 + + movlpd xmm4,qword ptr atan_p_9_3[rip] + movlpd xmm12,qword ptr atan_p_9_4[rip] + + mulsd xmm10,xmm11 # x10 + + movsd xmm2,xmm11 # x6 + mulsd xmm4,xmm11 # p6 + mulsd xmm11,xmm11 # x12 + + mulsd xmm2,xmm3 # x14 + mulsd xmm12,xmm3 # p8 + mulsd xmm3,xmm3 # x16 + + mulsd xmm10,qword ptr atan_p_9_5[rip] # p10 + mulsd xmm11,qword ptr atan_p_9_6[rip] # p12 + mulsd xmm2,qword ptr atan_p_9_7[rip] # p14 + mulsd xmm3,qword ptr atan_p_9_8[rip] # p16 + + addsd xmm3,xmm2 # p16+p14 + addsd xmm3,xmm11 # p16+p14+p12 + addsd xmm3,xmm10 # p16+p14+p12+p10 + + andpd xmm7,xmm15 # (1/x)_h + + addsd xmm3,xmm12 # p16+p14+p12+p10+p8 + + movlpd xmm8,qword ptr real_1_0[rip] + mulsd xmm5,xmm7 # x_h*(1/x)_h + mulsd xmm6,xmm7 # x_l*(1/x)_h + + addsd xmm3,xmm4 # p16+p14+p12+p10+p8+p6 + + mulsd xmm14,xmm15 # x3 + + subsd xmm8,xmm5 # 1-x_h*(1/x)_h + + addsd xmm3,xmm9 # p16+p14+p12+p10+p8+p6+p4 + + subsd xmm8,xmm6 # 1-x_h*(1/x)_h-x_l*(1/x)_h + + addsd xmm3,xmm1 # 
p16+p14+p12+p10+p8+p6+p4+p2 + + mulsd xmm8,xmm15 # (1-x_h*(1/x)_h-x_l*(1/x)_h)*(1/x) + + addsd xmm3,qword ptr atan_p_9_0[rip] # p16+p14+p12+p10+p8+p6+p4+p2+p0 + + movlpd xmm1,qword ptr real_pi_d_2_l[rip] + movlpd xmm0,qword ptr real_pi_d_2[rip] + + mulsd xmm3,xmm14 # x3*p + + subsd xmm1,xmm3 # pi_d_2_l-(p/q)*(1/x) + subsd xmm0,xmm7 # pi_d_2-(1/x)_h + subsd xmm1,xmm8 # pi_d_2_l-(p/q)*(1/x)-(1/x)_l + addsd xmm0,xmm1 + ret + +atan_real_m_large_or_nan: + jp atan_real_nan # PF=1: the ucomisd before the jump here was unordered, so x is a NaN + + movlpd xmm0,qword ptr real_m_pi_d_2[rip] # x below the cutoff: return the constant real_m_pi_d_2 + ret + +atan_real_nan: + addsd xmm0,xmm0 # NaN+NaN: return a NaN + ret + +atan_real_large: + movlpd xmm0,qword ptr real_pi_d_2[rip] # x above the cutoff: return the constant real_pi_d_2 + ret + + .globl exp_real + +# exp_real: argument x in xmm0, result in xmm0; rcx, rdx and xmm1-xmm15 serve as scratch on the various paths +exp_real: + ucomisd xmm0,qword ptr real_ln_2_t_0_5[rip] + ja exp_real_2 # x>0.34657359027997265471 + + ucomisd xmm0,qword ptr real_n_ln_2_t_0_5[rip] + jb exp_real_3_n # x<-0.34657359027997265471 | nan + + movlpd xmm1,qword ptr n_45_d_256[rip] + movlpd xmm2,qword ptr n_m_45_d_256[rip] + + ucomisd xmm0,xmm1 + ja exp_real_p1 # x>n_45_d_256 + + ucomisd xmm0,xmm2 + jb exp_real_n1 # x<n_m_45_d_256 + + ucomisd xmm0,qword ptr real_0_0[rip] # CF=1 when x<0; lea/movsd/mulsd below leave the flags intact for cmovb + + lea rcx,exp_p0_c[rip] + lea rdx,exp_m0_c[rip] + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + cmovb rcx,rdx # x<0: switch to the exp_m0_c table (read with exp_p0_* offsets, so both tables presumably share one layout) + + movlpd xmm6,qword ptr (exp_p0_2-exp_p0_c)[rcx] + movlpd xmm7,qword ptr (exp_p0_4-exp_p0_c)[rcx] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm6,xmm2 # p2 + + movsd xmm3,xmm1 # x + mulsd xmm1,xmm2 # x3 + + movlpd xmm8,qword ptr (exp_p0_3-exp_p0_c)[rcx] + mulsd xmm7,xmm0 # p4 + + mulsd xmm8,xmm1 # p3 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movsd xmm5,xmm2 # x2 + mulsd xmm2,xmm1 # x5 + + mulsd xmm5,xmm4 # x6 + mulsd xmm1,xmm4 # x7 + mulsd xmm4,xmm2 # x9 + + mulsd xmm0,qword ptr (exp_p0_8-exp_p0_c)[rcx] # p8 + mulsd xmm2,qword ptr (exp_p0_5-exp_p0_c)[rcx] # p5 + mulsd xmm5,qword ptr (exp_p0_6-exp_p0_c)[rcx] # p6 + mulsd xmm1,qword ptr (exp_p0_7-exp_p0_c)[rcx] # p7 + mulsd xmm4,qword ptr (exp_p0_9-exp_p0_c)[rcx] # p9 + + addsd xmm0,xmm4 # p9+p8 + + movlpd xmm9,qword ptr real_26_bits[rip]
+ movlpd xmm10,qword ptr real_1_0[rip] + + addsd xmm0,xmm1 # p9+p8+p7 + addsd xmm0,xmm5 # p9+p8+p7+p6 + + andpd xmm9,xmm3 # x_h + + addsd xmm0,xmm2 # p9+p8+p7+p6+p5 + + addsd xmm9,xmm10 # 1+x_h + + addsd xmm0,xmm7 # p9+p8+p7+p6+p5+p4 + + subsd xmm10,xmm9 # 1-(1+x_h) + + addsd xmm0,xmm8 # p9+p8+p7+p6+p5+p4+p3 + + addsd xmm10,xmm3 # (1-(1+x_h))+x + + addsd xmm0,xmm6 # p9+p8+p7+p6+p5+p4+p3+p2 + + addsd xmm0,xmm10 # +((1-(1+x_h))+x), the low-order part of 1+x + addsd xmm0,xmm9 # +(1+x_h), the leading part is added last + ret + +exp_real_n1: + subsd xmm0,xmm2 # x-n_m_45_d_256 (xmm2 was loaded with that constant at exp_real) + + lea rcx,exp_m1_c[rip] # coefficient table for this interval + jmp exp_real_np1 + +exp_real_p1: + subsd xmm0,xmm1 # x-n_45_d_256 (xmm1 was loaded with that constant at exp_real) + + lea rcx,exp_p1_c[rip] # coefficient table for this interval + +exp_real_np1: + movsd xmm1,xmm0 # x (the shifted argument from here on) + mulsd xmm0,xmm0 # x2 + + movlpd xmm6,qword ptr (exp_p1_2-exp_p1_c)[rcx] + movlpd xmm7,qword ptr (exp_p1_4-exp_p1_c)[rcx] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm6,xmm2 # p2 + + movsd xmm3,xmm1 # x + mulsd xmm1,xmm2 # x3 + + movlpd xmm8,qword ptr (exp_p1_3-exp_p1_c)[rcx] + mulsd xmm7,xmm0 # p4 + + mulsd xmm8,xmm1 # p3 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movsd xmm5,xmm2 # x2 + mulsd xmm2,xmm1 # x5 + + mulsd xmm5,xmm4 # x6 + mulsd xmm1,xmm4 # x7 + mulsd xmm4,xmm2 # x9 + + mulsd xmm0,qword ptr (exp_p1_8-exp_p1_c)[rcx] # p8 + mulsd xmm2,qword ptr (exp_p1_5-exp_p1_c)[rcx] # p5 + mulsd xmm5,qword ptr (exp_p1_6-exp_p1_c)[rcx] # p6 + mulsd xmm1,qword ptr (exp_p1_7-exp_p1_c)[rcx] # p7 + mulsd xmm4,qword ptr (exp_p1_9-exp_p1_c)[rcx] # p9 + + addsd xmm0,xmm4 # p9+p8 + + movlpd xmm9,qword ptr real_26_bits[rip] + + addsd xmm0,xmm1 # p9+p8+p7 + + movsd xmm11,xmm3 # x + andpd xmm9,xmm3 # x_h + mulsd xmm3,qword ptr (exp_p1_1_l-exp_p1_c)[rcx] # x*c1_l + + addsd xmm0,xmm5 # p9+p8+p7+p6 + + subsd xmm11,xmm9 # x_l + mulsd xmm9,qword ptr (exp_p1_1_h-exp_p1_c)[rcx] # x_h*c1_h + + addsd xmm0,xmm2 # p9+p8+p7+p6+p5 + + movlpd xmm10,qword ptr (exp_p1_0-exp_p1_c)[rcx] # c0 + mulsd xmm11,qword ptr (exp_p1_1_h-exp_p1_c)[rcx] # x_l*c1_h + addsd xmm10,xmm9 # x_h*c1_h+c0 + movlpd xmm12,qword ptr (exp_p1_0-exp_p1_c)[rcx] # c0 again, to recover the rounding error of the sum above + + addsd xmm0,xmm7 # p9+p8+p7+p6+p5+p4
+ + addsd xmm3,xmm11 # x*c1_l+x_l*c1_h + subsd xmm12,xmm10 # c0-(x_h*c1_h+c0) + + addsd xmm0,xmm8 # p9+p8+p7+p6+p5+p4+p3 + + addsd xmm12,xmm9 # (c0-(x_h*c1_h+c0))+x_h*c1_h + + addsd xmm0,xmm6 # p9+p8+p7+p6+p5+p4+p3+p2 + + addsd xmm3,xmm12 # (c0-(x_h*c1_h+c0))+x_h*c1_h+x*c1_l+x_l*c1_h + + addsd xmm0,qword ptr (exp_p1_0_l-exp_p1_c)[rcx] # +c0_l + + addsd xmm0,xmm3 # +low-order correction terms + addsd xmm0,xmm10 # +(x_h*c1_h+c0), the leading part is added last + ret + +exp_real_2: + ucomisd xmm0,qword ptr real_ln_2_t_1_5[rip] + ja exp_real_3 # x>real_ln_2_t_1_5: use the general reduction + + subsd xmm0,qword ptr real_ln2_42[rip] # x-real_ln2_42 + movlpd xmm14,qword ptr real_ln2_42_l[rip] # nxl=real_ln2_42_l + + movsd xmm13,xmm0 # xh + subsd xmm0,xmm14 # xh-nxl + + movlpd xmm15,qword ptr real_2_0[rip] # scale factor applied at the end + +# exp_real_: shared tail; expects the reduced argument in xmm0, xh in xmm13, nxl in xmm14 and the scale factor in xmm15 +exp_real_: + movlpd xmm1,qword ptr n_45_d_256[rip] + movlpd xmm2,qword ptr n_m_45_d_256[rip] + + ucomisd xmm0,xmm1 + ja exp_real_p1_ # x>n_45_d_256 + + ucomisd xmm0,xmm2 + jb exp_real_n1_ # x<n_m_45_d_256 + + ucomisd xmm0,qword ptr real_0_0[rip] # CF=1 when x<0; the flags survive until cmovb below + + lea rcx,exp_p0_c[rip] + lea rdx,exp_m0_c[rip] + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + cmovb rcx,rdx # x<0: switch to the exp_m0_c table + + movlpd xmm6,qword ptr (exp_p0_2-exp_p0_c)[rcx] + movlpd xmm7,qword ptr (exp_p0_4-exp_p0_c)[rcx] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm6,xmm2 # p2 + + movsd xmm3,xmm1 # x + mulsd xmm1,xmm2 # x3 + + movlpd xmm8,qword ptr (exp_p0_3-exp_p0_c)[rcx] + mulsd xmm7,xmm0 # p4 + + mulsd xmm8,xmm1 # p3 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movsd xmm5,xmm2 # x2 + mulsd xmm2,xmm1 # x5 + + mulsd xmm5,xmm4 # x6 + mulsd xmm1,xmm4 # x7 + mulsd xmm4,xmm2 # x9 + + mulsd xmm0,qword ptr (exp_p0_8-exp_p0_c)[rcx] # p8 + mulsd xmm2,qword ptr (exp_p0_5-exp_p0_c)[rcx] # p5 + mulsd xmm5,qword ptr (exp_p0_6-exp_p0_c)[rcx] # p6 + mulsd xmm1,qword ptr (exp_p0_7-exp_p0_c)[rcx] # p7 + mulsd xmm4,qword ptr (exp_p0_9-exp_p0_c)[rcx] # p9 + + addsd xmm0,xmm4 # p9+p8 + + movlpd xmm9,qword ptr real_26_bits[rip] + movlpd xmm10,qword ptr real_1_0[rip] + + addsd xmm0,xmm1 # p9+p8+p7 + + andpd xmm9,xmm3 # x_h + + addsd xmm0,xmm5 # p9+p8+p7+p6 + + addsd xmm9,xmm10 # 1+x_h + + addsd xmm0,xmm2 # p9+p8+p7+p6+p5 + + subsd xmm10,xmm9
# 1-(1+x_h) + + addsd xmm0,xmm7 # p9+p8+p7+p6+p5+p4 + + addsd xmm10,xmm13 # (1-(1+x_h))+xh + + addsd xmm0,xmm8 # p9+p8+p7+p6+p5+p4+p3 + + subsd xmm10,xmm14 # (1-(1+x_h))+xh-nxl + + addsd xmm0,xmm6 # p9+p8+p7+p6+p5+p4+p3+p2 + + addsd xmm0,xmm10 # +((1-(1+x_h))+xh-nxl), the low-order part + addsd xmm0,xmm9 # +(1+x_h), the leading part is added last + + mulsd xmm0,xmm15 # apply the scale factor held in xmm15 + ret + +exp_real_n1_: + subsd xmm0,xmm2 # x-n_m_45_d_256 + subsd xmm13,xmm2 # same shift applied to xh + + lea rcx,exp_m1_c[rip] # coefficient table for this interval + jmp exp_real_np1_ + +exp_real_p1_: + subsd xmm0,xmm1 # x-n_45_d_256 + subsd xmm13,xmm1 # same shift applied to xh + + lea rcx,exp_p1_c[rip] # coefficient table for this interval + +exp_real_np1_: + movsd xmm1,xmm0 # x (the shifted argument from here on) + mulsd xmm0,xmm0 # x2 + + movlpd xmm6,qword ptr (exp_p1_2-exp_p1_c)[rcx] + movlpd xmm7,qword ptr (exp_p1_4-exp_p1_c)[rcx] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm6,xmm2 # p2 + + movsd xmm3,xmm1 # x + mulsd xmm1,xmm2 # x3 + + movlpd xmm8,qword ptr (exp_p1_3-exp_p1_c)[rcx] + mulsd xmm7,xmm0 # p4 + + mulsd xmm8,xmm1 # p3 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movsd xmm5,xmm2 # x2 + mulsd xmm2,xmm1 # x5 + + mulsd xmm5,xmm4 # x6 + mulsd xmm1,xmm4 # x7 + mulsd xmm4,xmm2 # x9 + + mulsd xmm0,qword ptr (exp_p1_8-exp_p1_c)[rcx] # p8 + mulsd xmm2,qword ptr (exp_p1_5-exp_p1_c)[rcx] # p5 + mulsd xmm5,qword ptr (exp_p1_6-exp_p1_c)[rcx] # p6 + mulsd xmm1,qword ptr (exp_p1_7-exp_p1_c)[rcx] # p7 + mulsd xmm4,qword ptr (exp_p1_9-exp_p1_c)[rcx] # p9 + + movlpd xmm9,qword ptr real_26_bits[rip] + + addsd xmm0,xmm4 # p9+p8 + + movsd xmm11,xmm13 # xh + andpd xmm9,xmm3 # x_h + mulsd xmm3,qword ptr (exp_p1_1_l-exp_p1_c)[rcx] # x*c1_l + + addsd xmm0,xmm1 # p9+p8+p7 + + subsd xmm11,xmm9 # xh-x_h + + addsd xmm0,xmm5 # p9+p8+p7+p6 + + subsd xmm11,xmm14 # x_l=(xh-x_h)-nxl + + mulsd xmm9,qword ptr (exp_p1_1_h-exp_p1_c)[rcx] # x_h*c1_h + + addsd xmm0,xmm2 # p9+p8+p7+p6+p5 + + movlpd xmm10,qword ptr (exp_p1_0-exp_p1_c)[rcx] # c0 + mulsd xmm11,qword ptr (exp_p1_1_h-exp_p1_c)[rcx] # x_l*c1_h + addsd xmm10,xmm9 # x_h*c1_h+c0 + movlpd xmm12,qword ptr (exp_p1_0-exp_p1_c)[rcx] # c0 again, to recover the rounding error of the sum above + + addsd xmm0,xmm7 # p9+p8+p7+p6+p5+p4 + + addsd xmm3,xmm11 # x*c1_l+x_l*c1_h + subsd xmm12,xmm10
# c0-(x_h*c1_h+c0) + + addsd xmm0,xmm8 # p9+p8+p7+p6+p5+p4+p3 + + addsd xmm12,xmm9 # (c0-(x_h*c1_h+c0))+x_h*c1_h + + addsd xmm0,xmm6 # p9+p8+p7+p6+p5+p4+p3+p2 + + addsd xmm3,xmm12 # (c0-(x_h*c1_h+c0))+x_h*c1_h+x*c1_l+x_l*c1_h + + addsd xmm0,qword ptr (exp_p1_0_l-exp_p1_c)[rcx] # +c0_l + + addsd xmm0,xmm3 # +low-order correction terms + addsd xmm0,xmm10 # +(x_h*c1_h+c0), the leading part is added last + + mulsd xmm0,xmm15 # apply the scale factor held in xmm15 + ret + +exp_real_3_n: + movlpd xmm1,qword ptr real_1_d_ln2[rip] + mulsd xmm1,xmm0 # t=x*real_1_d_ln2 + movq xmm4,qword ptr m_round_c[rip] # shift-count constant consumed by exp_real_3_pn_ + subsd xmm1,qword ptr real_0_5[rip] # t-0.5 + + ucomisd xmm1,qword ptr real_m_1022[rip] + jae exp_real_3_pn # t>=real_m_1022: ordinary scaling + + ucomisd xmm1,qword ptr real_m_1076[rip] + jb exp_real_3_n_large # t<real_m_1076 | nan + + movq xmm9,qword ptr qw_1078[rip] # exponent offset for this range; presumably compensated by real_2_p_m_55 below + + call exp_real_3_pn_ + + mulsd xmm0,qword ptr real_2_p_m_55[rip] # rescale the result of the call + ret + +exp_real_3: + movlpd xmm1,qword ptr real_1_d_ln2[rip] + mulsd xmm1,xmm0 # t=x*real_1_d_ln2 + movq xmm4,qword ptr round_c[rip] # shift-count constant consumed by exp_real_3_pn_ + addsd xmm1,qword ptr real_0_5[rip] # t+0.5 + + ucomisd xmm1,qword ptr real_1024[rip] + jae exp_real_3_large # t>=real_1024 + +exp_real_3_pn: + movq xmm9,qword ptr qw_1023[rip] # exponent offset for the ordinary range + +exp_real_3_pn_: + movq xmm2,xmm1 # keep t + psrlq xmm1,52 # sign and exponent field of t + psubq xmm4,xmm1 # shift count = round constant minus that field + movq xmm1,qword ptr mask_all_one[rip] + psllq xmm1,xmm4 # mask with the low bits cleared + andpd xmm1,xmm2 # n = t with its low bits cleared (presumably its integer part) + + movlpd xmm13,qword ptr real_ln2_42[rip] + movlpd xmm14,qword ptr real_ln2_42_l[rip] + + mulsd xmm13,xmm1 # n*real_ln2_42 + mulsd xmm14,xmm1 # nxl=n*real_ln2_42_l + + cvtpd2dq xmm15,xmm1 # n as a 32-bit integer + subsd xmm0,xmm13 # x-n*real_ln2_42 + + movsd xmm13,xmm0 # xh + subsd xmm0,xmm14 # xh-nxl + + paddq xmm15,xmm9 # n+exponent offset + psllq xmm15,52 # shifted into the exponent field: xmm15 is now the scale factor + att_jmp exp_real_ + +exp_real_3_large: + ucomisd xmm1,qword ptr real_1025[rip] + jae exp_real_3_larger # t>=real_1025 + + movq xmm9,qword ptr qw_1022[rip] # exponent offset for this range; presumably compensated by real_2_0 below + + att_call exp_real_3_pn_ + + mulsd xmm0,qword ptr real_2_0[rip] # rescale the result of the call + ret + +exp_real_3_larger: + ucomisd xmm0,qword ptr real_max[rip] + ja exp_real_3_inf_or_nan # x>real_max: return x unchanged + movlpd xmm0,qword ptr real_2_p_1023[rip] + mulsd xmm0,xmm0 # real_2_p_1023 squared (overflows, going by the constant's name) +exp_real_3_inf_or_nan: + ret + +exp_real_3_n_large: + att_jp exp_real_3_inf_or_nan # nan + movlpd xmm0,qword ptr real_0_0[rip] # t below real_m_1076: return real_0_0 + ret + + .globl ln_real + +ln_real: + ucomisd xmm0,qword ptr real_0_8243[rip] + jb ln_real_small #
x<0.82436063535006407342 | nan + ucomisd xmm0,qword ptr real_1_6487[rip] + ja ln_real_large # x>1.6487212707001281468 + movlpd xmm2,qword ptr real_1_0[rip] + + ucomisd xmm0,qword ptr real_7_d_6[rip] + ja ln_real_a_7_d_6 + + ucomisd xmm0,xmm2 + + subsd xmm0,xmm2 + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + jae ln_real_0 + jmp ln_real_s + +ln_real_small: + ucomisd xmm0,qword ptr real_2_p_m_1022[rip] + jae ln_real_ + + ucomisd xmm0,qword ptr real_0_0[rip] + jbe ln_real_less_or_equal_0_or_nan + +# denormalized number + + mulsd xmm0,qword ptr real_2_p_55[rip] + + movlpd xmm1,qword ptr mask_significand[rip] + andpd xmm1,xmm0 + psrlq xmm0,52 + + movlpd xmm2,qword ptr real_1_0[rip] + movlpd xmm3,qword ptr real_0_5[rip] + orpd xmm2,xmm1 + orpd xmm3,xmm1 + + ucomisd xmm2,qword ptr real_1_6487[rip] + jbe ln_real_dn_g1 + + subsd xmm3,qword ptr real_1_0[rip] + psubq xmm0,xmmword ptr qw_1077[rip] + jmp ln_real_l1_ + +ln_real_dn_g1: + ucomisd xmm2,qword ptr real_7_d_6[rip] + + subsd xmm2,qword ptr real_1_0[rip] + psubq xmm0,xmmword ptr qw_1078[rip] + + ja ln_real_g_7_d_6 + jmp ln_real_g1_ + +ln_real_large: + ucomisd xmm0,qword ptr real_max[rip] + ja ln_real_inf_or_nan + +ln_real_: + movlpd xmm1,qword ptr mask_significand[rip] + andpd xmm1,xmm0 + psrlq xmm0,52 + + movlpd xmm2,qword ptr real_1_0[rip] + movlpd xmm3,qword ptr real_0_5[rip] + orpd xmm2,xmm1 + orpd xmm3,xmm1 + + ucomisd xmm2,qword ptr real_1_6487[rip] + jbe ln_real_g1 + jmp ln_real_l1 + +ln_real_0: + movlpd xmm3,qword ptr ln_b_q_1[rip] + movlpd xmm2,qword ptr ln_b_p_1[rip] + + mulsd xmm3,xmm1 # q1 + mulsd xmm2,xmm1 # p1 + movsd xmm7,xmm1 # x + movlpd xmm5,qword ptr ln_b_q_2[rip] + movlpd xmm4,qword ptr ln_b_p_2[rip] + movsd xmm6,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + movsd xmm11,xmm1 # x + mulsd xmm7,xmm6 # x3 + + movlpd xmm9,qword ptr ln_b_q_3[rip] + movlpd xmm8,qword ptr ln_b_p_3[rip] + + mulsd xmm11,xmm0 # x5 + + mulsd xmm5,xmm6 # q2 + mulsd xmm4,xmm6 # p2 + + movlpd xmm10,qword ptr ln_b_q_4[rip] + + mulsd 
xmm9,xmm7 # q3 + mulsd xmm8,xmm7 # p3 + + mulsd xmm10,xmm0 # q4 + mulsd xmm0,qword ptr ln_b_p_4[rip] # p4 + + addsd xmm10,xmm11 # q4+x5 + + addsd xmm0,xmm8 # p3+p4 + addsd xmm9,xmm10 # q3+q4+x5 + + movlpd xmm6,qword ptr real_17_bits[rip] + + addsd xmm0,xmm4 # p2+p3+p4 + addsd xmm5,xmm9 # q2+q3+q4+x5 + + andpd xmm6,xmm1 # x_17_h + movsd xmm8,xmm1 # x + movlpd xmm4,qword ptr real_0_5[rip] + + addsd xmm0,xmm2 # p1+p2+p3+p4 + addsd xmm3,xmm5 # q1+q2+q3+q4+x5 + + subsd xmm8,xmm6 # x_17_l + mulsd xmm4,xmm6 # 0.5*x_17_h + + addsd xmm0,qword ptr ln_b_p_0[rip] # p0+p1+p2+p3+p4 + addsd xmm3,qword ptr ln_b_q_0[rip] # q0+q1+q2+q3+q4+x5 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5*x_17_l + mulsd xmm4,xmm6 # 0.5*x_17_h*x_17_h + addsd xmm6,xmm1 # x+x_17_h + + ucomisd xmm1,qword ptr real_2_p_m_16[rip] + jb ln_real_0_s + + divsd xmm0,xmm3 # p/q + + subsd xmm1,xmm4 # x-0.5*x_17_h*x_17_h + mulsd xmm6,xmm8 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm0,xmm7 # (p/q)*x3 + subsd xmm0,xmm6 # (p/q)*x3-0.5x2_l + addsd xmm0,xmm1 # (p/q)*x3-0.5x2+x + ret + +ln_real_0_s: + divsd xmm0,xmm3 # p/q + + mulsd xmm6,xmm8 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm0,xmm7 # (p/q)*x3 + subsd xmm0,xmm6 # (p/q)*x3-0.5x2_l + subsd xmm0,xmm4 # (p/q)*x3-0.5x2 + addsd xmm0,xmm1 # (p/q)*x3-0.5x2+x + ret + +ln_real_a_7_d_6: + subsd xmm0,xmm2 + + mulsd xmm0,qword ptr real_0_75[rip] + movlpd xmm1,qword ptr real_0_25[rip] + + lea rcx,ln_b_c[rip] + lea rdx,ln_s_c[rip] + + ucomisd xmm0,xmm1 + subsd xmm0,xmm1 + + cmovb rcx,rdx + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + movlpd xmm3,qword ptr (ln_b_q_1-ln_b_c)[rcx] + movlpd xmm2,qword ptr (ln_b_p_1-ln_b_c)[rcx] + + mulsd xmm3,xmm1 # q1 + mulsd xmm2,xmm1 # p1 + movsd xmm7,xmm1 # x + movlpd xmm5,qword ptr (ln_b_q_2-ln_b_c)[rcx] + movlpd xmm4,qword ptr (ln_b_p_2-ln_b_c)[rcx] + movsd xmm6,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + movsd xmm11,xmm1 # x + mulsd xmm7,xmm6 # x3 + + movlpd xmm9,qword ptr (ln_b_q_3-ln_b_c)[rcx] + movlpd xmm8,qword ptr (ln_b_p_3-ln_b_c)[rcx] + + mulsd 
xmm11,xmm0 # x5 + + mulsd xmm5,xmm6 # q2 + mulsd xmm4,xmm6 # p2 + + movlpd xmm6,qword ptr real_17_bits[rip] + movlpd xmm10,qword ptr (ln_b_q_4-ln_b_c)[rcx] + + mulsd xmm9,xmm7 # q3 + mulsd xmm8,xmm7 # p3 + + andpd xmm6,xmm1 # x_17_h + + mulsd xmm10,xmm0 # q4 + mulsd xmm0,qword ptr (ln_b_p_4-ln_b_c)[rcx] # p4 + + addsd xmm10,xmm11 # q4+x5 + + addsd xmm0,xmm8 # p3+p4 + addsd xmm9,xmm10 # q3+q4+x5 + + movsd xmm8,xmm1 # x + + addsd xmm0,xmm4 # p2+p3+p4 + addsd xmm5,xmm9 # q2+q3+q4+x5 + + movlpd xmm4,qword ptr real_0_5[rip] + subsd xmm8,xmm6 # x_17_l + mulsd xmm4,xmm6 # 0.5*x_17_h + + addsd xmm0,xmm2 # p1+p2+p3+p4 + addsd xmm3,xmm5 # q1+q2+q3+q4+x5 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5*x_17_l + mulsd xmm4,xmm6 # 0.5*x_17_h*x_17_h + addsd xmm6,xmm1 # x+x_17_h + + addsd xmm0,qword ptr (ln_b_p_0-ln_b_c)[rcx] # p0+p1+p2+p3+p4 + addsd xmm3,qword ptr (ln_b_q_0-ln_b_c)[rcx] # q0+q1+q2+q3+q4+x5 + + subsd xmm1,xmm4 # x-0.5*x_17_h*x_17_h + mulsd xmm6,xmm8 # 0.5*x_17_l*(x+x_17_h) + + divsd xmm0,xmm3 # p/q + + movlpd xmm2,qword ptr ln_4_d_3_53[rip] + movlpd xmm4,qword ptr ln_4_d_3_53_l[rip] + addsd xmm2,xmm1 + subsd xmm4,xmm6 + + movlpd xmm3,qword ptr ln_4_d_3_53[rip] + subsd xmm3,xmm2 + + mulsd xmm0,xmm7 # (p/q)*x3 + + addsd xmm3,xmm1 + addsd xmm3,xmm4 + + addsd xmm0,xmm3 + addsd xmm0,xmm2 + ret + +ln_real_g1: + ucomisd xmm2,qword ptr real_7_d_6[rip] + + subsd xmm2,qword ptr real_1_0[rip] + + psubq xmm0,xmmword ptr qw_1023[rip] + + att_ja ln_real_g_7_d_6 + +ln_real_g1_: + movsd xmm1,xmm2 # x + mulsd xmm2,xmm2 # x2 + + movlpd xmm4,qword ptr ln_b_q_1[rip] + movlpd xmm3,qword ptr ln_b_p_1[rip] + mulsd xmm4,xmm1 # q1 + mulsd xmm3,xmm1 # p1 + movsd xmm8,xmm1 # x + + cvtdq2pd xmm0,xmm0 + + movlpd xmm6,qword ptr ln_b_q_2[rip] + movlpd xmm5,qword ptr ln_b_p_2[rip] + movsd xmm7,xmm2 # x2 + mulsd xmm2,xmm2 # x4 + + movsd xmm12,xmm1 # x + mulsd xmm8,xmm7 # x3 + + movlpd xmm10,qword ptr ln_b_q_3[rip] + movlpd xmm9,qword ptr ln_b_p_3[rip] + + mulsd xmm12,xmm2 # x5 + + mulsd xmm6,xmm7 # 
q2 + mulsd xmm5,xmm7 # p2 + + movlpd xmm11,qword ptr ln_b_p_4[rip] + + mulsd xmm10,xmm8 # q3 + mulsd xmm9,xmm8 # p3 + + mulsd xmm11,xmm2 # p4 + mulsd xmm2,qword ptr ln_b_q_4[rip] # q4 + + addsd xmm2,xmm12 # q4+x5 + + addsd xmm2,xmm10 # q3+q4+x5 + addsd xmm9,xmm11 # p3+p4 + + movlpd xmm7,qword ptr real_17_bits[rip] + + addsd xmm2,xmm6 # q2+q3+q4+x5 + addsd xmm5,xmm9 # p2+p3+p4 + + andpd xmm7,xmm1 # x_17_h + movsd xmm14,xmm1 # x + movlpd xmm13,qword ptr real_0_5[rip] + + movlpd xmm6,qword ptr real_ln2_42_l[rip] + + addsd xmm2,xmm4 # q1+q2+q3+q4+x5 + addsd xmm3,xmm5 # p1+p2+p3+p4 + + subsd xmm14,xmm7 # x_17_l + mulsd xmm13,xmm7 # 0.5*x_17_h + + mulsd xmm6,xmm0 + mulsd xmm0,qword ptr real_ln2_42[rip] + + addsd xmm2,qword ptr ln_b_q_0[rip] # q0+q1+q2+q3+q4+x5 + addsd xmm3,qword ptr ln_b_p_0[rip] # p0+p1+p2+p3+p4 + + mulsd xmm14,qword ptr real_0_5[rip] # 0.5*x_17_l + mulsd xmm13,xmm7 # 0.5*x_17_h*x_17_h + addsd xmm7,xmm1 # x+x_17_h + + ucomisd xmm1,qword ptr real_2_p_m_16[rip] + jb ln_real_g1_s + +ln_real_g1_l: + divsd xmm3,xmm2 # p/q + + subsd xmm1,xmm13 # x-0.5*x_17_h*x_17_h + mulsd xmm7,xmm14 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm3,xmm8 # (p/q)*x3 + subsd xmm3,xmm7 # (p/q)*x3-0.5x2_l + addsd xmm3,xmm6 + + + movsd xmm2,xmm0 + addsd xmm0,xmm1 + subsd xmm2,xmm0 + addsd xmm2,xmm1 + addsd xmm2,xmm3 + addsd xmm0,xmm2 + + +# addsd xmm1,xmm3 # (p/q)*x3-0.5x2+x +# addsd xmm0,xmm1 + ret + +ln_real_g1_s: + divsd xmm3,xmm2 # p/q + + mulsd xmm7,xmm14 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm3,xmm8 # (p/q)*x3 + subsd xmm3,xmm7 # (p/q)*x3-0.5x2_l + addsd xmm3,xmm6 + subsd xmm3,xmm13 # (p/q)*x3-0.5x2 + addsd xmm1,xmm3 # (p/q)*x3-0.5x2_l+x + addsd xmm0,xmm1 + ret + +ln_real_g_7_d_6: + mulsd xmm2,qword ptr real_0_75[rip] + movlpd xmm1,qword ptr real_0_25[rip] + + lea rcx,ln_b_c[rip] + lea rdx,ln_s_c[rip] + + ucomisd xmm2,xmm1 + subsd xmm2,xmm1 + + cmovb rcx,rdx + + movsd xmm1,xmm2 # x + mulsd xmm2,xmm2 # x2 + + movlpd xmm4,qword ptr (ln_b_q_1-ln_b_c)[rcx] + movlpd xmm3,qword ptr 
(ln_b_p_1-ln_b_c)[rcx] + mulsd xmm4,xmm1 # q1 + mulsd xmm3,xmm1 # p1 + movsd xmm8,xmm1 # x + + cvtdq2pd xmm0,xmm0 + + movlpd xmm6,qword ptr (ln_b_q_2-ln_b_c)[rcx] + movlpd xmm5,qword ptr (ln_b_p_2-ln_b_c)[rcx] + movsd xmm7,xmm2 # x2 + mulsd xmm2,xmm2 # x4 + + movsd xmm12,xmm1 # x + mulsd xmm8,xmm7 # x3 + + movlpd xmm10,qword ptr (ln_b_q_3-ln_b_c)[rcx] + movlpd xmm9,qword ptr (ln_b_p_3-ln_b_c)[rcx] + + mulsd xmm12,xmm2 # x5 + + mulsd xmm6,xmm7 # q2 + mulsd xmm5,xmm7 # p2 + + movlpd xmm11,qword ptr (ln_b_p_4-ln_b_c)[rcx] + + mulsd xmm10,xmm8 # q3 + mulsd xmm9,xmm8 # p3 + + mulsd xmm11,xmm2 # p4 + mulsd xmm2,qword ptr (ln_b_q_4-ln_b_c)[rcx] # q4 + + addsd xmm2,xmm12 # q4+x5 + + addsd xmm2,xmm10 # q3+q4+x5 + addsd xmm9,xmm11 # p3+p4 + + movlpd xmm7,qword ptr real_17_bits[rip] + + addsd xmm2,xmm6 # q2+q3+q4+x5 + addsd xmm5,xmm9 # p2+p3+p4 + + andpd xmm7,xmm1 # x_17_h + movsd xmm14,xmm1 # x + movlpd xmm13,qword ptr real_0_5[rip] + + movlpd xmm6,qword ptr real_ln2_42_l[rip] + + addsd xmm2,xmm4 # q1+q2+q3+q4+x5 + addsd xmm3,xmm5 # p1+p2+p3+p4 + + subsd xmm14,xmm7 # x_17_l + mulsd xmm13,xmm7 # 0.5*x_17_h + + mulsd xmm6,xmm0 + mulsd xmm0,qword ptr real_ln2_42[rip] + + addsd xmm2,qword ptr (ln_b_q_0-ln_b_c)[rcx] # q0+q1+q2+q3+q4+x5 + addsd xmm3,qword ptr (ln_b_p_0-ln_b_c)[rcx] # p0+p1+p2+p3+p4 + + mulsd xmm14,qword ptr real_0_5[rip] # 0.5*x_17_l + mulsd xmm13,xmm7 # 0.5*x_17_h*x_17_h + addsd xmm7,xmm1 # x+x_17_h + + addsd xmm0,qword ptr ln_4_d_3_42[rip] + addsd xmm6,qword ptr ln_4_d_3_42_l[rip] + att_jmp ln_real_g1_l + +ln_real_s: + movlpd xmm3,qword ptr ln_s_q_1[rip] + movlpd xmm2,qword ptr ln_s_p_1[rip] + + mulsd xmm3,xmm1 # q1 + mulsd xmm2,xmm1 # p1 + movsd xmm7,xmm1 # x + movlpd xmm5,qword ptr ln_s_q_2[rip] + movlpd xmm4,qword ptr ln_s_p_2[rip] + movsd xmm6,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + movsd xmm11,xmm1 # x + mulsd xmm7,xmm6 # x3 + + movlpd xmm9,qword ptr ln_s_q_3[rip] + movlpd xmm8,qword ptr ln_s_p_3[rip] + + mulsd xmm11,xmm0 # x5 + + mulsd xmm5,xmm6 # q2 + mulsd 
xmm4,xmm6 # p2 + + movlpd xmm10,qword ptr ln_s_q_4[rip] + + mulsd xmm9,xmm7 # q3 + mulsd xmm8,xmm7 # p3 + + mulsd xmm10,xmm0 # q4 + mulsd xmm0,qword ptr ln_s_p_4[rip] # p4 + + addsd xmm10,xmm11 # q4+x5 + + addsd xmm0,xmm8 # p3+p4 + addsd xmm9,xmm10 # q3+q4+x5 + + movlpd xmm6,qword ptr real_17_bits[rip] + + addsd xmm0,xmm4 # p2+p3+p4 + addsd xmm5,xmm9 # q2+q3+q4+x5 + + andpd xmm6,xmm1 # x_17_h + movsd xmm8,xmm1 # x + movlpd xmm4,qword ptr real_0_5[rip] + + addsd xmm0,xmm2 # p1+p2+p3+p4 + addsd xmm3,xmm5 # q1+q2+q3+q4+x5 + + subsd xmm8,xmm6 # x_17_l + mulsd xmm4,xmm6 # 0.5*x_17_h + + addsd xmm0,qword ptr ln_s_p_0[rip] # p0+p1+p2+p3+p4 + addsd xmm3,qword ptr ln_s_q_0[rip] # q0+q1+q2+q3+q4+x5 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5*x_17_l + mulsd xmm4,xmm6 # 0.5*x_17_h*x_17_h + addsd xmm6,xmm1 # x+x_17_h + + ucomisd xmm1,qword ptr real_m_2_p_m_16[rip] + ja ln_real_s_s + + divsd xmm0,xmm3 # p/q + + subsd xmm1,xmm4 # x-0.5*x_17_h*x_17_h + mulsd xmm6,xmm8 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm0,xmm7 # (p/q)*x3 + subsd xmm0,xmm6 # (p/q)*x3-0.5x2_l + addsd xmm0,xmm1 # (p/q)*x3-0.5x2+x + ret + +ln_real_s_s: + divsd xmm0,xmm3 # p/q + + mulsd xmm6,xmm8 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm0,xmm7 # (p/q)*x3 + subsd xmm0,xmm6 # (p/q)*x3-0.5x2_l + subsd xmm0,xmm4 # (p/q)*x3-0.5x2 + addsd xmm0,xmm1 # (p/q)*x3-0.5x2+x + ret + + +ln_real_l1: + subsd xmm3,qword ptr real_1_0[rip] + + psubq xmm0,xmmword ptr qw_1022[rip] +ln_real_l1_: + movsd xmm1,xmm3 # x + mulsd xmm3,xmm3 # x2 + + movlpd xmm4,qword ptr ln_s_q_1[rip] + movlpd xmm2,qword ptr ln_s_p_1[rip] + mulsd xmm4,xmm1 # q1 + mulsd xmm2,xmm1 # p1 + movsd xmm8,xmm1 # x + + cvtdq2pd xmm0,xmm0 + + movlpd xmm6,qword ptr ln_s_q_2[rip] + movlpd xmm5,qword ptr ln_s_p_2[rip] + movsd xmm7,xmm3 # x2 + mulsd xmm3,xmm3 # x4 + + movsd xmm12,xmm1 # x + mulsd xmm8,xmm7 # x3 + + movlpd xmm10,qword ptr ln_s_q_3[rip] + movlpd xmm9,qword ptr ln_s_p_3[rip] + + mulsd xmm12,xmm3 # x5 + + mulsd xmm6,xmm7 # q2 + mulsd xmm5,xmm7 # p2 + + movlpd 
xmm11,qword ptr ln_s_p_4[rip] + + mulsd xmm10,xmm8 # q3 + mulsd xmm9,xmm8 # p3 + + mulsd xmm11,xmm3 # p4 + mulsd xmm3,qword ptr ln_s_q_4[rip] # q4 + + addsd xmm3,xmm12 # q4+x5 + + addsd xmm3,xmm10 # q3+q4+x5 + addsd xmm9,xmm11 # p3+p4 + + movlpd xmm7,qword ptr real_17_bits[rip] + + addsd xmm3,xmm6 # q2+q3+q4+x5 + addsd xmm5,xmm9 # p2+p3+p4 + + andpd xmm7,xmm1 # x_17_h + movsd xmm14,xmm1 # x + movlpd xmm13,qword ptr real_0_5[rip] + + movlpd xmm6,qword ptr real_ln2_42_l[rip] + + addsd xmm3,xmm4 # q1+q2+q3+q4+x5 + addsd xmm2,xmm5 # p1+p2+p3+p4 + + subsd xmm14,xmm7 # x_17_l + mulsd xmm13,xmm7 # 0.5*x_17_h + + mulsd xmm6,xmm0 + mulsd xmm0,qword ptr real_ln2_42[rip] + + addsd xmm3,qword ptr ln_s_q_0[rip] # q0+q1+q2+q3+q4+x5 + addsd xmm2,qword ptr ln_s_p_0[rip] # p0+p1+p2+p3+p4 + + mulsd xmm14,qword ptr real_0_5[rip] # 0.5*x_17_l + mulsd xmm13,xmm7 # 0.5*x_17_h*x_17_h + addsd xmm7,xmm1 # x+x_17_h + + ucomisd xmm1,qword ptr real_m_2_p_m_16[rip] + ja ln_real_l1_s + + divsd xmm2,xmm3 # p/q + + subsd xmm1,xmm13 # x-0.5*x_17_h*x_17_h + mulsd xmm7,xmm14 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm2,xmm8 # (p/q)*x3 + subsd xmm2,xmm7 # (p/q)*x3-0.5x2_l + addsd xmm2,xmm6 + + + movsd xmm3,xmm0 + addsd xmm0,xmm1 + subsd xmm3,xmm0 + addsd xmm3,xmm1 + addsd xmm3,xmm2 + addsd xmm0,xmm3 + +# addsd xmm1,xmm2 # (p/q)*x3-0.5x2+x +# addsd xmm0,xmm1 + ret + +ln_real_l1_s: + divsd xmm2,xmm3 # p/q + + mulsd xmm7,xmm14 # 0.5*x_17_l*(x+x_17_h) + + mulsd xmm2,xmm8 # (p/q)*x3 + subsd xmm2,xmm7 # (p/q)*x3-0.5x2_l + addsd xmm2,xmm6 + subsd xmm2,xmm13 # (p/q)*x3-0.5x2 + addsd xmm1,xmm2 # (p/q)*x3-0.5x2+x + addsd xmm0,xmm1 + ret + +ln_real_less_or_equal_0_or_nan: + jb ln_real_less_0_or_nan # x<0.0 | nan + + movsd xmm0,qword ptr real_m_1_0[rip] + divsd xmm0,qword ptr real_0_0[rip] # yield -inf + ret + +ln_real_less_0_or_nan: + att_jp ln_real_inf_or_nan # nan + + movsd xmm0,qword ptr real_0_0[rip] # yield nan + divsd xmm0,xmm0 + +ln_real_inf_or_nan: + ret + + .globl log10_real + +log10_real: +# ucomisd 
xmm0,qword ptr real_0_7025 + ucomisd xmm0,qword ptr real_0_833[rip] +# jb log10_real_small # x<0.7025 | nan + jb log10_real_small # x<0.833 | nan + ucomisd xmm0,qword ptr real_1_666[rip] + ja log10_real_large # x>1.666 + + movlpd xmm2,qword ptr real_1_0[rip] + + ucomisd xmm0,qword ptr real_7_d_6[rip] + ja log10_real_a_7_d_6 + + ucomisd xmm0,xmm2 + + subsd xmm0,xmm2 + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + jae log10_real_0 + jmp log10_real_s + +log10_real_small: + ucomisd xmm0,qword ptr real_2_p_m_1022[rip] + jae log10_real_ + + ucomisd xmm0,qword ptr real_0_0[rip] + jbe log10_real_less_or_equal_0_or_nan + +# denormalized number + + mulsd xmm0,qword ptr real_2_p_55[rip] + + movlpd xmm1,qword ptr mask_significand[rip] + andpd xmm1,xmm0 + psrlq xmm0,52 + + movlpd xmm2,qword ptr real_1_0[rip] + movlpd xmm3,qword ptr real_0_5[rip] + orpd xmm2,xmm1 + orpd xmm3,xmm1 + +# ucomisd xmm2,qword ptr real_1_405 + ucomisd xmm2,qword ptr real_1_666[rip] + jbe log10_real_dn_g1 + + subsd xmm3,qword ptr real_1_0[rip] + psubq xmm0,xmmword ptr qw_1077[rip] + jmp log10_real_l1_ + +log10_real_dn_g1: + ucomisd xmm2,qword ptr real_7_d_6[rip] + + subsd xmm2,qword ptr real_1_0[rip] + psubq xmm0,xmmword ptr qw_1078[rip] + + ja log10_real_g_7_d_6 + jmp log10_real_g1_ + +log10_real_large: + ucomisd xmm0,qword ptr real_max[rip] + ja log10_real_inf_or_nan + +log10_real_: + movlpd xmm1,qword ptr mask_significand[rip] + andpd xmm1,xmm0 + psrlq xmm0,52 + + movlpd xmm2,qword ptr real_1_0[rip] + movlpd xmm3,qword ptr real_0_5[rip] + orpd xmm2,xmm1 + orpd xmm3,xmm1 + +# ucomisd xmm2,qword ptr real_1_405 + ucomisd xmm2,qword ptr real_1_666[rip] + jbe log10_real_g1 + jmp log10_real_l1 + +log10_real_0: + movlpd xmm2,qword ptr log10_b_p_1[rip] + movlpd xmm3,qword ptr log10_b_q_1[rip] + mulsd xmm2,xmm1 # p1 + mulsd xmm3,xmm1 # q1 + movsd xmm7,xmm1 # x + + movlpd xmm4,qword ptr log10_b_p_2[rip] + movlpd xmm5,qword ptr log10_b_q_2[rip] + mulsd xmm7,xmm0 # x3 + movsd xmm6,xmm0 # x2 + mulsd xmm0,xmm0 
# x4 + + movlpd xmm8,qword ptr log10_b_p_3[rip] + movlpd xmm9,qword ptr log10_b_q_3[rip] + + mulsd xmm4,xmm6 # p2 + mulsd xmm5,xmm6 # q2 + + movlpd xmm10,qword ptr log10_b_p_4[rip] + + mulsd xmm8,xmm7 # p3 + mulsd xmm9,xmm7 # q3 + + movsd xmm11,xmm7 # x3 + mulsd xmm7,xmm6 # x5 + + mulsd xmm10,xmm0 # p4 + mulsd xmm0,qword ptr log10_b_q_4[rip] # q4 + + addsd xmm0,xmm7 # q4+x5 + + addsd xmm9,xmm0 # q3+q4+x5 + addsd xmm8,xmm10 # p3+p4 + + mulsd xmm6,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm9,xmm5 # q2+q3+q4+x5 + addsd xmm4,xmm8 # p2+p3+p4 + + addsd xmm9,xmm3 # q1+q2+q3+q4+x5 + addsd xmm2,xmm4 # p1+p2+p3+p4 + + movlpd xmm4,qword ptr real_26_bits[rip] + movsd xmm0,xmm1 # x + + addsd xmm9,qword ptr log10_b_q_0[rip] # q0+q1+q2+q3+q4+x5 + addsd xmm2,qword ptr log10_b_p_0[rip] # p0+p1+p2+p3+p4 + + subsd xmm1,xmm6 # x-0.5x2 + + divsd xmm2,xmm9 # p/q + + andpd xmm4,xmm1 # (x-0.5x2)_h + subsd xmm0,xmm4 # x-(x-0.5x2)_h + subsd xmm0,xmm6 # (x-0.5x2)_l + + mulsd xmm2,xmm11 # (p/q)*x3 + + mulsd xmm1,qword ptr d1ln10_26_l[rip] # (x-0.5x2) * (1/ln 10)_l + mulsd xmm0,qword ptr d1ln10_26[rip] # (x-0.5x2)_l * (1/ln 10)_h + mulsd xmm4,qword ptr d1ln10_26[rip] # (x-0.5x2)_h * (1/ln 10)_h + + addsd xmm0,xmm1 # (x-0.5x2) * (1/ln 10)_l+(x-0.5x2)_l * (1/ln 10)_h + addsd xmm0,xmm2 + addsd xmm0,xmm4 # (x-0.5x2)*(1/ln 10) + (p/q)*x3 + + ret + +#log10_real_0_: +# movlpd xmm2,qword ptr log10_b_p_1 +# movlpd xmm10,qword ptr log10_b_q_1 +# mulsd xmm2,xmm1 # p1 +# mulsd xmm10,xmm1 # q1 +# movsd xmm7,xmm1 # x +# +# movlpd xmm11,qword ptr log10_b_p_2 +# movlpd xmm5,qword ptr log10_b_q_2 +# mulsd xmm7,xmm0 # x3 +# movsd xmm6,xmm0 # x2 +# mulsd xmm0,xmm0 # x4 +# +# movlpd xmm8,qword ptr log10_b_p_3 +# movlpd xmm9,qword ptr log10_b_q_3 +# +## mulsd xmm11,xmm6 # p2 +# mulsd xmm5,xmm6 # q2 +# +# movlpd xmm3,qword ptr log10_b_p_4 +# +# mulsd xmm8,xmm7 # p3 +# mulsd xmm9,xmm7 # q3 +# +# mulsd xmm6,xmm7 # x5 +# +# mulsd xmm3,xmm0 # p4 +# mulsd xmm0,qword ptr log10_b_q_4 # q4 +# +# movsd xmm4,xmm1 # x +# 
addsd xmm0,xmm6 # q4+x5 +# movlpd xmm6,qword ptr real_m_0_5 +# +# addsd xmm9,xmm0 # q3+q4+x5 +# addsd xmm8,xmm3 # p3+p4 +# +# movlpd xmm0,qword ptr real_14_bits +# +# andpd xmm0,xmm1 # x_h +# mulsd xmm6,xmm1 # -0.5x +# +# addsd xmm9,xmm5 # q2+q3+q4+x5 +# addsd xmm11,xmm8 # p2+p3+p4 +# +# movsd xmm3,xmm1 # x +# subsd xmm4,xmm0 # x_l +# addsd xmm6,qword ptr real_1_0 # 1-0.5x +# movlpd xmm5,qword ptr real_13_bits +# +# addsd xmm9,xmm10 # q1+q2+q3+q4+x5 +# addsd xmm2,xmm11 # p1+p2+p3+p4 +# +# andpd xmm5,xmm6 # (1-0.5x)_h +# mulsd xmm3,xmm6 # (1-0.5x)*x = x-0.5x2 +# +# addsd xmm9,qword ptr log10_b_q_0 # q0+q1+q2+q3+q4+x5 +# addsd xmm2,qword ptr log10_b_p_0 # p0+p1+p2+p3+p4 +# +# subsd xmm6,xmm5 # (1-0.5x)_l +# mulsd xmm0,xmm5 # (1-0.5x)_h*x_h = (x-0.5x2)_h +# mulsd xmm4,xmm5 # (1-0.5x)_h*x_l +# +# divsd xmm2,xmm9 # p/q +# +# mulsd xmm3,qword ptr d1ln10_26_l # (x-0.5x2) * (1/ln 10)_l +# mulsd xmm6,xmm1 # (1-0.5x)_l*x +# mulsd xmm0,qword ptr d1ln10_26 # (x-0.5x2)_h * (1/ln 10)_h +# +# addsd xmm6,xmm4 # (x-0.5x2)_l +# +# mulsd xmm6,qword ptr d1ln10_26 # (x-0.5x2)_l * (1/ln 10)_h +# +# mulsd xmm2,xmm7 # (p/q)*x3 +# +# addsd xmm6,xmm3 +# +# addsd xmm2,xmm6 +# +# addsd xmm0,xmm2 +# +# ret + +log10_real_a_7_d_6: + subsd xmm0,xmm2 + + mulsd xmm0,qword ptr real_0_75[rip] + movlpd xmm1,qword ptr real_0_25[rip] + + lea rcx,log10_b_c[rip] + lea rdx,log10_s_c[rip] + + ucomisd xmm0,xmm1 + subsd xmm0,xmm1 + + cmovb rcx,rdx + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + movlpd xmm2,qword ptr (log10_b_p_1-log10_b_c)[rcx] + movlpd xmm3,qword ptr (log10_b_q_1-log10_b_c)[rcx] + mulsd xmm2,xmm1 # p1 + mulsd xmm3,xmm1 # q1 + movsd xmm7,xmm1 # x + + movlpd xmm4,qword ptr (log10_b_p_2-log10_b_c)[rcx] + movlpd xmm5,qword ptr (log10_b_q_2-log10_b_c)[rcx] + mulsd xmm7,xmm0 # x3 + movsd xmm6,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + movlpd xmm8,qword ptr (log10_b_p_3-log10_b_c)[rcx] + movlpd xmm9,qword ptr (log10_b_q_3-log10_b_c)[rcx] + + mulsd xmm4,xmm6 # p2 + mulsd xmm5,xmm6 # q2 + + movlpd 
xmm10,qword ptr (log10_b_p_4-log10_b_c)[rcx] + + mulsd xmm8,xmm7 # p3 + mulsd xmm9,xmm7 # q3 + + movsd xmm11,xmm7 # x3 + mulsd xmm7,xmm6 # x5 + + mulsd xmm10,xmm0 # p4 + mulsd xmm0,qword ptr (log10_b_q_4-log10_b_c)[rcx] # q4 + + addsd xmm0,xmm7 # q4+x5 + + addsd xmm9,xmm0 # q3+q4+x5 + addsd xmm8,xmm10 # p3+p4 + + mulsd xmm6,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm9,xmm5 # q2+q3+q4+x5 + addsd xmm4,xmm8 # p2+p3+p4 + + addsd xmm9,xmm3 # q1+q2+q3+q4+x5 + addsd xmm2,xmm4 # p1+p2+p3+p4 + + movlpd xmm4,qword ptr real_26_bits[rip] + movsd xmm0,xmm1 # x + + addsd xmm9,qword ptr (log10_b_q_0-log10_b_c)[rcx] # q0+q1+q2+q3+q4+x5 + addsd xmm2,qword ptr (log10_b_p_0-log10_b_c)[rcx] # p0+p1+p2+p3+p4 + + subsd xmm1,xmm6 # x-0.5x2 + + divsd xmm2,xmm9 # p/q + + andpd xmm4,xmm1 # (x-0.5x2)_h + subsd xmm0,xmm4 # x-(x-0.5x2)_h + subsd xmm0,xmm6 # (x-0.5x2)_l + + mulsd xmm2,xmm11 # (p/q)*x3 + + mulsd xmm1,qword ptr d1ln10_26_l[rip] # (x-0.5x2) * (1/ln 10)_l + mulsd xmm0,qword ptr d1ln10_26[rip] # (x-0.5x2)_l * (1/ln 10)_h + mulsd xmm4,qword ptr d1ln10_26[rip] # (x-0.5x2)_h * (1/ln 10)_h + + addsd xmm0,xmm1 # (x-0.5x2) * (1/ln 10)_l+(x-0.5x2)_l * (1/ln 10)_h + + movlpd xmm3,qword ptr log10_4_d_3_53[rip] + movlpd xmm5,qword ptr log10_4_d_3_53[rip] + + addsd xmm3,xmm4 + addsd xmm0,qword ptr log10_4_d_3_53_l[rip] + + subsd xmm5,xmm3 + + addsd xmm5,xmm4 + + addsd xmm0,xmm5 + + addsd xmm0,xmm2 + addsd xmm0,xmm3 # (x-0.5x2)*(1/ln 10) + (p/q)*x3 + ret + +log10_real_g1: + ucomisd xmm2,qword ptr real_7_d_6[rip] + + subsd xmm2,qword ptr real_1_0[rip] + + psubq xmm0,xmmword ptr qw_1023[rip] + + att_ja log10_real_g_7_d_6 + +log10_real_g1_: + movsd xmm1,xmm2 # x + mulsd xmm2,xmm2 # x2 + + movlpd xmm11,qword ptr log10_b_p_1[rip] + movlpd xmm10,qword ptr log10_b_q_1[rip] + mulsd xmm11,xmm1 # p1 + mulsd xmm10,xmm1 # q1 + movsd xmm7,xmm1 # x + + cvtdq2pd xmm12,xmm0 + + movlpd xmm0,qword ptr log10_b_p_2[rip] + movlpd xmm5,qword ptr log10_b_q_2[rip] + mulsd xmm7,xmm2 # x3 + movsd xmm6,xmm2 # x2 + 
mulsd xmm2,xmm2 # x4 + + movlpd xmm8,qword ptr log10_b_p_3[rip] + movlpd xmm9,qword ptr log10_b_q_3[rip] + + mulsd xmm0,xmm6 # p2 + mulsd xmm5,xmm6 # q2 + + movlpd xmm3,qword ptr log10_b_p_4[rip] + + mulsd xmm8,xmm7 # p3 + mulsd xmm9,xmm7 # q3 + + mulsd xmm6,xmm7 # x5 + + mulsd xmm3,xmm2 # p4 + mulsd xmm2,qword ptr log10_b_q_4[rip] # q4 + + movsd xmm4,xmm1 # x + addsd xmm2,xmm6 # q4+x5 + movlpd xmm6,qword ptr real_m_0_5[rip] + + addsd xmm9,xmm2 # q3+q4+x5 + addsd xmm8,xmm3 # p3+p4 + + movlpd xmm13,qword ptr real_log2_10_42_l[rip] + movlpd xmm2,qword ptr real_14_bits[rip] + + andpd xmm2,xmm1 # x_h + mulsd xmm6,xmm1 # -0.5x + + addsd xmm9,xmm5 # q2+q3+q4+x5 + addsd xmm0,xmm8 # p2+p3+p4 + + mulsd xmm13,xmm12 + mulsd xmm12,qword ptr real_log2_10_42[rip] + + movsd xmm3,xmm1 # x + subsd xmm4,xmm2 # x_l + addsd xmm6,qword ptr real_1_0[rip] # 1-0.5x + movlpd xmm5,qword ptr real_13_bits[rip] + + addsd xmm9,xmm10 # q1+q2+q3+q4+x5 + addsd xmm0,xmm11 # p1+p2+p3+p4 + + andpd xmm5,xmm6 # (1-0.5x)_h + mulsd xmm3,xmm6 # (1-0.5x)*x = x-0.5x2 + + addsd xmm9,qword ptr log10_b_q_0[rip] # q0+q1+q2+q3+q4+x5 + addsd xmm0,qword ptr log10_b_p_0[rip] # p0+p1+p2+p3+p4 + + subsd xmm6,xmm5 # (1-0.5x)_l + mulsd xmm2,xmm5 # (1-0.5x)_h*x_h = (x-0.5x2)_h + mulsd xmm4,xmm5 # (1-0.5x)_h*x_l + + divsd xmm0,xmm9 # p/q + + mulsd xmm3,qword ptr d1ln10_26_l[rip] # (x-0.5x2) * (1/ln 10)_l + mulsd xmm6,xmm1 # (1-0.5x)_l*x + mulsd xmm2,qword ptr d1ln10_26[rip] # (x-0.5x2)_h * (1/ln 10)_h + + addsd xmm6,xmm4 # (x-0.5x2)_l + + mulsd xmm6,qword ptr d1ln10_26[rip] # (x-0.5x2)_l * (1/ln 10)_h + + mulsd xmm0,xmm7 # (p/q)*x3 + + addsd xmm6,xmm3 + + addsd xmm0,xmm6 + + addsd xmm0,xmm13 + + movsd xmm3,xmm2 + addsd xmm2,xmm12 + subsd xmm12,xmm2 + addsd xmm12,xmm3 + addsd xmm0,xmm12 + addsd xmm0,xmm2 + +# addsd xmm0,xmm2 +# addsd xmm0,xmm12 + + ret + +log10_real_g_7_d_6: + mulsd xmm2,qword ptr real_0_75[rip] + movlpd xmm1,qword ptr real_0_25[rip] + + lea rcx,log10_b_c[rip] + lea rdx,log10_s_c[rip] + + ucomisd 
xmm2,xmm1 + subsd xmm2,xmm1 + + cmovb rcx,rdx + + movsd xmm1,xmm2 # x + mulsd xmm2,xmm2 # x2 + + movlpd xmm3,qword ptr (log10_b_p_1-log10_b_c)[rcx] + movlpd xmm4,qword ptr (log10_b_q_1-log10_b_c)[rcx] + mulsd xmm3,xmm1 # p1 + mulsd xmm4,xmm1 # q1 + movsd xmm8,xmm1 # x + + cvtdq2pd xmm0,xmm0 + + movlpd xmm5,qword ptr (log10_b_p_2-log10_b_c)[rcx] + movlpd xmm6,qword ptr (log10_b_q_2-log10_b_c)[rcx] + mulsd xmm8,xmm2 # x3 + movsd xmm7,xmm2 # x2 + mulsd xmm2,xmm2 # x4 + + movlpd xmm9,qword ptr (log10_b_p_3-log10_b_c)[rcx] + movlpd xmm10,qword ptr (log10_b_q_3-log10_b_c)[rcx] + + mulsd xmm5,xmm7 # p2 + mulsd xmm6,xmm7 # q2 + + movlpd xmm11,qword ptr (log10_b_p_4-log10_b_c)[rcx] + + mulsd xmm9,xmm8 # p3 + mulsd xmm10,xmm8 # q3 + + movsd xmm12,xmm8 # x3 + mulsd xmm8,xmm7 # x5 + + mulsd xmm11,xmm2 # p4 + mulsd xmm2,qword ptr (log10_b_q_4-log10_b_c)[rcx] # q4 + + addsd xmm2,xmm8 # q4+x5 + + addsd xmm10,xmm2 # q3+q4+x5 + addsd xmm9,xmm11 # p3+p4 + + mulsd xmm7,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm10,xmm6 # q2+q3+q4+x5 + addsd xmm5,xmm9 # p2+p3+p4 + + movlpd xmm6,qword ptr real_log2_10_42_l[rip] + + addsd xmm10,xmm4 # q1+q2+q3+q4+x5 + addsd xmm3,xmm5 # p1+p2+p3+p4 + + movlpd xmm5,qword ptr real_26_bits[rip] + movsd xmm2,xmm1 # x + + mulsd xmm6,xmm0 + mulsd xmm0,qword ptr real_log2_10_42[rip] + + addsd xmm10,qword ptr (log10_b_q_0-log10_b_c)[rcx] # q0+q1+q2+q3+q4+x5 + addsd xmm3,qword ptr (log10_b_p_0-log10_b_c)[rcx] # p0+p1+p2+p3+p4 + + subsd xmm1,xmm7 # x-0.5x2 + + addsd xmm6,qword ptr log10_4_d_3_42_l[rip] + addsd xmm0,qword ptr log10_4_d_3_42[rip] + + divsd xmm3,xmm10 # p/q + + andpd xmm5,xmm1 # (x-0.5x2)_h + subsd xmm2,xmm5 # x-(x-0.5x2)_h + subsd xmm2,xmm7 # (x-0.5x2)_l + + mulsd xmm3,xmm12 # (p/q)*x3 + + mulsd xmm1,qword ptr d1ln10_26_l[rip] # (x-0.5x2) * (1/ln 10)_l + mulsd xmm2,qword ptr d1ln10_26[rip] # (x-0.5x2)_l * (1/ln 10)_h + mulsd xmm5,qword ptr d1ln10_26[rip] # (x-0.5x2)_h * (1/ln 10)_h + + addsd xmm2,xmm1 # (x-0.5x2) * (1/ln 10)_l+(x-0.5x2)_l * (1/ln 
10)_h + addsd xmm2,xmm3 + addsd xmm2,xmm6 + + movsd xmm3,xmm0 + addsd xmm0,xmm5 + subsd xmm3,xmm0 + addsd xmm3,xmm5 + addsd xmm3,xmm2 + addsd xmm0,xmm3 + +# addsd xmm2,xmm5 # (x-0.5x2)*(1/ln 10) + (p/q)*x3 +# addsd xmm0,xmm2 + ret + +log10_real_s: + movlpd xmm2,qword ptr log10_s_p_1[rip] + movlpd xmm3,qword ptr log10_s_q_1[rip] + mulsd xmm2,xmm1 # p1 + mulsd xmm3,xmm1 # q1 + movsd xmm7,xmm1 # x + + movlpd xmm4,qword ptr log10_s_p_2[rip] + movlpd xmm5,qword ptr log10_s_q_2[rip] + mulsd xmm7,xmm0 # x3 + movsd xmm6,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + movlpd xmm8,qword ptr log10_s_p_3[rip] + movlpd xmm9,qword ptr log10_s_q_3[rip] + + mulsd xmm4,xmm6 # p2 + mulsd xmm5,xmm6 # q2 + + movlpd xmm10,qword ptr log10_s_p_4[rip] + + mulsd xmm8,xmm7 # p3 + mulsd xmm9,xmm7 # q3 + + movsd xmm11,xmm7 # x3 + mulsd xmm7,xmm6 # x5 + + mulsd xmm10,xmm0 # p4 + mulsd xmm0,qword ptr log10_s_q_4[rip] # q4 + + addsd xmm0,xmm7 # q4+x5 + + addsd xmm9,xmm0 # q3+q4+x5 + addsd xmm8,xmm10 # p3+p4 + + mulsd xmm6,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm9,xmm5 # q2+q3+q4+x5 + addsd xmm4,xmm8 # p2+p3+p4 + + addsd xmm9,xmm3 # q1+q2+q3+q4+x5 + addsd xmm2,xmm4 # p1+p2+p3+p4 + + movlpd xmm4,qword ptr real_26_bits[rip] + movsd xmm0,xmm1 # x + + addsd xmm9,qword ptr log10_s_q_0[rip] # q0+q1+q2+q3+q4+x5 + addsd xmm2,qword ptr log10_s_p_0[rip] # p0+p1+p2+p3+p4 + + subsd xmm1,xmm6 # x-0.5x2 + + divsd xmm2,xmm9 # p/q + + andpd xmm4,xmm1 # (x-0.5x2)_h + subsd xmm0,xmm4 # x-(x-0.5x2)_h + subsd xmm0,xmm6 # (x-0.5x2)_l + + mulsd xmm2,xmm11 # (p/q)*x3 + + mulsd xmm1,qword ptr d1ln10_26_l[rip] # (x-0.5x2) * (1/ln 10)_l + mulsd xmm0,qword ptr d1ln10_26[rip] # (x-0.5x2)_l * (1/ln 10)_h + mulsd xmm4,qword ptr d1ln10_26[rip] # (x-0.5x2)_h * (1/ln 10)_h + + addsd xmm0,xmm1 # (x-0.5x2) * (1/ln 10)_l+(x-0.5x2)_l * (1/ln 10)_h + addsd xmm0,xmm2 + addsd xmm0,xmm4 # (x-0.5x2)*(1/ln 10) + (p/q)*x3 + + ret + +log10_real_l1: + subsd xmm3,qword ptr real_1_0[rip] + + psubq xmm0,xmmword ptr qw_1022[rip] +log10_real_l1_: + 
movsd xmm1,xmm3 # x + mulsd xmm3,xmm3 # x2 + + movlpd xmm2,qword ptr log10_s_p_1[rip] + movlpd xmm4,qword ptr log10_s_q_1[rip] + mulsd xmm2,xmm1 # p1 + mulsd xmm4,xmm1 # q1 + movsd xmm8,xmm1 # x + + cvtdq2pd xmm0,xmm0 + + movlpd xmm5,qword ptr log10_s_p_2[rip] + movlpd xmm6,qword ptr log10_s_q_2[rip] + mulsd xmm8,xmm3 # x3 + movsd xmm7,xmm3 # x2 + mulsd xmm3,xmm3 # x4 + + movlpd xmm9,qword ptr log10_s_p_3[rip] + movlpd xmm10,qword ptr log10_s_q_3[rip] + + mulsd xmm5,xmm7 # p2 + mulsd xmm6,xmm7 # q2 + + movlpd xmm11,qword ptr log10_s_p_4[rip] + + mulsd xmm9,xmm8 # p3 + mulsd xmm10,xmm8 # q3 + + movsd xmm12,xmm8 # x3 + mulsd xmm8,xmm7 # x5 + + mulsd xmm11,xmm3 # p4 + mulsd xmm3,qword ptr log10_s_q_4[rip] # q4 + + addsd xmm3,xmm8 # q4+x5 + + addsd xmm10,xmm3 # q3+q4+x5 + addsd xmm9,xmm11 # p3+p4 + + mulsd xmm7,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm10,xmm6 # q2+q3+q4+x5 + addsd xmm5,xmm9 # p2+p3+p4 + + movlpd xmm6,qword ptr real_log2_10_42_l[rip] + + addsd xmm10,xmm4 # q1+q2+q3+q4+x5 + addsd xmm2,xmm5 # p1+p2+p3+p4 + + movlpd xmm5,qword ptr real_26_bits[rip] + movsd xmm3,xmm1 # x + + mulsd xmm6,xmm0 + mulsd xmm0,qword ptr real_log2_10_42[rip] + + addsd xmm10,qword ptr log10_s_q_0[rip] # q0+q1+q2+q3+q4+x5 + addsd xmm2,qword ptr log10_s_p_0[rip] # p0+p1+p2+p3+p4 + + subsd xmm1,xmm7 # x-0.5x2 + + divsd xmm2,xmm10 # p/q + + andpd xmm5,xmm1 # (x-0.5x2)_h + subsd xmm3,xmm5 # x-(x-0.5x2)_h + subsd xmm3,xmm7 # (x-0.5x2)_l + + mulsd xmm2,xmm12 # (p/q)*x3 + + mulsd xmm1,qword ptr d1ln10_26_l[rip] # (x-0.5x2) * (1/ln 10)_l + mulsd xmm3,qword ptr d1ln10_26[rip] # (x-0.5x2)_l * (1/ln 10)_h + mulsd xmm5,qword ptr d1ln10_26[rip] # (x-0.5x2)_h * (1/ln 10)_h + + addsd xmm3,xmm1 # (x-0.5x2) * (1/ln 10)_l+(x-0.5x2)_l * (1/ln 10)_h + addsd xmm3,xmm2 + addsd xmm3,xmm6 + + movsd xmm2,xmm0 + addsd xmm0,xmm5 + subsd xmm2,xmm0 + addsd xmm2,xmm5 + addsd xmm2,xmm3 + addsd xmm0,xmm2 + +# addsd xmm3,xmm5 # (x-0.5x2)*(1/ln 10) + (p/q)*x3 +# addsd xmm0,xmm3 + ret + 
+log10_real_less_or_equal_0_or_nan: + jb log10_real_less_0_or_nan # x<0.0 | nan + + movsd xmm0,qword ptr real_m_1_0[rip] + divsd xmm0,qword ptr real_0_0[rip] # yield -inf + ret + +log10_real_less_0_or_nan: + att_jp log10_real_inf_or_nan # nan + + movsd xmm0,qword ptr real_0_0[rip] # yield nan + divsd xmm0,xmm0 + +log10_real_inf_or_nan: + ret + + + .globl pow_real + +pow_real: + ucomisd xmm0,qword ptr real_0_0[rip] + je pow_real_to_zero_or_nan # y==0.0 | nan + + ucomisd xmm1,qword ptr real_0_83[rip] + jb pow_real_small # x<0.83 | nan +pow_real_not_small: + ucomisd xmm1,qword ptr real_1_66[rip] + ja pow_real_large + + ucomisd xmm1,qword ptr real_7_d_6[rip] + + movlpd xmm2,qword ptr real_1_0[rip] + + ja pow_real_a_7_d_6 + +pow_real_between_0_83_and_7_d_6: + ucomisd xmm0,qword ptr real_power_exp_too_large[rip] + ja pow_real_exp_too_large + + ucomisd xmm0,qword ptr real_power_exp_too_small[rip] + jb pow_real_exp_too_large_m + + ucomisd xmm1,xmm2 + + subsd xmm1,xmm2 + + lea rcx,log2_b_c[rip] + lea rdx,log2_s_c[rip] + cmovb rcx,rdx + + movsd xmm5,xmm1 # x + mulsd xmm1,xmm1 # x2 + + movlpd xmm2,qword ptr (log2_b_p_1-log2_b_c)[rcx] + movlpd xmm4,qword ptr (log2_b_q_1-log2_b_c)[rcx] + mulsd xmm2,xmm5 # p1 + mulsd xmm4,xmm5 # q1 + movsd xmm8,xmm5 # x + movlpd xmm3,qword ptr (log2_b_p_2-log2_b_c)[rcx] + movlpd xmm6,qword ptr (log2_b_q_2-log2_b_c)[rcx] + mulsd xmm8,xmm1 # x3 + movsd xmm7,xmm1 # x2 + mulsd xmm1,xmm1 # x4 + + movlpd xmm9,qword ptr (log2_b_p_3-log2_b_c)[rcx] + movlpd xmm10,qword ptr (log2_b_q_3-log2_b_c)[rcx] + + mulsd xmm3,xmm7 # p2 + mulsd xmm6,xmm7 # q2 + + movlpd xmm11,qword ptr (log2_b_p_4-log2_b_c)[rcx] + + mulsd xmm9,xmm8 # p3 + mulsd xmm10,xmm8 # q3 + + mulsd xmm11,xmm1 # p4 + + addsd xmm10,xmm1 # q3+x4 + addsd xmm9,xmm11 # p3+p4 + + mulsd xmm7,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm10,xmm6 # q2+q3+x4 + addsd xmm3,xmm9 # p2+p3+p4 + addsd xmm10,xmm4 # q1+q2+q3+x4 + addsd xmm2,xmm3 # p1+p2+p3+p4 + + movlpd xmm3,qword ptr real_26_bits[rip] + movsd 
xmm1,xmm5 # x + + addsd xmm10,qword ptr (log2_b_q_0-log2_b_c)[rcx] # q0+q1+q2+q3+x4 + addsd xmm2,qword ptr (log2_b_p_0-log2_b_c)[rcx] # p0+p1+p2+p3+p4 + + subsd xmm5,xmm7 # x-0.5x2 + + divsd xmm2,xmm10 # p/q + + andpd xmm3,xmm5 # (x-0.5x2)_h + subsd xmm1,xmm3 # x-(x-0.5x2)_h + subsd xmm1,xmm7 # (x-0.5x2)_l + + mulsd xmm2,xmm8 # (p/q)*x3 + + mulsd xmm5,qword ptr d1ln2_26_l[rip] # (x-0.5x2) * (1/ln 2)_l + mulsd xmm1,qword ptr d1ln2_26[rip] # (x-0.5x2)_l * (1/ln 2)_h + mulsd xmm3,qword ptr d1ln2_26[rip] # (x-0.5x2)_h * (1/ln 2)_h + + addsd xmm1,xmm5 # (x-0.5x2) * (1/ln 2)_l+(x-0.5x2)_l * (1/ln 2)_h + + + movsd xmm5,xmm1 # xl + addsd xmm1,xmm2 # xl+r + movlpd xmm6,qword ptr real_26_bits[rip] + + addsd xmm1,xmm3 # xh+xl+r + + andpd xmm1,xmm6 # (log2 x)_h + andpd xmm6,xmm0 # y_h + + subsd xmm3,xmm1 # xh-(log2 x)_h + jmp pow_real_2 + +pow_real_a_7_d_6: + ucomisd xmm0,qword ptr real_4605_0[rip] + jae pow_real_overflow # because 1.1666666666666667407^4605>2^1024 + + ucomisd xmm0,qword ptr real_m_4834_0[rip] + jbe pow_real_underflow # because 1.1666666666666667407^-4834<2^-1075 + + subsd xmm1,xmm2 + + mulsd xmm1,qword ptr real_0_75[rip] + lea rcx,log2_b_c[rip] + lea rdx,log2_s_c[rip] + + movlpd xmm2,qword ptr real_0_25[rip] + + ucomisd xmm1,xmm2 + subsd xmm1,xmm2 + + cmovb rcx,rdx + + movsd xmm5,xmm1 # x + mulsd xmm1,xmm1 # x2 + + movlpd xmm2,qword ptr (log2_b_p_1-log2_b_c)[rcx] + movlpd xmm4,qword ptr (log2_b_q_1-log2_b_c)[rcx] + mulsd xmm2,xmm5 # p1 + mulsd xmm4,xmm5 # q1 + movsd xmm8,xmm5 # x + movlpd xmm3,qword ptr (log2_b_p_2-log2_b_c)[rcx] + movlpd xmm6,qword ptr (log2_b_q_2-log2_b_c)[rcx] + mulsd xmm8,xmm1 # x3 + movsd xmm7,xmm1 # x2 + mulsd xmm1,xmm1 # x4 + + movlpd xmm9,qword ptr (log2_b_p_3-log2_b_c)[rcx] + movlpd xmm10,qword ptr (log2_b_q_3-log2_b_c)[rcx] + + mulsd xmm3,xmm7 # p2 + mulsd xmm6,xmm7 # q2 + + movlpd xmm11,qword ptr (log2_b_p_4-log2_b_c)[rcx] + + mulsd xmm9,xmm8 # p3 + mulsd xmm10,xmm8 # q3 + + mulsd xmm11,xmm1 # p4 + + addsd xmm10,xmm1 # q3+x4 + 
addsd xmm9,xmm11 # p3+p4 + + mulsd xmm7,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm10,xmm6 # q2+q3+x4 + addsd xmm3,xmm9 # p2+p3+p4 + addsd xmm10,xmm4 # q1+q2+q3+x4 + addsd xmm2,xmm3 # p1+p2+p3+p4 + + movlpd xmm3,qword ptr real_26_bits[rip] + movsd xmm1,xmm5 # x + + addsd xmm10,qword ptr (log2_b_q_0-log2_b_c)[rcx] # q0+q1+q2+q3+x4 + addsd xmm2,qword ptr (log2_b_p_0-log2_b_c)[rcx] # p0+p1+p2+p3+p4 + + subsd xmm5,xmm7 # x-0.5x2 + + divsd xmm2,xmm10 # p/q + + andpd xmm3,xmm5 # (x-0.5x2)_h + subsd xmm1,xmm3 # x-(x-0.5x2)_h + subsd xmm1,xmm7 # (x-0.5x2)_l + + mulsd xmm2,xmm8 # (p/q)*x3 + + mulsd xmm5,qword ptr d1ln2_26_l[rip] # (x-0.5x2) * (1/ln 2)_l + mulsd xmm1,qword ptr d1ln2_26[rip] # (x-0.5x2)_l * (1/ln 2)_h + mulsd xmm3,qword ptr d1ln2_26[rip] # (x-0.5x2)_h * (1/ln 2)_h + + + movlpd xmm6,qword ptr log2_4_d_3_53[rip] + movsd xmm7,xmm3 # xh + addsd xmm3,xmm6 # xh+(log2 4/3)_h + + addsd xmm1,xmm5 # (x-0.5x2) * (1/ln 2)_l+(x-0.5x2)_l * (1/ln 2)_h + + subsd xmm6,xmm3 # (log2 4/3)_h-(xh+(log2 4/3)_h) + + addsd xmm6,xmm7 # ((log2 4/3)_h-(xh+(log2 4/3)_h))+xh + addsd xmm1,qword ptr log2_4_d_3_53_l[rip] # xl+(log2 4/3)_l + + addsd xmm1,xmm6 # (x+log2 4/3)_l + + movsd xmm5,xmm1 # xl + addsd xmm1,xmm2 # xl+r + movlpd xmm6,qword ptr real_26_bits[rip] + + addsd xmm1,xmm3 # xh+xl+r + + andpd xmm1,xmm6 # (log2 x)_h + andpd xmm6,xmm0 # y_h + + subsd xmm3,xmm1 # xh-(log2 x)_h + att_jmp pow_real_2 + +pow_real_small: + ucomisd xmm1,qword ptr real_2_p_m_1022[rip] + jb pow_real_negative_zero_denormalized_or_nan + +pow_real_small_: + ucomisd xmm0,qword ptr real_4000_0[rip] + att_jae pow_real_underflow # because 0.83^4000<2^-1075 + + ucomisd xmm0,qword ptr real_m_3810_0[rip] + ja log2_real_ + att_jmp pow_real_overflow # because 0.83^-3810>2^1024 + +pow_real_large: + ucomisd xmm1,qword ptr real_max[rip] + ja pow_real_inf + + ucomisd xmm0,qword ptr real_1401_0[rip] + att_jae pow_real_overflow # because 1.66^1401>2^1024 + + ucomisd xmm0,qword ptr real_m_1471_0[rip] + att_jbe 
pow_real_underflow # because 1.66^-1471<2^-1075 + +log2_real_: + movlpd xmm2,qword ptr mask_significand[rip] + andpd xmm2,xmm1 + psrlq xmm1,52 + +log2_real__: + movlpd xmm3,qword ptr real_1_0[rip] + movlpd xmm4,qword ptr real_0_5[rip] + orpd xmm3,xmm2 + orpd xmm4,xmm2 + + ucomisd xmm3,qword ptr real_1_66[rip] + + movlpd xmm5,qword ptr real_1_0[rip] + + ja log2_real_l1 + + ucomisd xmm3,qword ptr real_7_d_6[rip] + ja pow_real_g_7_d_6 + +log2_real_g1: + subsd xmm3,xmm5 + + psubq xmm1,xmmword ptr qw_1023[rip] + + movsd xmm6,xmm3 # x + mulsd xmm3,xmm3 # x2 + + movlpd xmm2,qword ptr log2_b_p_1[rip] + movlpd xmm5,qword ptr log2_b_q_1[rip] + mulsd xmm2,xmm6 # p1 + mulsd xmm5,xmm6 # q1 + movsd xmm9,xmm6 # x + + cvtdq2pd xmm4,xmm1 + + movlpd xmm1,qword ptr log2_b_p_2[rip] + movlpd xmm7,qword ptr log2_b_q_2[rip] + mulsd xmm9,xmm3 # x3 + movsd xmm8,xmm3 # x2 + mulsd xmm3,xmm3 # x4 + + movlpd xmm10,qword ptr log2_b_p_3[rip] + movlpd xmm11,qword ptr log2_b_q_3[rip] + + mulsd xmm1,xmm8 # p2 + mulsd xmm7,xmm8 # q2 + + movlpd xmm12,qword ptr log2_b_p_4[rip] + + mulsd xmm10,xmm9 # p3 + mulsd xmm11,xmm9 # q3 + + mulsd xmm12,xmm3 # p4 + + addsd xmm11,xmm3 # q3+x4 + addsd xmm10,xmm12 # p3+p4 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm11,xmm7 # q2+q3+x4 + addsd xmm1,xmm10 # p2+p3+p4 + addsd xmm11,xmm5 # q1+q2+q3+x4 + addsd xmm2,xmm1 # p1+p2+p3+p4 + + movlpd xmm3,qword ptr real_26_bits[rip] + movsd xmm1,xmm6 # x + + addsd xmm11,qword ptr log2_b_q_0[rip] # q0+q1+q2+q3+x4 + addsd xmm2,qword ptr log2_b_p_0[rip] # p0+p1+p2+p3+p4 + +log2_real_gl: + subsd xmm6,xmm8 # x-0.5x2 + + divsd xmm2,xmm11 # p/q + + andpd xmm3,xmm6 # (x-0.5x2)_h + subsd xmm1,xmm3 # x-(x-0.5x2)_h + subsd xmm1,xmm8 # (x-0.5x2)_l + + mulsd xmm2,xmm9 # (p/q)*x3 + + mulsd xmm6,qword ptr d1ln2_26_l[rip] # (x-0.5x2) * (1/ln 2)_l + mulsd xmm1,qword ptr d1ln2_26[rip] # (x-0.5x2)_l * (1/ln 2)_h + mulsd xmm3,qword ptr d1ln2_26[rip] # (x-0.5x2)_h * (1/ln 2)_h + + addsd xmm1,xmm6 # (x-0.5x2) * (1/ln 2)_l+(x-0.5x2)_l 
* (1/ln 2)_h + + movsd xmm5,xmm1 # xl + addsd xmm1,xmm2 # xl+r + movlpd xmm6,qword ptr real_26_bits[rip] + + addsd xmm1,xmm3 # xh+xl+r + + addsd xmm1,xmm4 # e+xh+xl+r + + andpd xmm1,xmm6 # (log2 x)_h + andpd xmm6,xmm0 # y_h + + subsd xmm4,xmm1 # e-(log2 x)_h + + addsd xmm3,xmm4 # e+xh-(log2 x)_h + att_jmp pow_real_2 + +log2_real_l1: + subsd xmm4,xmm5 + + psubq xmm1,xmmword ptr qw_1022[rip] + + movsd xmm6,xmm4 # x + mulsd xmm4,xmm4 # x2 + + movlpd xmm2,qword ptr log2_s_p_1[rip] + movlpd xmm5,qword ptr log2_s_q_1[rip] + mulsd xmm2,xmm6 # p1 + mulsd xmm5,xmm6 # q1 + movsd xmm9,xmm6 # x + + movlpd xmm3,qword ptr log2_s_p_2[rip] + movlpd xmm7,qword ptr log2_s_q_2[rip] + mulsd xmm9,xmm4 # x3 + movsd xmm8,xmm4 # x2 + mulsd xmm4,xmm4 # x4 + + movlpd xmm10,qword ptr log2_s_p_3[rip] + movlpd xmm11,qword ptr log2_s_q_3[rip] + + mulsd xmm3,xmm8 # p2 + mulsd xmm7,xmm8 # q2 + + movlpd xmm12,qword ptr log2_s_p_4[rip] + + mulsd xmm10,xmm9 # p3 + mulsd xmm11,xmm9 # q3 + + mulsd xmm12,xmm4 # p4 + + addsd xmm11,xmm4 # q3+x4 + addsd xmm10,xmm12 # p3+p4 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5x2 + + cvtdq2pd xmm4,xmm1 + + addsd xmm11,xmm7 # q2+q3+x4 + addsd xmm3,xmm10 # p2+p3+p4 + addsd xmm11,xmm5 # q1+q2+q3+x4 + addsd xmm2,xmm3 # p1+p2+p3+p4 + + movlpd xmm3,qword ptr real_26_bits[rip] + movsd xmm1,xmm6 # x + + addsd xmm11,qword ptr log2_s_q_0[rip] # q0+q1+q2+q3+x4 + addsd xmm2,qword ptr log2_s_p_0[rip] # p0+p1+p2+p3+p4 + + att_jmp log2_real_gl + +pow_real_to_zero_or_nan: + jp pow_real_to_nan + movlpd xmm0,qword ptr real_1_0[rip] + ret + +pow_real_negative_zero_denormalized_or_nan: + ucomisd xmm1,qword ptr real_0_0[rip] + jbe pow_real_zero_negative_or_nan + + # denormalized > 0.0 + +pow_real_denormalized: + ucomisd xmm0,qword ptr real_1_052[rip] + att_jae pow_real_underflow # because (2^-1022)^1.052<2^-1075 + + ucomisd xmm0,qword ptr real_m_1_052[rip] + att_jbe pow_real_overflow # because (2^-1022)^-1.052>2^1075 + + mulsd xmm1,qword ptr real_2_p_55[rip] + + movlpd xmm2,qword ptr 
mask_significand[rip] + andpd xmm2,xmm1 + + movq xmm5,qword ptr qw_55[rip] + psrlq xmm1,52 + + psubq xmm1,xmm5 + att_jmp log2_real__ + +pow_real_zero_negative_or_nan: + je pow_real_zero_or_nan + + movlpd xmm2,qword ptr mask_all_except_sign[rip] + andpd xmm2,xmm0 # |y| + + ucomisd xmm2,qword ptr real_2_p_53[rip] + jae pow_real_negative_to_large_or_inf + + ucomisd xmm2,qword ptr real_1_0[rip] + jb pow_real_negative_to_non_int + + movq xmm3,qword ptr round_c[rip] + psrlq xmm2,52 + psubq xmm3,xmm2 + + movq xmm4,qword ptr mask_all_one[rip] + movq xmm5,qword ptr mask_all_one_except_last[rip] + psllq xmm4,xmm3 + psllq xmm5,xmm3 + andpd xmm4,xmm0 + andpd xmm5,xmm0 # 1 -> 2 and -1 -> -2 + + ucomisd xmm4,xmm0 + att_jne pow_real_negative_to_non_int + + ucomisd xmm5,xmm0 + je pow_real_negative_to_even_int + +pow_real_negative_to_odd_int: + att_call pow_real_negative_to_even_int + movlpd xmm1,qword ptr mask_sign[rip] + xorpd xmm0,xmm1 + ret + +pow_real_negative_to_even_int: + movlpd xmm2,qword ptr mask_all_except_sign[rip] + andpd xmm1,xmm2 # |x| + ucomisd xmm1,qword ptr real_0_83[rip] # x>=0.83 + att_jae pow_real_not_small + + ucomisd xmm1,qword ptr real_2_p_m_1022[rip] + att_jae pow_real_small_ + att_jmp pow_real_denormalized + +pow_real_negative_to_large_or_inf: + movlpd xmm2,qword ptr mask_all_except_sign[rip] + andpd xmm1,xmm2 # |x| + ucomisd xmm1,qword ptr real_0_83[rip] + jb pow_real_small_to_large_or_inf # because 0.83^(2^53) too large or small + ucomisd xmm1,qword ptr real_7_d_6[rip] + + movlpd xmm2,qword ptr real_1_0[rip] + + ja pow_real_large_to_large_or_inf # because 1.16^(2^53) too large or small + + att_jmp pow_real_between_0_83_and_7_d_6 + +pow_real_negative_to_non_int: + ucomisd xmm1,qword ptr real_m_max[rip] + jb pow_real_m_inf_to_non_int # x==-inf + movlpd xmm0,qword ptr real_0_0[rip] + + divsd xmm0,xmm0 + ret + +pow_real_zero_or_nan: + jp pow_real_nan_to_non_zero +pow_real_zero: + movlpd xmm2,qword ptr real_1_0[rip] + orpd xmm2,xmm1 # 0.0 -> 1.0 and -0.0 -> 
-1.0 + ucomisd xmm2,qword ptr real_0_0[rip] + jb pow_real_m_zero + +pow_real_small_to_large_or_inf: +pow_real_m_zero_to_non_odd_int: + ucomisd xmm0,qword ptr real_0_0[rip] + att_jb pow_real_overflow + +pow_real_inf_to_negative: +pow_real_underflow: + movlpd xmm0,qword ptr real_0_0[rip] + ret + +pow_real_m_inf_to_non_int: +pow_real_large_to_large_or_inf: + ucomisd xmm0,qword ptr real_0_0[rip] + att_jb pow_real_underflow + +pow_real_overflow: + movlpd xmm0,qword ptr real_1_0[rip] + divsd xmm0,qword ptr real_0_0[rip] # inf + ret + +pow_real_m_zero: +# entered from pow_real_zero when (1.0 | x) compares below 0.0, i.e. x is -0.0 +# xmm0 = y +# xmm1 = x +# only an odd integer y keeps the sign of x (-0.0 for y>0.0, -inf for y<0.0), +# every other y is handled by pow_real_m_zero_to_non_odd_int +# the integer/even tests use the same two masks as pow_real_zero_negative_or_nan +# NOTE(review): round_c and the mask_all_one* constants are defined outside this part of the file - confirm their values + movlpd xmm2,qword ptr mask_all_except_sign[rip] + andpd xmm2,xmm0 # |y| + + ucomisd xmm2,qword ptr real_2_p_53[rip] + att_jae pow_real_m_zero_to_non_odd_int + + ucomisd xmm2,qword ptr real_1_0[rip] # was real_m_1_0, |y| is never below -1.0 + att_jb pow_real_m_zero_to_non_odd_int # |y|<1.0, y is not an integer + + movq xmm4,qword ptr round_c[rip] + psrlq xmm2,52 + psubq xmm4,xmm2 + + movq xmm3,qword ptr mask_all_one[rip] + movq xmm5,qword ptr mask_all_one_except_last[rip] + psllq xmm3,xmm4 + psllq xmm5,xmm4 + andpd xmm3,xmm0 + andpd xmm5,xmm0 # 1 -> 2 and -1 -> -2 + + ucomisd xmm3,xmm0 + att_jne pow_real_m_zero_to_non_odd_int # y is not an integer + + ucomisd xmm5,xmm0 + att_je pow_real_m_zero_to_non_odd_int # y is an even integer + + ucomisd xmm0,qword ptr real_0_0[rip] # was xmm3, which is not set on this path + jb pow_real_m_overflow # y<0.0 + + movsd xmm0,xmm1 # -0.0 + ret + +pow_real_m_overflow: + movlpd xmm0,qword ptr real_m_1_0[rip] + divsd xmm0,qword ptr real_0_0[rip] # -inf + ret + +pow_real_nan_to_non_zero: +pow_real_to_nan: + addsd xmm0,xmm1 + ret + +pow_real_inf: + ucomisd xmm0,qword ptr real_0_0[rip] + att_jb pow_real_inf_to_negative + movsd xmm0,xmm1 + ret + +pow_real_exp_too_large: + ucomisd xmm1,xmm2 + je power_one_to_large_or_inf + att_ja pow_real_overflow + att_jmp pow_real_underflow + +pow_real_exp_too_large_m: + ucomisd xmm1,xmm2 + je power_one_to_large_or_inf_m + att_jb pow_real_overflow + att_jmp pow_real_underflow + +power_one_to_large_or_inf: + ucomisd xmm0,qword ptr real_max[rip] + ja power_one_to_inf + movsd xmm0,xmm1 + ret + +power_one_to_large_or_inf_m: + ucomisd xmm0,qword ptr real_m_max[rip] + jb power_one_to_inf_m + movsd xmm0,xmm1 + ret + +power_one_to_inf_m: +power_one_to_inf: + subsd xmm0,xmm0 + ret + + 
+pow_real_g_7_d_6: + subsd xmm3,xmm5 + + mulsd xmm3,qword ptr real_0_75[rip] + lea rcx,log2_b_c[rip] + lea rdx,log2_s_c[rip] + + movlpd xmm4,qword ptr real_0_25[rip] + + ucomisd xmm3,xmm4 + subsd xmm3,xmm4 + + cmovb rcx,rdx + psubq xmm1,xmmword ptr qw_1023[rip] + + movsd xmm6,xmm3 # x + mulsd xmm3,xmm3 # x2 + + movlpd xmm2,qword ptr (log2_b_p_1-log2_b_c)[rcx] + movlpd xmm5,qword ptr (log2_b_q_1-log2_b_c)[rcx] + mulsd xmm2,xmm6 # p1 + mulsd xmm5,xmm6 # q1 + movsd xmm9,xmm6 # x + + cvtdq2pd xmm4,xmm1 + + movlpd xmm1,qword ptr (log2_b_p_2-log2_b_c)[rcx] + movlpd xmm7,qword ptr (log2_b_q_2-log2_b_c)[rcx] + mulsd xmm9,xmm3 # x3 + movsd xmm8,xmm3 # x2 + mulsd xmm3,xmm3 # x4 + + movlpd xmm10,qword ptr (log2_b_p_3-log2_b_c)[rcx] + movlpd xmm11,qword ptr (log2_b_q_3-log2_b_c)[rcx] + + mulsd xmm1,xmm8 # p2 + mulsd xmm7,xmm8 # q2 + + movlpd xmm12,qword ptr (log2_b_p_4-log2_b_c)[rcx] + + mulsd xmm10,xmm9 # p3 + mulsd xmm11,xmm9 # q3 + + mulsd xmm12,xmm3 # p4 + + addsd xmm11,xmm3 # q3+x4 + addsd xmm10,xmm12 # p3+p4 + + mulsd xmm8,qword ptr real_0_5[rip] # 0.5x2 + + addsd xmm11,xmm7 # q2+q3+x4 + addsd xmm1,xmm10 # p2+p3+p4 + addsd xmm11,xmm5 # q1+q2+q3+x4 + addsd xmm2,xmm1 # p1+p2+p3+p4 + + movlpd xmm3,qword ptr real_26_bits[rip] + movsd xmm1,xmm6 # x + + addsd xmm11,qword ptr (log2_b_q_0-log2_b_c)[rcx] # q0+q1+q2+q3+x4 + addsd xmm2,qword ptr (log2_b_p_0-log2_b_c)[rcx] # p0+p1+p2+p3+p4 + + subsd xmm6,xmm8 # x-0.5x2 + + addsd xmm4,qword ptr log2_4_d_3_42[rip] # e+(log2 4/3)_h + + divsd xmm2,xmm11 # p/q + + andpd xmm3,xmm6 # (x-0.5x2)_h + subsd xmm1,xmm3 # x-(x-0.5x2)_h + subsd xmm1,xmm8 # (x-0.5x2)_l + + mulsd xmm2,xmm9 # (p/q)*x3 + + mulsd xmm6,qword ptr d1ln2_26_l[rip] # (x-0.5x2) * (1/ln 2)_l + mulsd xmm1,qword ptr d1ln2_26[rip] # (x-0.5x2)_l * (1/ln 2)_h + mulsd xmm3,qword ptr d1ln2_26[rip] # (x-0.5x2)_h * (1/ln 2)_h + + movsd xmm7,xmm3 # xh + addsd xmm3,xmm4 # xh+e+(log2 4/3)_h + + addsd xmm1,xmm6 # (x-0.5x2) * (1/ln 2)_l+(x-0.5x2)_l * (1/ln 2)_h + + subsd xmm4,xmm3 # 
e+(log2 4/3)_h-(xh+e+(log2 4/3)_h) + + addsd xmm4,xmm7 # (e+(log2 4/3)_h-(xh+e+(log2 4/3)_h))+xh + addsd xmm1,qword ptr log2_4_d_3_42_l[rip] # xl+(log2 4/3)_l + + addsd xmm1,xmm4 # (x+log2 4/3)_l + + movsd xmm5,xmm1 # xl + addsd xmm1,xmm2 # xl+r + movlpd xmm6,qword ptr real_26_bits[rip] + + addsd xmm1,xmm3 # e+xh+xl+r + + andpd xmm1,xmm6 # (log2 x)_h + andpd xmm6,xmm0 # y_h + + subsd xmm4,xmm1 # e-(log2 x)_h + + addsd xmm3,xmm4 # e+xh-(log2 x)_h +# jmp pow_real_2 + + +pow_real_2: +# xmm0 = y +# xmm1 = (log2 x)_h +# xmm2 = r +# xmm3 = (e+)xh-(log2 x)_h +# xmm5 = xl +# xmm6 = y_h + movsd xmm7,xmm0 # y + subsd xmm0,xmm6 # y_l + mulsd xmm6,xmm1 # p_h = y_h*(log2 x)_h + + addsd xmm3,xmm2 # xh+r-(log2 x)_h + mulsd xmm0,xmm1 # y_l*(log2 x)_h + + addsd xmm3,xmm5 # (log2 x)_l + + mulsd xmm3,xmm7 # y*(log2 x)_l + + addsd xmm0,xmm3 # p_l + + movsd xmm7,xmm0 # p_l + addsd xmm0,xmm6 # p + +exp2: + ucomisd xmm0,qword ptr real_m_0_5[rip] + jb exp2_l_n + ucomisd xmm0,qword ptr real_0_5[rip] + ja exp2_l +exp2_l_: + ucomisd xmm0,qword ptr real_m_0_25[rip] + jbe exp2_l_m_0_25 + ucomisd xmm0,qword ptr real_0_25[rip] + jae exp2_g_0_25 + + ucomisd xmm0,qword ptr real_0_0[rip] + + lea rcx,exp2_p0_c[rip] + lea rdx,exp2_m0_c[rip] + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + + cmovb rcx,rdx +exp2_: + movlpd xmm13,qword ptr (exp2_p0_2-exp2_p0_c)[rcx] + movlpd xmm14,qword ptr (exp2_p0_4-exp2_p0_c)[rcx] + + movsd xmm2,xmm0 # x2 + mulsd xmm0,xmm0 # x4 + + mulsd xmm13,xmm2 # p2 + + movsd xmm3,xmm1 # x + mulsd xmm1,xmm2 # x3 + + movlpd xmm15,qword ptr (exp2_p0_3-exp2_p0_c)[rcx] + mulsd xmm14,xmm0 # p4 + + mulsd xmm15,xmm1 # p3 + + movsd xmm4,xmm0 # x4 + mulsd xmm0,xmm0 # x8 + + movsd xmm5,xmm2 # x2 + mulsd xmm2,xmm1 # x5 + + mulsd xmm5,xmm4 # x6 + mulsd xmm1,xmm4 # x7 + mulsd xmm4,xmm2 # x9 + + mulsd xmm0,qword ptr (exp2_p0_8-exp2_p0_c)[rcx] # p8 + mulsd xmm2,qword ptr (exp2_p0_5-exp2_p0_c)[rcx] # p5 + mulsd xmm5,qword ptr (exp2_p0_6-exp2_p0_c)[rcx] # p6 + mulsd xmm1,qword ptr 
(exp2_p0_7-exp2_p0_c)[rcx] # p7 + mulsd xmm4,qword ptr (exp2_p0_9-exp2_p0_c)[rcx] # p9 + + addsd xmm0,xmm4 # p9+p8 + + movlpd xmm9,qword ptr real_26_bits[rip] + andpd xmm9,xmm6 # xh_h + mulsd xmm3,qword ptr (exp2_p0_1l-exp2_p0_c)[rcx] # x*c1l + + addsd xmm0,xmm1 # p9+p8+p7 + + subsd xmm6,xmm9 # xh_l + mulsd xmm9,qword ptr (exp2_p0_1h-exp2_p0_c)[rcx] # p1h=xh_h*c1h + + addsd xmm0,xmm5 # p9+p8+p7+p6 + + movlpd xmm1,qword ptr (exp2_p0_0h-exp2_p0_c)[rcx] + addsd xmm6,xmm7 # xh_l+x_l + movsd xmm4,xmm9 # p1h + addsd xmm9,xmm1 # p1h+c0h + + addsd xmm0,xmm2 # p9+p8+p7+p6+p5 + + mulsd xmm6,qword ptr (exp2_p0_1h-exp2_p0_c)[rcx] # (xh_l+x_l)*c1h + subsd xmm1,xmm9 # c0h-(p1h+c0h) + + addsd xmm0,xmm14 # p9+p8+p7+p6+p5+p4 + + addsd xmm3,xmm6 # (xh_l+x_l)*c1h+x*c1l + addsd xmm1,xmm4 # (c0h-(p1h+c0h))+p1h + + addsd xmm0,xmm15 # p9+p8+p7+p6+p5+p4+p3 + + addsd xmm1,xmm3 + + addsd xmm0,xmm13 # p9+p8+p7+p6+p5+p4+p3+p2 + + addsd xmm0,qword ptr (exp2_p0_0l-exp2_p0_c)[rcx] + + addsd xmm0,xmm1 + addsd xmm0,xmm9 + ret + +exp2_l_m_0_25: + movlpd xmm1,qword ptr real_0_25[rip] + addsd xmm0,xmm1 + addsd xmm6,xmm1 + + lea rcx,exp2_m0_25_c[rip] + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + att_jmp exp2_ + +exp2_g_0_25: + movlpd xmm1,qword ptr real_0_25[rip] + subsd xmm0,xmm1 + subsd xmm6,xmm1 + + lea rcx,exp2_p0_25_c[rip] + + movsd xmm1,xmm0 # x + mulsd xmm0,xmm0 # x2 + att_jmp exp2_ + +exp2_l_n: + movlpd xmm1,qword ptr real_m_0_5[rip] + movq xmm4,qword ptr m_round_c[rip] + jmp exp2_l_pn + +exp2_l: + movlpd xmm1,qword ptr real_0_5[rip] + movq xmm4,qword ptr round_c[rip] + +exp2_l_pn: + addsd xmm1,xmm0 + + movq xmm2,xmm1 + psrlq xmm1,52 + psubq xmm4,xmm1 + movq xmm1,qword ptr mask_all_one[rip] + psllq xmm1,xmm4 + andpd xmm1,xmm2 + + subsd xmm6,xmm1 + movsd xmm0,xmm7 + + cvtpd2dq xmm8,xmm1 + + ucomisd xmm1,qword ptr qword ptr real_1023[rip] + ja exp2_overflow0 + ucomisd xmm1,qword ptr qword ptr real_m_1022[rip] + jb exp2_underflow0 + + movq xmm9,qword ptr qw_1023[rip] + + addsd xmm0,xmm6 + + 
paddq xmm8,xmm9 + + psllq xmm8,52 + + att_call exp2_l_ + + mulsd xmm0,xmm8 + ret + +exp2_overflow0: + ucomisd xmm1,qword ptr qword ptr real_1025[rip] + ja exp2_overflow + + movq xmm9,qword ptr qw_1021[rip] + + addsd xmm0,xmm6 + + paddq xmm8,xmm9 + + psllq xmm8,52 + + att_call exp2_l_ + + mulsd xmm0,xmm8 + mulsd xmm0,qword ptr real_4_0[rip] + ret + +exp2_overflow: + movlpd xmm0,qword ptr qword ptr real_2_p_1023[rip] + mulsd xmm0,xmm0 + ret + +exp2_underflow0: + ucomisd xmm1,qword ptr qword ptr real_m_1076[rip] + jbe exp2_underflow + + movq xmm9,qword ptr qw_1078[rip] + + addsd xmm0,xmm6 + + paddq xmm8,xmm9 + + psllq xmm8,52 + + att_call exp2_l_ + + mulsd xmm0,xmm8 + mulsd xmm0,qword ptr real_2_p_m_55[rip] + ret + +exp2_underflow: + movlpd xmm0,qword ptr qword ptr real_0_0[rip] + ret + + .data + +real_13_bits: + .quad 0xffffff0000000000 +real_14_bits: + .quad 0xffffff8000000000 +real_17_bits: + .quad 0xfffffff000000000 +real_18_bits: + .quad 0xfffffff800000000 +real_21_bits: + .quad 0xffffffff00000000 +real_25_bits: + .quad 0xfffffffff0000000 +real_26_bits: + .quad 0xfffffffff8000000 +real_27_bits: + .quad 0xfffffffffc000000 +real_34_bits: + .quad 0xfffffffffff80000 +real_39_bits: + .quad 0xffffffffffffc000 +real_40_bits: + .quad 0xffffffffffffe000 +real_47_bits: + .quad 0xffffffffffffffc0 +real_48_bits: + .quad 0xffffffffffffffe0 +real_49_bits: + .quad 0xfffffffffffffff0 +real_50_bits: + .quad 0xfffffffffffffff8 +real_51_bits: + .quad 0xfffffffffffffffc + +real_abs_40_bits: + .quad 0x7fffffffffffe000 +real_abs_39_bits: + .quad 0x7fffffffffffc000 + +real_0_0: + .double 0r0.0 +real_0_25: + .double 0r0.25 +real_m_0_25: + .double 0r-0.25 +real_0_5: + .double 0r0.5 +real_m_0_5: + .double 0r-0.5 +real_1_0: + .double 0r1.0 +real_m_1_0: + .double 0r-1.0 +real_1_25: + .double 0r1.25 +real_m_1_25: + .double 0r-1.25 +real_1_5: + .double 0r1.5 +real_m_1_5: + .double 0r-1.5 +real_2_0: + .double 0r2.0 +real_m_2_0: + .double 0r-2.0 +real_3_0: + .double 0r3.0 +real_m_3_0: + .double 
0r-3.0 +real_4_0: + .double 0r4.0 +real_m_4_0: + .double 0r-4.0 +real_5_0: + .double 0r5.0 +real_m_5_0: + .double 0r-5.0 + +real_2_p_m_11: + .double 0r4.8828125E-4 +real_2_p_m_12: + .double 0r2.44140625E-4 +real_m_2_p_m_12: + .double 0r-2.44140625E-4 +real_2_p_m_13: + .double 0r1.220703125E-4 +real_2_p_m_16: + .double 0r1.52587890625E-5 +real_m_2_p_m_16: + .double 0r-1.52587890625E-5 +real_2_p_m_18: + .double 0r3.814697265625E-6 +real_2_p_m_31: + .double 0r4.656612873077392578125E-10 + +real_0_4: + .double 0r0.4 +real_m_0_4: + .double 0r-0.4 +real_0_125: + .double 0r0.125 + +real_0_675: + .double 0r0.675 +real_m_0_675: + .double 0r-0.675 +real_0_9920: + .double 0r0.9920 +real_0_54000000017867999524: + .double 0r0.54000000017867999524 +real_m_0_54000000017867999524: + .double 0r-0.54000000017867999524 +real_0_03125: + .double 0r0.03125 +real_pi_d_2_m_0_03125: + .double 0r1.5395463267948965579 + +real_0_58: + .double 0r0.58 +real_m_0_58: + .double 0r-0.58 +real_0_54: + .double 0r0.54 +real_0_76: + .double 0r0.76 +real_0_65000000004061742054: + .double 0r0.65000000004061742054 + +real_0_26: + .double 0r0.26 +real_m_0_26: + .double 0r-0.26 +real_0_70: + .double 0r0.70 +real_m_0_70: + .double 0r-0.70 +real_1_4: + .double 0r1.4 +real_m_1_4: + .double 0r-1.4 + +real_0_7025: + .double 0r0.7025 +real_1_405: + .double 0r1.405 +real_0_8243: + .double 0r0.82436063535006407342 +real_1_6487: + .double 0r1.6487212707001281468 +real_0_833: + .double 0r0.833 +real_1_666: + .double 0r1.666 +real_7_d_6: + .double 0r1.1666666666666667407 +real_0_75: + .double 0r0.75 + +ln_4_d_3_53: + .double 0r2.8768207245178090137E-1 +ln_4_d_3_53_l: + .double 0r2.6071606164425639761E-17 +ln_4_d_3_42: + .double 0r2.8768207245173016417E-1 +ln_4_d_3_42_l: + .double 0r5.0763263831534079404E-14 + +log10_4_d_3_53: + .double 0r1.2493873660829994676E-1 +log10_4_d_3_53_l: + .double 0r6.3704258509422005489E-18 +log10_4_d_3_42: + .double 0r1.2493873660829990513E-1 +log10_4_d_3_42_l: + .double 
0r4.8003789274385573126E-17 + +log2_4_d_3_53: + .double 0r4.1503749927884381332E-1 +log2_4_d_3_53_l: + .double 0r5.2244900613901090572E-18 +log2_4_d_3_42: + .double 0r4.1503749927881017356E-1 +log2_4_d_3_42_l: + .double 0r3.3644982136203633607E-14 + +mask_significand: + .quad 0x0000fffffffffffff + +real_1_66: + .double 0r1.66 +real_0_83: + .double 0r0.83 + +real_pi: + .double 0r3.1415926535897931160 +real_m_pi: + .double 0r-3.1415926535897931160 +real_pi_l: + .double 0r1.2246467991473532072E-16 +real_m_pi_l: + .double 0r-1.2246467991473532072E-16 + +real_pi_d_2: + .double 0r1.5707963267948965579 +real_pi_d_2_l: + .double 0r6.1232339957367660359E-17 + +real_m_pi_d_2: + .double 0r-1.5707963267948965579 +real_m_pi_d_2_l: + .double 0r-6.1232339957367660359E-17 + +real_pi_m_1_d_2: + .double 0r1.07079632679489655800 +real_pi_d_4: + .double 0r7.8539816339744827900E-1 +real_m_pi_d_4: + .double 0r-7.8539816339744827900E-1 +real_pi_d_4_l: + .double 0r3.0616169978683830179E-17 +real_m_pi_d_4_l: + .double 0r-3.0616169978683830179E-17 +real_pi_p_0_375_d_4: + .double 0r8.7914816339744830962E-1 +real_pi_p_0_875_d_4: + .double 0r1.0041481633974482790 +real_pi_p_1_5_d_4: + .double 0r1.1603981633974483096 +real_3_pi_d_4: + .double 0r2.3561944901923449288 +real_m_3_pi_d_4: + .double 0r-2.3561944901923449288 +real_5_pi_d_4: + .double 0r3.9269908169872413950 +real_m_5_pi_d_4: + .double 0r-3.9269908169872413950 +real_7_pi_d_4: + .double 0r5.4977871437821379530 +real_m_7_pi_d_4: + .double 0r-5.4977871437821379530 +real_9_pi_d_4: + .double 0r7.0685834705770345110 +real_m_9_pi_d_4: + .double 0r-7.0685834705770345110 +real_3_pi_d_2: + .double 0r4.7123889803846896740 +real_m_3_pi_d_2: + .double 0r-4.7123889803846896740 +real_3_pi_d_2_l: + .double 0r1.8369701987210296875E-16 +real_m_3_pi_d_2_l: + .double 0r-1.8369701987210296875E-16 +real_2_pi: + .double 0r6.2831853071795862320 +real_2_pi_l: + .double 0r2.4492935982947064143E-16 +real_m_2_pi_l: + .double 0r-2.4492935982947064143E-16 + 
+real_4_d_pi: + .double 0r1.2732395447351627649 +real_pi_d_4_26: + .double 0r7.8539815545082092285E-1 +real_pi_d_4_26_26: + .double 0r7.9466273561479283671E-9 +real_pi_d_4_52_l: + .double 0r3.0616169978683830179E-17 + +real_36825084_pi: + .double 0r115689413.36222703755 +real_m_36825084_pi: + .double 0r-115689413.36222703755 +real_36825084_pi_27: + .double 0r115689413.0 +real_36825084_pi_27_27: + .double 0r3.6222703754901885986E-1 +real_36825084_pi_54_l: + .double 0r1.3588030639188803060E-17 + +real_pi_d_2_m_1_0_52: + .double 0r5.7079632679489655800E-1 +real_1_0_m_pi_d_2_52: + .double 0r-5.7079632679489655800E-1 +real_pi_d_2_m_1_0_52_l: + .double 0r6.1232339957367660359E-17 +real_m_pi_d_2_m_1_0_52_l: + .double 0r-6.1232339957367660359E-17 +real_pi_d_2_p_1_0: + .double 0r2.57079632679489655800 +real_pi_d_2_p_1_0_l: + .double 0r6.1232339957367660359E-17 +real_m_pi_d_2_p_1_0_l: + .double 0r-6.1232339957367660359E-17 + +real_0_338: + .double 0r0.338 +real_m_0_338: + .double 0r-0.338 +real_0_542: + .double 0r0.542 +real_m_0_542: + .double 0r-0.542 +real_0_699: + .double 0r0.699 +real_m_0_699: + .double 0r-0.699 +real_0_89: + .double 0r0.89 +real_m_0_89: + .double 0r-0.89 +real_1_04: + .double 0r1.04 +real_m_1_04: + .double 0r-1.04 +real_1_18: + .double 0r1.18 +real_m_1_18: + .double 0r-1.18 + +real_0_5_pi_m_1_18: + .double 0r3.9079632679489667568E-1 +real_1_18_m_0_5_pi: + .double 0r-3.9079632679489667568E-1 +real_0_5_pi_m_1_04: + .double 0r5.3079632679489663349E-1 +real_1_04_m_0_5_pi: + .double 0r-5.3079632679489663349E-1 +real_0_5_pi_m_0_89: + .double 0r6.8079632679489665570E-1 +real_0_89_m_0_5_pi: + .double 0r-6.8079632679489665570E-1 + +real_pi_m_1_18: + .double 0r1.9615926535897934002 +real_n_pi_m_1_18: + .double 0r-1.9615926535897934002 +real_pi_m_1_04: + .double 0r2.1015926535897930805 +real_n_pi_m_1_04: + .double 0r-2.1015926535897930805 +real_pi_m_0_89: + .double 0r2.2515926535897934357 +real_n_pi_m_0_89: + .double 0r-2.2515926535897934357 +real_pi_m_0_699: + 
.double 0r2.4425926535897932723 +real_n_pi_m_0_699: + .double 0r-2.4425926535897932723 +real_pi_m_0_542: + .double 0r2.5995926535897933007 +real_n_pi_m_0_542: + .double 0r-2.5995926535897933007 +real_pi_m_0_338: + .double 0r2.8035926535897930378 +real_n_pi_m_0_338: + .double 0r-2.8035926535897930378 + +real_pi_p_0_338: + .double 0r3.4795926535897931942 +real_n_pi_p_0_338: + .double 0r-3.4795926535897931942 +real_pi_p_0_542: + .double 0r3.6835926535897933753 +real_n_pi_p_0_542: + .double 0r-3.6835926535897933753 +real_pi_p_0_699: + .double 0r3.8405926535897934038 +real_n_pi_p_0_699: + .double 0r-3.8405926535897934038 +real_pi_p_0_89: + .double 0r4.0315926535897936844 +real_n_pi_p_0_89: + .double 0r-4.0315926535897936844 +real_pi_p_1_04: + .double 0r4.1815926535897931515 +real_n_pi_p_1_04: + .double 0r-4.1815926535897931515 +real_pi_p_1_18: + .double 0r4.3215926535897928318 +real_n_pi_p_1_18: + .double 0r-4.3215926535897928318 + +real_2pi_m_1_18: + .double 0r5.1031853071795865162 +real_n_2pi_m_1_18: + .double 0r-5.1031853071795865162 +real_2pi_m_1_04: + .double 0r5.2431853071795861965 +real_n_2pi_m_1_04: + .double 0r-5.2431853071795861965 +real_2pi_m_0_89: + .double 0r5.3931853071795865517 +real_n_2pi_m_0_89: + .double 0r-5.3931853071795865517 +real_2pi_m_0_699: + .double 0r5.5841853071795863883 +real_n_2pi_m_0_699: + .double 0r-5.5841853071795863883 +real_2pi_m_0_542: + .double 0r5.7411853071795864167 +real_n_2pi_m_0_542: + .double 0r-5.7411853071795864167 +real_2pi_m_0_338: + .double 0r5.9451853071795861538 +real_n_2pi_m_0_338: + .double 0r-5.9451853071795861538 + +real_2pi_p_0_338: + .double 0r6.6211853071795863102 +real_n_2pi_p_0_338: + .double 0r-6.6211853071795863102 +real_2pi_p_0_542: + .double 0r6.8251853071795869354 +real_n_2pi_p_0_542: + .double 0r-6.8251853071795869354 +real_2pi_p_0_699: + .double 0r6.9821853071795860757 +real_n_2pi_p_0_699: + .double 0r-6.9821853071795860757 +real_2pi_p_0_89: + .double 0r7.1731853071795868004 +real_n_2pi_p_0_89: + .double 
0r-7.1731853071795868004 + +atan_0_5_53: + .double 0r4.6364760900080609352E-1 +m_atan_0_5_53: + .double 0r-4.6364760900080609352E-1 +atan_0_5_53_l: + .double 0r2.2698777452961687092E-17 + +atan_sqrt_0_5: + .double 0r6.1547970867038737097E-1 +m_atan_sqrt_0_5: + .double 0r-6.1547970867038737097E-1 +atan_sqrt_0_5_l: + .double 0r-2.9904856561351187768E-17 + +real_h_pi_m_atan_s_2_53: + .double 0r6.1547970867038737097E-1 +real_h_pi_m_atan_s_2_53_l: + .double 0r-2.9904856561351187768E-17 + +real_sqrt_2_p_1_5_pi_m_3_atan_sqrt_2_53: + .double 0r3.2606526883842570363 +real_sqrt_2_p_1_5_pi_m_3_atan_sqrt_2_53_l: + .double 0r3.5657102106448616432E-17 +real_3_atan_sqrt_2_m_sqrt_2_m_1_5_pi_m_53_l: + .double 0r-3.5657102106448616432E-17 + +atan_sqrt_2: + .double 0r9.5531661812450929805E-1 +m_atan_sqrt_2: + .double 0r-9.5531661812450929805E-1 +atan_sqrt_2_l: + .double 0r-1.9885105943796805915E-17 + +real_h_pi_m_atan_2_52: + .double 0r4.6364760900080603800E-1 +real_h_pi_m_atan_2_52_l: + .double 0r7.8209928684219507951E-17 +real_2_p_2_5_pi_m_5_atan_2_53: + .double 0r4.3182380450040307451 +real_2_p_2_5_pi_m_5_atan_2_53_l: + .double 0r-1.6406186889148070581E-16 +real_5_atan_2_m_2_m_2_5_pi_53_l: + .double 0r1.6406186889148070581E-16 + +atan_2_53: + .double 0r1.1071487177940904090 +m_atan_2_53: + .double 0r-1.1071487177940904090 +atan_2_53_l: + .double 0r9.4044713735663794124E-17 +real_n_2_m_5_atan_2_53: + .double 0r-3.5357435889704524890 +real_n_2_m_5_atan_2_53_l: + .double 0r-2.6134358828256379104E-17 + +real_5_atan_2_m_2_53: + .double 0r3.5357435889704524890 +real_5_atan_2_m_2_53_l: + .double 0r2.6134358828256379104E-17 + +real_ln_2_t_0_5: + .double 0r3.4657359027997265471E-1 +real_n_ln_2_t_0_5: + .double 0r-3.4657359027997265471E-1 +real_ln_2_t_1_5: + .double 0r1.0397207708399179641 +real_ln2_42: + .double 0r0.69314718055989033019 +real_ln2_42_l: + .double 0r5.4979230187083711552E-14 +real_1_d_ln2: + .double 0r1.4426950408889634074 + +real_log2_10_42: + .double 
0r0.30102999566395283182 +real_log2_10_42_l: + .double 0r2.8363394551044964154E-14 + +#d1ln10: +# .double 0r4.3429448190325181667E-1 +d1ln10_26: + .double 0r4.3429447710514068603E-1 +d1ln10_26_l: + .double 0r4.7981111416159728067E-9 + +#d1ln2: +# .double 0r1.4426950408889633870 +d1ln2_26: + .double 0r1.4426950216293334961 +d1ln2_26_l: + .double 0r1.9259629911266174689E-8 + + .align 4 +round_c: + .quad 0x3ff+52 + .quad 0x3ff+52 +m_round_c: + .quad 0x0bff+52 + .quad 0x0bff+52 +round_even_c: + .quad 0x3ff+53 + .quad 0x3ff+53 +m_round_even_c: + .quad 0x0bff+53 + .quad 0x0bff+53 +mask_all_one: + .quad -1 + .quad -1 +mask_all_one_except_last: + .quad -2 + .quad -2 +mask_all_one_except_second_last: + .quad -3 + .quad -3 +qw_1023: + .quad 0x3ff + .quad 0x3ff +qw_1022: + .quad 0x3fe + .quad 0x3fe +qw_1021: + .quad 0x3fd + .quad 0x3fd +qw_1077: + .quad 0x3ff+54 + .quad 0x3ff+54 +qw_1078: + .quad 0x3ff+55 + .quad 0x3ff+55 +qw_55: + .quad 55 + .quad 55 +mask_sign: + .quad 0x08000000000000000 + .quad 0x08000000000000000 +mask_all_except_sign: + .quad 0x07fffffffffffffff + .quad 0x07fffffffffffffff + +real_0_43540000008249979402: + .double 0r0.43540000008249979402 +real_m_0_43540000008249979402: + .double 0r-0.43540000008249979402 +real_0_600000000082499762577: + .double 0r0.600000000082499762577 +real_m_0_600000000082499762577: + .double 0r-0.600000000082499762577 + +#sin_p_0: +# .double 0r-1.6666666666666629659E-1 +#sin_p_1: +# .double 0r8.3333333333220592498E-3 +#sin_p_2: +# .double 0r-1.9841269829591994520E-4 +#sin_p_3: +# .double 0r2.7557313627980839565E-6 +#sin_p_4: +# .double 0r-2.5050749232606622552E-8 +#sin_p_5: +# .double 0r1.5896334589820396677E-10 + +sin_p_0: + .double 0r-1.6666666666666635210E-1 +sin_p_1: + .double 0r8.3333333333230012047E-3 +sin_p_2: + .double 0r-1.9841269830204560616E-4 +sin_p_3: + .double 0r2.7557313816586650493E-6 +sin_p_4: + .double 0r-2.5050776321203382309E-8 +sin_p_5: + .double 0r1.5897788850050641142E-10 + +sin_p1_c: +sin_p1_0h: + .double 
0r5.6464247346312534948E-1 +sin_p1_0l: + .double 0r-6.06241788103829245082E-24 +sin_p1_1h: + .double 0r8.2533560693264007568E-1 +sin_p1_1l: + .double 0r7.9304552924597260244E-9 +sin_p1_2: + .double 0r-2.8232123673155168353E-1 +sin_p1_3: + .double 0r-1.3755593581121927649E-1 +sin_p1_4: + .double 0r2.3526769749636338575E-2 +sin_p1_5: + .double 0r6.8777964033410682668E-3 +sin_p1_6: + .double 0r-7.8422158064252829243E-4 +sin_p1_7: + .double 0r-1.6378316139497952112E-4 +sin_p1_8: + .double 0r1.4102652890564176127E-5 +sin_p1_9: + .double 0r2.0754701007995074451E-6 + +sin_p2_c: +sin_p2_0h: + .double 0r5.6464247346312534948E-1 +sin_p2_0l: + .double 0r-6.06241788103829245082E-24 +sin_p2_1h: + .double 0r8.2533560693264007568E-1 +sin_p2_1l: + .double 0r7.9304554034820284869E-9 +sin_p2_2: + .double 0r-2.8232123673155273824E-1 +sin_p2_3: + .double 0r-1.3755593580986152924E-1 +sin_p2_4: + .double 0r2.3526769748731638648E-2 +sin_p2_5: + .double 0r6.8777971733520841535E-3 +sin_p2_6: + .double 0r-7.8422151328288352071E-4 +sin_p2_7: + .double 0r-1.6373002515216467253E-4 +sin_p2_8: + .double 0r1.4106512050855138886E-5 +sin_p2_9: + .double 0r2.4764422854631738044E-6 + +sin_p3_c: +sin_p3_0h: + .double 0r-5.6464247346312534948E-1 +sin_p3_0l: + .double 0r6.06241788103829245082E-24 +sin_p3_1h: + .double 0r8.2533560693264007568E-1 +sin_p3_1l: + .double 0r7.9304552924597260244E-9 +sin_p3_2: + .double 0r2.8232123673155168353E-1 +sin_p3_3: + .double 0r-1.3755593581121927649E-1 +sin_p3_4: + .double 0r-2.3526769749636338575E-2 +sin_p3_5: + .double 0r6.8777964033410682668E-3 +sin_p3_6: + .double 0r7.8422158064252829243E-4 +sin_p3_7: + .double 0r-1.6378316139497952112E-4 +sin_p3_8: + .double 0r-1.4102652890564176127E-5 +sin_p3_9: + .double 0r2.0754701007995074451E-6 + +sin_p4_c: +sin_p4_0h: + .double 0r-5.6464247346312534948E-1 +sin_p4_0l: + .double 0r6.06241788103829245082E-24 +sin_p4_1h: + .double 0r8.2533560693264007568E-1 +sin_p4_1l: + .double 0r7.9304554034820284869E-9 +sin_p4_2: + .double 
0r2.8232123673155273824E-1 +sin_p4_3: + .double 0r-1.3755593580986152924E-1 +sin_p4_4: + .double 0r-2.3526769748731638648E-2 +sin_p4_5: + .double 0r6.8777971733520841535E-3 +sin_p4_6: + .double 0r7.8422151328288352071E-4 +sin_p4_7: + .double 0r-1.6373002515216467253E-4 +sin_p4_8: + .double 0r-1.4106512050855138886E-5 +sin_p4_9: + .double 0r2.4764422854631738044E-6 + +cos_p_0: + .double 0r4.1666666666666588026E-2 +cos_p_1: + .double 0r-1.3888888888872778175E-3 +cos_p_2: + .double 0r2.4801587288764459412E-5 +cos_p_3: + .double 0r-2.7557314167050702998E-7 +cos_p_4: + .double 0r2.0875701408714520036E-9 +cos_p_5: + .double 0r-1.1358718309736171193E-11 + +# .. .5854 +#tan_p_0: +# .double 0r-18086151.600729089230 +#tan_p_1: +# .double 0r1161461.5827075217385 +#tan_p_2: +# .double 0r-13169.960960193349820 +#tan_q_0: +# .double 0r-54258454.802187263966 +#tan_q_1: +# .double 0r25187766.668997306377 +#tan_q_2: +# .double 0r-1329914.3444088697433 +#tan_q_3: +# .double 0r13760.338151945812569 + +# .. 
.338 +tan_p_0: + .double 0r-18191117.789276178926 +tan_p_1: + .double 0r1167963.0645372841973 +tan_p_2: + .double 0r-13234.676172752202547 +tan_q_0: + .double 0r-54573353.367828540504 +tan_q_1: + .double 0r25333230.540743269026 +tan_q_2: + .double 0r-1337310.4614476819988 +tan_q_3: + .double 0r13827.503083158084337 + +# ..7854 +#tan2_p_0: +# .double 0r3.3283674370620949801E-2 +#tan2_p_1: +# .double 0r2.5663601402049946582E-2 +#tan2_p_2: +# .double 0r-3.1180502523212879186E-4 +#tan2_p_3: +# .double 0r4.9814468378033982921E-7 +#tan2_q_0: +# .double 0r-1.3338299229604572149E-1 +#tan2_q_1: +# .double 0r3.4248261922569253959E-3 +#tan2_q_2: +# .double 0r-1.7860673544716858919E-5 + +# ..3678 +#tan2_p_0: +# .double 0r3.3322645703933846673E-2 +#tan2_p_1: +# .double 0r2.5645883816593488536E-2 +#tan2_p_2: +# .double 0r-3.1101629629724883994E-4 +#tan2_p_3: +# .double 0r4.9436497805178155231E-7 +#tan2_q_0: +# .double 0r-1.3334402096273281768E-1 +#tan2_q_1: +# .double 0r3.4200990512379442383E-3 +#tan2_q_2: +# .double 0r-1.7781628650750523380E-5 + +# ..3908 +tan2_p_0: + .double 0r3.3321283446731200795E-2 +tan2_p_1: + .double 0r2.5646503068216856824E-2 +tan2_p_2: + .double 0r-3.1104383115385493899E-4 +tan2_p_3: + .double 0r4.9449499280323978810E-7 +tan2_q_0: + .double 0r-1.3334538321993547050E-1 +tan2_q_1: + .double 0r3.4202642171272572627E-3 +tan2_q_2: + .double 0r-1.7784380596478880932E-5 + +# .. 
.2 +tan3_p_0: + .double 0r-18224747.034562580287 +tan3_p_1: + .double 0r1170032.8518652219791 +tan3_p_2: + .double 0r-13254.922404240271135 +tan3_q_0: + .double 0r-54674241.103687740862 +tan3_q_1: + .double 0r25379794.997070763260 +tan3_q_2: + .double 0r-1339662.7778249811381 +tan3_q_3: + .double 0r13848.512058865804647 + +tan_0_5_t: + .double 0r1.25 + .double 0r0.5 +real_0_5_m_1_25_atan_0_5_49_l: + .double 0r-1.8102913770216112547E-16 +real_0_5_m_1_25_atan_0_5_49: + .double 0r-7.9559511251007464239E-2 + +tan_n_0_5_t: + .double 0r1.25 + .double 0r-0.5 +real_1_25_atan_0_5_m_0_5_49_l: + .double 0r1.8102913770216112547E-16 +real_1_25_atan_0_5_m_0_5_49: + .double 0r7.9559511251007464239E-2 + +tan_s_0_5_t: + .double 0r1.5 +sqrt_0_5: + .double 0r7.07106781186547572737E-1 +sqrtn_0_5_m_1_5_atann_sqrt_0_5_l: + .double 0r-3.4791817252377832918E-18 +sqrtn_0_5_m_1_5_atann_sqrt_0_5: + .double 0r-2.1611278181903348372E-1 + +tan_n_s_0_5_t: + .double 0r1.5 +n_sqrt_0_5: + .double 0r-7.07106781186547572737E-1 +n_sqrtn_0_5_m_1_5_atann_sqrt_0_5_l: + .double 0r3.4791817252377832918E-18 +n_sqrtn_0_5_m_1_5_atann_sqrt_0_5: + .double 0r2.1611278181903348372E-1 + +tan_s_2_t: + .double 0r3.0 +sqrt_2_0: + .double 0r1.4142135623730951455 +sqrt_2_m_3_atan_sqrt_2_l: + .double 0r7.4004687159376949604E-17 +sqrt_2_m_3_atan_sqrt_2: + .double 0r-1.4517362920004328597 + +tan_n_s_2_t: + .double 0r3.0 +n_sqrt_2_0: + .double 0r-1.4142135623730951455 +n_sqrt_2_m_3_atan_sqrt_2_l: + .double 0r-7.4004687159376949604E-17 +n_sqrt_2_m_3_atan_sqrt_2: + .double 0r1.4517362920004328597 + +# .. 
0.55 +#asin_p_0: +# .double 0r-25.316696852169688014 +#asin_p_1: +# .double 0r53.530200010422156254 +#asin_p_2: +# .double 0r-37.683203270692622766 +#asin_p_3: +# .double 0r9.8280386103977175338 +#asin_p_4: +# .double 0r-0.6933246588625101570 +#asin_q_0: +# .double 0r-151.90018111301861836 +#asin_q_1: +# .double 0r389.53628156345331490 +#asin_q_2: +# .double 0r-360.70299781814912876 +#asin_q_3: +# .double 0r144.63464250990605819 +#asin_q_4: +# .double 0r-23.247871426450686272 + +asin_p_0: + .double 0r-31.133659782158286333 +asin_p_1: + .double 0r63.858157457633225818 +asin_p_2: + .double 0r-43.244454939408448979 +asin_p_3: + .double 0r10.711904434851501122 +asin_p_4: + .double 0r-0.7023102565817557208 +asin_q_0: + .double 0r-186.80195869294971089 +asin_q_1: + .double 0r467.20982615762636669 +asin_q_2: + .double 0r-419.67491247177241576 +asin_q_3: + .double 0r162.03208845920067915 +asin_q_4: + .double 0r-24.808857438965841169 + +asin_c: + +asin2_p_0: + .double 0r31.367022265271895520 +asin2_p_1: + .double 0r-27.553457288163915706 +asin2_p_2: + .double 0r7.3142287109234631615 +asin2_p_3: + .double 0r-0.5695539268055010984 +asin2_p_4: + .double 0r2.7732700457810590539E-3 +asin2_q_0: + .double 0r376.40426718326244782 +asin2_q_1: + .double 0r-415.33244757417799065 +asin2_q_2: + .double 0r156.01490234281396852 +asin2_q_3: + .double 0r-22.702479601946876642 + +// 0.55 .. 
+#asin2_p_0: +# .double 0r29.355358314463529723 +#asin2_p_1: +# .double 0r-26.147018538045006864 +#asin2_p_2: +# .double 0r7.0703500716163327766 +#asin2_p_3: +# .double 0r-0.5652641176899680930 +#asin2_p_4: +# .double 0r2.9079909341710704143E-3 +#asin2_q_0: +# .double 0r352.26429977355559231 +#asin2_q_1: +# .double 0r-393.02368990519767067 +#asin2_q_2: +# .double 0r149.68540386246363028 +#asin2_q_3: +# .double 0r-22.170690309691252651 + +asin_p1_c: +asin_p1_0h: + .double 0r5.7043710961221538724E-1 +asin_p1_0l: + .double 0r4.9248428721475757358E-23 +asin_p1_1h: + .double 0r1.1881211400032043457 +asin_p1_1l: + .double 0r1.4630145983574038837E-9 +asin_p1_2: + .double 0r4.5284120326143617596E-1 +asin_p1_3: + .double 0r6.2472394124586794195E-1 +asin_p1_4: + .double 0r8.0835086833083236169E-1 +asin_p1_5: + .double 0r1.2595104015172415846 +asin_p1_6: + .double 0r2.0488537552293428767 +asin_p1_7: + .double 0r3.5089140995443472981 +asin_p1_8: + .double 0r6.2851334414813333495 +asin_p1_9: + .double 0r10.128040933414240499 +asin_p1_10: + .double 0r33.046781180882994988 +asin_p1_11: + .double 0r-52.817309641779630169 +asin_p1_12: + .double 0r540.99494117159986217 +asin_p1_13: + .double 0r-1326.1434641099594955 +asin_p1_14: + .double 0r2612.6799241572512074 + +asin_p2_c: +asin_p2_0h: + .double 0r5.7043710961221538724E-1 +asin_p2_0l: + .double 0r4.9248428721475757358E-23 +asin_p2_1h: + .double 0r1.1881211400032043457 +asin_p2_1l: + .double 0r1.4630165967588482090E-9 +asin_p2_2: + .double 0r4.5284120325997734291E-1 +asin_p2_3: + .double 0r6.2472394155362398482E-1 +asin_p2_4: + .double 0r8.0835082941011759949E-1 +asin_p2_5: + .double 0r1.2595128937378745615 +asin_p2_6: + .double 0r2.0487338667125807312 +asin_p2_7: + .double 0r3.5123184925937764866 +asin_p2_8: + .double 0r6.2076105483454178113 +asin_p2_9: + .double 0r11.227381892153145415 +asin_p2_10: + .double 0r20.429661082626292767 +asin_p2_11: + .double 0r35.926476241524127886 +asin_p2_12: + .double 0r55.782391042293575367 
+asin_p2_13: + .double 0r64.653236450484357078 +asin_p2_14: + .double 0r39.681910714502151905 + +asin_p3_c: +asin_p3_0h: + .double 0r-5.7043710961221538724E-1 +asin_p3_0l: + .double 0r-4.9248428721475757358E-23 +asin_p3_1h: + .double 0r1.1881211400032043457 +asin_p3_1l: + .double 0r1.4630145983574038837E-9 +asin_p3_2: + .double 0r-4.5284120326143617596E-1 +asin_p3_3: + .double 0r6.2472394124586794195E-1 +asin_p3_4: + .double 0r-8.0835086833083236169E-1 +asin_p3_5: + .double 0r1.2595104015172415846 +asin_p3_6: + .double 0r-2.0488537552293428767 +asin_p3_7: + .double 0r3.5089140995443472981 +asin_p3_8: + .double 0r-6.2851334414813333495 +asin_p3_9: + .double 0r10.128040933414240499 +asin_p3_10: + .double 0r-33.046781180882994988 +asin_p3_11: + .double 0r-52.817309641779630169 +asin_p3_12: + .double 0r-540.99494117159986217 +asin_p3_13: + .double 0r-1326.1434641099594955 +asin_p3_14: + .double 0r-2612.6799241572512074 + +asin_p4_c: +asin_p4_0h: + .double 0r-5.7043710961221538724E-1 +asin_p4_0l: + .double 0r-4.9248428721475757358E-23 +asin_p4_1h: + .double 0r1.1881211400032043457 +asin_p4_1l: + .double 0r1.4630165967588482090E-9 +asin_p4_2: + .double 0r-4.5284120325997734291E-1 +asin_p4_3: + .double 0r6.2472394155362398482E-1 +asin_p4_4: + .double 0r-8.0835082941011759949E-1 +asin_p4_5: + .double 0r1.2595128937378745615 +asin_p4_6: + .double 0r-2.0487338667125807312 +asin_p4_7: + .double 0r3.5123184925937764866 +asin_p4_8: + .double 0r-6.2076105483454178113 +asin_p4_9: + .double 0r11.227381892153145415 +asin_p4_10: + .double 0r-20.429661082626292767 +asin_p4_11: + .double 0r35.926476241524127886 +asin_p4_12: + .double 0r-55.782391042293575367 +asin_p4_13: + .double 0r64.653236450484357078 +asin_p4_14: + .double 0r-39.681910714502151905 + +acos_p_0: + .double 0r-23.490891740111308650 +acos_p_1: + .double 0r50.216364730997767651 +acos_p_2: + .double 0r-35.846130541173899076 +acos_p_3: + .double 0r9.5237685361261430472 +acos_p_4: + .double 0r-0.68995264971619463257 
+acos_q_0: + .double 0r-140.94535044067245622 +acos_q_1: + .double 0r364.72359608470605963 +acos_q_2: + .double 0r-341.44918263146718118 +acos_q_3: + .double 0r138.79408611688072028 +acos_q_4: + .double 0r-22.704016182082593645 + +acos2_p_0: + .double 0r30.187145498002834643 +acos2_p_1: + .double 0r-26.731562531992352660 +acos2_p_2: + .double 0r7.1724615825161839311 +acos2_p_3: + .double 0r-0.56708755287485912433 +acos2_p_4: + .double 0r2.8497262016782816223E-3 +acos2_q_0: + .double 0r362.24574597603304937 +acos2_q_1: + .double 0r-402.28404322843209684 +acos2_q_2: + .double 0r152.32592108178161538 +acos2_q_3: + .double 0r-22.394016829865698526 + +acos_p1_c: +acos_p1_0h: + .double 0r8.6321189001609244240E-1 +acos_p1_0l: + .double 0r-2.7467628254551864260E-23 +acos_p1_1h: + .double 0r-1.3159033656120300293 +acos_p1_1l: + .double 0r-2.4367664774871400368E-8 +acos_p1_2: + .double 0r-7.4055169144604560127E-1 +acos_p1_3: + .double 0r-1.2132914821382745352 +acos_p1_4: + .double 0r-2.1344587603015683364 +acos_p1_5: + .double 0r-4.3088049195660635959 +acos_p1_6: + .double 0r-9.2462207583022237856 +acos_p1_7: + .double 0r-20.778559358953092584 +acos_p1_8: + .double 0r-48.776431616826357640 +acos_p1_9: + .double 0r-105.97079800530472937 +acos_p1_10: + .double 0r-413.00216737064670269 +acos_p1_11: + .double 0r662.354234414432198719 +acos_p1_12: + .double 0r-11016.701631003812508 +acos_p1_13: + .double 0r35318.9338646810210776 +acos_p1_14: + .double 0r-97099.956120653485414 + +acos_p2_c: +acos_p2_0h: + .double 0r8.6321189001609244240E-1 +acos_p2_0l: + .double 0r-2.7467628254551864260E-23 +acos_p2_1h: + .double 0r-1.3159033656120300293 +acos_p2_1l: + .double 0r-2.4367664108737585593E-8 +acos_p2_2: + .double 0r-7.4055169144296173478E-1 +acos_p2_3: + .double 0r-1.2132914822347449224 +acos_p2_4: + .double 0r-2.1344586500282272112 +acos_p2_5: + .double 0r-4.3088086958273930094 +acos_p2_6: + .double 0r-9.2457168210006930309 +acos_p2_7: + .double 0r-20.790839559740305731 +acos_p2_8: + 
.double 0r-48.251128736941033992 +acos_p2_9: + .double 0r-114.02209884122218853 +acos_p2_10: + .double 0r-267.05630398433208939 +acos_p2_11: + .double 0r-584.65850073182002689 +acos_p2_12: + .double 0r-1076.6291620946751664 +acos_p2_13: + .double 0r-1407.9468675552657260 +acos_p2_14: + .double 0r-936.98633843723951031 + +atan4_p_c: +atan4_p_0: + .double 0r-51.285593469956815227 +atan4_p_1: + .double 0r-100.64117630107490697 +atan4_p_2: + .double 0r-64.297908354955467303 +atan4_p_3: + .double 0r-14.727705326536764119 +atan4_p_4: + .double 0r-0.8688437904180191795 +atan4_q_0: + .double 0r153.85678040987045279 +atan4_q_1: + .double 0r394.23759714914689312 +atan4_q_2: + .double 0r363.49766317870364674 +atan4_q_3: + .double 0r144.60833714965690433 +atan4_q_4: + .double 0r23.038413151078877661 + +n_45_d_256: + .double 0r0.175781250000 +n_m_45_d_256: + .double 0r-0.175781250000 + +atan_p_9_0: + .double 0r-3.3333333333333309279E-1 +atan_p_9_1: + .double 0r1.9999999999984593990E-1 +atan_p_9_2: + .double 0r-1.4285714282482467330E-1 +atan_p_9_3: + .double 0r1.1111110782523182794E-1 +atan_p_9_4: + .double 0r-9.0908902574698829957E-2 +atan_p_9_5: + .double 0r7.6916554867419328900E-2 +atan_p_9_6: + .double 0r-6.6526935532345926605E-2 +atan_p_9_7: + .double 0r5.6999867343679795628E-2 +atan_p_9_8: + .double 0r-3.9093313704495452621E-2 + +atan_0_5_52: + .double 0r4.6364760900080603800E-1 +atan_0_5_52_l: + .double 0r7.8209928684219507951E-17 +atan_0_5_real_0_5: + .double 0r0.5 + +n_atan_0_5_52: + .double 0r-4.6364760900080603800E-1 +n_atan_0_5_52_l: + .double 0r-7.8209928684219507951E-17 +n_atan_0_5_real_0_5: + .double 0r-0.5 + +atan_n_0_5_8_c: +atan_n_0_5_8_0: + .double 0r-3.3333333333333192705E-1 +atan_n_0_5_8_1: + .double 0r1.9999999999920992089E-1 +atan_n_0_5_8_2: + .double 0r-1.4285714269165267210E-1 +atan_n_0_5_8_3: + .double 0r1.1111109349119581424E-1 +atan_n_0_5_8_4: + .double 0r-9.0908019792184269203E-2 +atan_n_0_5_8_5: + .double 0r7.6884259191928067123E-2 +atan_n_0_5_8_6: 
+ .double 0r-6.5833773159939656949E-2 +atan_n_0_5_8_7: + .double 0r4.8945313616423499758E-2 + +atan_1_53: + .double 0r7.8539816339744827900E-1 +atan_1_53_l: + .double 0r3.0616169978683830179E-17 + +n_atan_1_53: + .double 0r-7.8539816339744827900E-1 +n_atan_1_53_l: + .double 0r-3.0616169978683830179E-17 + +atan_n_8_c: +atan_n_8_0: + .double 0r-3.3333333333333325932E-1 +atan_n_8_1: + .double 0r1.9999999999993750555E-1 +atan_n_8_2: + .double 0r-1.4285714283792697032E-1 +atan_n_8_3: + .double 0r1.1111110814149251225E-1 +atan_n_8_4: + .double 0r-9.0908830368634924168E-2 +atan_n_8_5: + .double 0r7.6909493951295496150E-2 +atan_n_8_6: + .double 0r-6.6248652556378059830E-2 +atan_n_8_7: + .double 0r5.1752788427847688790E-2 +atan_n_8_real_40_bits: + .quad 0x0ffffffffffffe000 +atan_n_8_real_abs_40_bits: + .quad 0x07fffffffffffe000 +atan_n_8_real_2_p_m_12: + .double 0r2.44140625E-4 + +atan_p_8_c: +atan_p_8_0: + .double 0r-3.3333333333333331483E-1 +atan_p_8_1: + .double 0r1.9999999999997875033E-1 +atan_p_8_2: + .double 0r-1.4285714284904510452E-1 +atan_p_8_3: + .double 0r1.1111110961868993219E-1 +atan_p_8_4: + .double 0r-9.0908937814301260527E-2 +atan_p_8_5: + .double 0r7.6913854250452912131E-2 +atan_p_8_6: + .double 0r-6.6341308897488418528E-2 +atan_p_8_7: + .double 0r5.2556144706942850564E-2 +atan_p_8_real_39_bits: + .quad 0x0ffffffffffffc000 +atan_p_8_abs_real_39_bits: + .quad 0x07fffffffffffc000 +atan_p_8_real_2_p_m_12: + .double 0r2.44140625E-4 + +atan_2_0: + .double 0r1.1071487177940904090 +atan_2_0_l: + .double 0r9.4044713735663794124E-17 +atan_2_0_real_2_0: + .double 0r2.0 + +n_atan_2_0: + .double 0r-1.1071487177940904090 +n_atan_2_0_l: + .double 0r-9.4044713735663794124E-17 +n_atan_2_0_real_2_0: + .double 0r-2.0 + +atan_n_2_8_c: +atan_n_2_8_0: + .double 0r-3.3333333333333042825E-1 +atan_n_2_8_1: + .double 0r1.9999999999849707444E-1 +atan_n_2_8_2: + .double 0r-1.4285714256949089140E-1 +atan_n_2_8_3: + .double 0r1.1111108323968375833E-1 +atan_n_2_8_4: + .double 
0r-9.0907550811429177484E-2 +atan_n_2_8_5: + .double 0r7.6872320463257509671E-2 +atan_n_2_8_6: + .double 0r-6.5674898157514843833E-2 +atan_n_2_8_7: + .double 0r4.8083662211730243685E-2 + +real_atan_large: + .double 0r5805358775541310.0840 +real_atan_m_large: + .double 0r-5805358775541310.0840 + +exp_p0_c: +exp_p0_2: + .double 0r4.9999999999999900080E-1 +exp_p0_3: + .double 0r1.6666666666678625619E-1 +exp_p0_4: + .double 0r4.1666666661027668139E-2 +exp_p0_5: + .double 0r8.3333334724719175524E-3 +exp_p0_6: + .double 0r1.3888868951434380893E-3 +exp_p0_7: + .double 0r1.9842988288729944998E-4 +exp_p0_8: + .double 0r2.4713876429938641863E-5 +exp_p0_9: + .double 0r2.9988651596576767477E-6 + +exp_p1_c: +exp_p1_0: + .double 0r1.1921772413532716328 +exp_p1_0_l: + .double 0r-5.3111925243256501606E-17 +exp_p1_1_h: + .double 0r1.1921772360801696777 +exp_p1_1_l: + .double 0r5.2731019550833480025E-9 +exp_p1_2: + .double 0r5.9608862067662515827E-1 +exp_p1_3: + .double 0r1.9869620689294639759E-1 +exp_p1_4: + .double 0r4.9674051698174313818E-2 +exp_p1_5: + .double 0r9.9348108199627475218E-3 +exp_p1_6: + .double 0r1.6557962681130399766E-3 +exp_p1_7: + .double 0r2.3658136938083061391E-4 +exp_p1_8: + .double 0r2.9408132193586159414E-5 +exp_p1_9: + .double 0r3.6463907908503166658E-6 + +exp_m0_c: +exp_m0_2: + .double 0r4.9999999999999916733E-1 +exp_m0_3: + .double 0r1.6666666666656548834E-1 +exp_m0_4: + .double 0r4.1666666661841822439E-2 +exp_m0_5: + .double 0r8.3333332132295139666E-3 +exp_m0_6: + .double 0r1.3888871537291139647E-3 +exp_m0_7: + .double 0r1.9839759941175428605E-4 +exp_m0_8: + .double 0r2.4723414228338120148E-5 +exp_m0_9: + .double 0r2.5328445339119633100E-6 + +exp_m1_c: +exp_m1_0: + .double 0r8.3880145108698256351E-1 +exp_m1_0_l: + .double 0r-3.1258096785010882408E-17 +exp_m1_1_h: + .double 0r8.3880144357681274414E-1 +exp_m1_1_l: + .double 0r7.5101698193691390770E-9 +exp_m1_2: + .double 0r4.1940072554349638878E-1 +exp_m1_3: + .double 0r1.3980024184811054577E-1 +exp_m1_4: 
+ .double 0r3.4950060469065312441E-2 +exp_m1_5: + .double 0r6.9900121811488523035E-3 +exp_m1_6: + .double 0r1.1650024396098284090E-3 +exp_m1_7: + .double 0r1.6642701498218752582E-4 +exp_m1_8: + .double 0r2.0771521593253968396E-5 +exp_m1_9: + .double 0r2.1676257763527338661E-6 + +# ln < 1 + +#ln_s_c: +#ln_s_p_0: +# .double 0r9.1503481690678860616 +#ln_s_p_1: +# .double 0r20.941485866336655874 +#ln_s_p_2: +# .double 0r16.487417954606193859 +#ln_s_p_3: +# .double 0r5.1186080224958212526 +#ln_s_p_4: +# .double 0r4.9845734945458103349E-1 +#ln_s_q_0: +# .double 0r27.451044507203782530 +#ln_s_q_1: +# .double 0r83.412740979424398802 +#ln_s_q_2: +# .double 0r95.551182894484938402 +#ln_s_q_3: +# .double 0r50.697088912299392405 +#ln_s_q_4: +# .double 0r12.129116214794825268 + +# ln .83 .. 1 + +#ln_s_c: +#ln_s_p_0: +# .double 0r10.602228952834687803 +#ln_s_p_1: +# .double 0r23.654147261303222649 +#ln_s_p_2: +# .double 0r18.048399385285609497 +#ln_s_p_3: +# .double 0r5.3866317747411187611 +#ln_s_p_4: +# .double 0r4.9866367203621170257E-1 +#ln_s_q_0: +# .double 0r31.806686858504065185 +#ln_s_q_1: +# .double 0r94.817456927787816312 +#ln_s_q_2: +# .double 0r106.17427873659880788 +#ln_s_q_3: +# .double 0r54.803473649357279385 +#ln_s_q_4: +# .double 0r12.671320252177885379 + +# ln .8243 .. 
1 + +ln_s_c: +ln_s_p_0: + .double 0r10.533214407052481576 +ln_s_p_1: + .double 0r23.526643885226203423 +ln_s_p_2: + .double 0r17.976077060886193948 +ln_s_p_3: + .double 0r5.3744490269565492468 +ln_s_p_4: + .double 0r4.9865516811442134326E-1 +ln_s_q_0: + .double 0r31.599643221157446504 +ln_s_q_1: + .double 0r94.279664071546818604 +ln_s_q_2: + .double 0r105.67819330362900132 +ln_s_q_3: + .double 0r54.614015226380423940 +ln_s_q_4: + .double 0r12.646688742839442554 + +# ln >= 1 + +#ln_b_c: +#ln_b_p_0: +# .double 0r17.467871503051810578 +#ln_b_p_1: +# .double 0r35.785852544901331385 +#ln_b_p_2: +# .double 0r24.552006690337957906 +#ln_b_p_3: +# .double 0r6.4047236758045062999 +#ln_b_p_4: +# .double 0r4.9917096570160635061E-1 +#ln_b_q_0: +# .double 0r52.403614509155588053 +#ln_b_q_1: +# .double 0r146.66026851655587393 +#ln_b_q_2: +# .double 0r152.20905275348258101 +#ln_b_q_3: +# .double 0r71.576606726420877180 +#ln_b_q_4: +# .double 0r14.725978745938864023 + +# ln 1.0 .. 1.25 + +ln_b_c: +ln_b_p_0: + .double 0r15.580409683056638315 +ln_b_p_1: + .double 0r32.543914666072645048 +ln_b_p_2: + .double 0r22.877901706403754645 +ln_b_p_3: + .double 0r6.1556714513765760088 +ln_b_p_4: + .double 0r4.9907618814155391140E-1 +ln_b_q_0: + .double 0r46.741229049169916721 +ln_b_q_1: + .double 0r132.68766578509504939 +ln_b_q_2: + .double 0r140.10471702854880505 +ln_b_q_3: + .double 0r67.303567178530471438 +ln_b_q_4: + .double 0r14.223951326720150945 + +# log10 < 1 + +#log10_s_c: +#log10_s_p_0: +# .double 0r3.9766162055986344903 +#log10_s_p_1: +# .double 0r9.0998450829915000782 +#log10_s_p_2: +# .double 0r7.1633761863796170388 +#log10_s_p_3: +# .double 0r2.2235094671281157019 +#log10_s_p_4: +# .double 0r2.1647774010462134120E-1 +#log10_s_q_0: +# .double 0r27.469491586710010012 +#log10_s_q_1: +# .double 0r83.461621599998238707 +#log10_s_q_2: +# .double 0r95.597370915186246521 +#log10_s_q_3: +# .double 0r50.715260287037992271 +#log10_s_q_4: +# .double 0r12.131569554734877414 + +# log10 .83 .. 
1 + +log10_s_c: +log10_s_p_0: + .double 0r4.6028186381430433727 +log10_s_p_1: + .double 0r10.269763424759014825 +log10_s_p_2: + .double 0r7.8365495824134949743 +log10_s_p_3: + .double 0r2.3390837249823710486 +log10_s_p_4: + .double 0r2.1656666260147883207E-1 +log10_s_q_0: + .double 0r31.795144745829979627 +log10_s_q_1: + .double 0r94.787371070649314220 +log10_s_q_2: + .double 0r106.14640820241447727 +log10_s_q_3: + .double 0r54.792773831672434426 +log10_s_q_4: + .double 0r12.669919891359308295 + +# log10 >= 1 + +#log10_b_c: +#log10_b_p_0: +# .double 0r7.5811637929542987635 +#log10_b_p_1: +# .double 0r15.533033113504695066 +#log10_b_p_2: +# .double 0r10.658434759740712749 +#log10_b_p_3: +# .double 0r2.7808975967872808788 +#log10_b_p_4: +# .double 0r2.1678697192356721768E-1 +#log10_b_q_0: +# .double 0r52.368824211608433927 +#log10_b_q_1: +# .double 0r146.57500964710882840 +#log10_b_q_2: +# .double 0r152.13582168617784873 +#log10_b_q_3: +# .double 0r71.551032626191883423 +#log10_b_q_4: +# .double 0r14.723015954370795910 + +# log10 1 .. 
1.25 + +log10_b_c: +log10_b_p_0: + .double 0r6.7674014091633534207 +log10_b_p_1: + .double 0r14.135207984866871911 +log10_b_p_2: + .double 0r9.9365500005391176330 +log10_b_p_3: + .double 0r2.6734926843772655047 +log10_b_p_4: + .double 0r2.1674607755999492076E-1 +log10_b_q_0: + .double 0r46.747552809139307328 +log10_b_q_1: + .double 0r132.70322218382881374 +log10_b_q_2: + .double 0r140.11814067349982338 +log10_b_q_3: + .double 0r67.308281802680511419 +log10_b_q_4: + .double 0r14.224501421540916013 + +log2_b_c: +log2_b_p_0: + .double 0r5.7248655744376097942 +log2_b_p_1: + .double 0r9.7037982066316619267 +log2_b_p_2: + .double 0r4.9251104233553251177 +log2_b_p_3: + .double 0r0.71604182547154882066 +log2_b_p_4: + .double 0r0.00024384785289470863262 +log2_b_q_0: + .double 0r11.904523296020823153 +log2_b_q_1: + .double 0r29.106873574779058345 +log2_b_q_2: + .double 0r24.928920420570964467 +log2_b_q_3: + .double 0r8.6737948511229703286 + +log2_s_c: +log2_s_p_0: + .double 0r3.6543174401668601092 +log2_s_p_1: + .double 0r6.9236821167562956347 +log2_s_p_2: + .double 0r4.0552858971538414679 +log2_s_p_3: + .double 0r0.71265600724817212974 +log2_s_p_4: + .double 0r0.00061552202164347354606 +log2_s_q_0: + .double 0r7.5989394915663739383 +log2_s_q_1: + .double 0r20.096596833512705871 +log2_s_q_2: + .double 0r18.945813884202145516 +log2_s_q_3: + .double 0r7.4327985038948378715 + +exp2_p0_25_c: +exp2_p0_25_0h: + .double 0r1.1892071150027210269 +exp2_p0_25_0l: + .double 0r3.9820152314656461110E-17 +exp2_p0_25_1h: + .double 0r8.2429555058479309082E-1 +exp2_p0_25_1l: + .double 0r8.2811696389128996998E-9 +exp2_p0_25_2: + .double 0r2.8567907128801478533E-1 +exp2_p0_25_3: + .double 0r6.6005880936045882579E-2 +exp2_p0_25_4: + .double 0r1.1437947568180147420E-2 +exp2_p0_25_5: + .double 0r1.5856362256660922264E-3 +exp2_p0_25_6: + .double 0r1.8317976935970924383E-4 +exp2_p0_25_7: + .double 0r1.8139623479154579359E-5 +exp2_p0_25_8: + .double 0r1.5673360359583023396E-6 +exp2_p0_25_9: + .double 
0r1.3055968895300254492E-7 + +exp2_p0_c: +exp2_p0_0h: + .double 0r1.0 +exp2_p0_0l: + .double 0r0.0 +exp2_p0_1h: + .double 0r6.9314716756343841553E-1 +exp2_p0_1l: + .double 0r1.2996506870699420233E-8 +exp2_p0_2: + .double 0r2.4022650695910330310E-1 +exp2_p0_3: + .double 0r5.5504108664725972100E-2 +exp2_p0_4: + .double 0r9.6181291092419882865E-3 +exp2_p0_5: + .double 0r1.3333558019987959767E-3 +exp2_p0_6: + .double 0r1.5403532681856270284E-4 +exp2_p0_7: + .double 0r1.5253065510864060166E-5 +exp2_p0_8: + .double 0r1.3190300302967161470E-6 +exp2_p0_9: + .double 0r1.0881074348847176580E-7 + +exp2_m0_c: +exp2_m0_0h: + .double 0r1.0 +exp2_m0_0l: + .double 0r0.0 +exp2_m0_1h: + .double 0r6.9314716756343841553E-1 +exp2_m0_1l: + .double 0r1.2996506870699420233E-8 +exp2_m0_2: + .double 0r2.4022650695909780749E-1 +exp2_m0_3: + .double 0r5.5504108664691832742E-2 +exp2_m0_4: + .double 0r9.6181291047027026725E-3 +exp2_m0_5: + .double 0r1.3333557764085299593E-3 +exp2_m0_6: + .double 0r1.5403499520940549562E-4 +exp2_m0_7: + .double 0r1.5251161994200753343E-5 +exp2_m0_8: + .double 0r1.3166061357593046239E-6 +exp2_m0_9: + .double 0r9.2942318847941285694E-8 + +exp2_m0_25_c: +exp2_m0_25_0h: + .double 0r8.4089641525371450204E-1 +exp2_m0_25_0l: + .double 0r4.0995050102907482601E-17 +exp2_m0_25_1h: + .double 0r5.8286496996879577637E-1 +exp2_m0_25_1l: + .double 0r9.4072813983103742430E-9 +exp2_m0_25_2: + .double 0r2.0200560855082275169E-1 +exp2_m0_25_3: + .double 0r4.6673206007799013240E-2 +exp2_m0_25_4: + .double 0r8.0878502812528896282E-3 +exp2_m0_25_5: + .double 0r1.1212140390502213245E-3 +exp2_m0_25_6: + .double 0r1.2952709156943405071E-4 +exp2_m0_25_7: + .double 0r1.2823060087120729985E-5 +exp2_m0_25_8: + .double 0r1.1036110895925728151E-6 +exp2_m0_25_9: + .double 0r7.4921899653437894396E-8 + +real_m_1022: + .double 0r-1022.0 +real_1023: + .double 0r1023.0 +real_1024: + .double 0r1024.0 +real_1025: + .double 0r1025.0 +real_m_1076: + .double 0r-1076.0 +real_2_p_m_1022: + .quad 
0x00010000000000000 +real_2_p_m_55: + .quad 0x03C80000000000000 +real_2_p_53: + .quad 0x04340000000000000 +real_m_2_p_53: + .quad 0x0C340000000000000 +real_2_p_55: + .quad 0x04360000000000000 +real_2_p_1023: + .quad 0x07fe0000000000000 +real_max: + .quad 0x07fefffffffffffff +real_m_max: + .quad 0x0ffefffffffffffff + +real_4000_0: + .double 0r4000.0 +real_m_3810_0: + .double 0r-3810.0 +real_1401_0: + .double 0r1401.0 +real_m_1471_0: + .double 0r-1471.0 +real_4605_0: + .double 0r4605.0 +real_m_4834_0: + .double 0r-4834.0 +real_1_052: + .double 0r1.052 +real_m_1_052: + .double 0r-1.052 +real_power_exp_too_large: + .double 0r6711563375777760768.0 +real_power_exp_too_small: + .double 0r-6393154322601327104.0 + diff --git a/macho64/astartup.s b/macho64/astartup.s new file mode 100644 index 0000000..8f2bc01 --- /dev/null +++ b/macho64/astartup.s @@ -0,0 +1,5119 @@ + + .set LINUX,1 + .set USE_LIBM,0 + .set NEW_DESCRIPTORS,1 + .set MEASURE_GC,0 + + .macro att_jmp + .att_syntax + jmp $0 + .intel_syntax noprefix + .endmacro + + .macro att_call + .att_syntax + call $0 + .intel_syntax noprefix + .endmacro + + .macro att_je + .att_syntax + je $0 + .intel_syntax noprefix + .endmacro + + .macro att_jne + .att_syntax + jne $0 + .intel_syntax noprefix + .endmacro + + .macro att_ja + .att_syntax + ja $0 + .intel_syntax noprefix + .endmacro + + .macro att_jae + .att_syntax + jae $0 + .intel_syntax noprefix + .endmacro + + .macro att_jb + .att_syntax + jb $0 + .intel_syntax noprefix + .endmacro + + .macro att_jbe + .att_syntax + jbe $0 + .intel_syntax noprefix + .endmacro + + .macro att_jc + .att_syntax + jc $0 + .intel_syntax noprefix + .endmacro + + .macro att_jnc + .att_syntax + jnc $0 + .intel_syntax noprefix + .endmacro + + .macro att_jg + .att_syntax + jg $0 + .intel_syntax noprefix + .endmacro + + .macro att_jge + .att_syntax + jge $0 + .intel_syntax noprefix + .endmacro + + .macro att_jl + .att_syntax + jl $0 + .intel_syntax noprefix + .endmacro + + .macro att_jle + .att_syntax 
+ jle $0 + .intel_syntax noprefix + .endmacro + + .macro att_jz + .att_syntax + jz $0 + .intel_syntax noprefix + .endmacro + +/* File: astartup.s */ +/* Author: John van Groningen */ +/* Machine: amd64 */ + +/* r10 = r10 */ +/* r11 = r11 */ +/* r12 = r12 */ +/* r13 = r13 */ + +/* r11d = r11d */ +/* r12d = r12d */ + +/* r10b = r10b */ + + .if LINUX + .intel_syntax noprefix + .endif + + .if ! LINUX + .globl convert_real_to_string + .endif + .if ! LINUX + .globl write_heap + .endif + .globl _return_code + .globl _execution_aborted + .globl e____system__kFinalizerGCTemp + .globl e____system__kFinalizer + + .if LINUX + .globl _times + .globl _exit + .else + .globl GetTickCount + .globl ExitProcess + .endif + + .if USE_LIBM + .globl cos + .globl sin + .globl tan + .globl atan + .endif + + .data + .align 3 + +semi_space_size: .quad 0 + +heap_p1: .quad 0 +heap_p2: .quad 0 +heap_p3: .quad 0 +neg_heap_p3: .quad 0 +end_heap_p3: .quad 0 +vector_p: .quad 0 +vector_counter: .quad 0 +neg_heap_vector_plus_4: .quad 0 + +heap_size_64_65: .quad 0 +heap_vector: .quad 0 +stack_top: .quad 0 +end_vector: .quad 0 + +heap_size_257: .quad 0 +heap_copied_vector: .quad 0 + +heap_end_after_gc: .quad 0 +extra_heap: .quad 0 +extra_heap_size: .quad 0 +stack_p: .quad 0 +halt_sp: .quad 0 + +n_allocated_words: .quad 0 + +last_time: .quad 0 +execute_time: .quad 0 +garbage_collect_time: .quad 0 +IO_time: .quad 0 + +compact_garbage_collect_time: .quad 0 +mark_compact_garbage_collect_time: .quad 0 +total_gc_bytes: .quad 0 +total_compact_gc_bytes: .quad 0 + + .globl saved_heap_p +saved_heap_p: + .quad 0 + .quad 0 + + .globl saved_a_stack_p +saved_a_stack_p: .quad 0 + + .globl end_a_stack +end_a_stack: .quad 0 + + .globl int_to_real_scratch +int_to_real_scratch: .quad 0 + +heap_end_write_heap: .quad 0 +d3_flag_write_heap: .quad 0 +heap2_begin_and_end: + .quad 0 + .quad 0 + + .globl a_stack_guard_page +a_stack_guard_page: .quad 0 + + .globl profile_stack_pointer +profile_stack_pointer: .quad 0 + 
+dll_initisialised: .quad 0 + .globl end_b_stack +end_b_stack: .quad 0 +basic_only: .quad 0 +heap_size_65: .quad 0 +heap_copied_vector_size: .quad 0 +heap_end_after_copy_gc: .quad 0 +heap_mbp: .quad 0 +heap_p: .quad 0 +stack_mbp: .quad 0 + +bit_counter: + .quad 0 +bit_vector_p: + .quad 0 +zero_bits_before_mark: + .quad 1 +n_free_words_after_mark: + .quad 1000 +n_last_heap_free_bytes: + .quad 0 +lazy_array_list: + .quad 0 +n_marked_words: + .quad 0 +end_stack: + .quad 0 + +bit_vector_size: + .quad 0 + +caf_list: + .quad 0 + .globl caf_listp +caf_listp: + .quad 0 + +zero_length_string: + .quad __STRING__+2 + .quad 0 +true_string: + .quad __STRING__+2 + .quad 4 +true_c_string: + .ascii "True" + .byte 0,0,0,0 +false_string: + .quad __STRING__+2 + .quad 5 +false_c_string: + .ascii "False" + .byte 0,0,0 +file_c_string: + .ascii "File" + .byte 0,0,0,0 +garbage_collect_flag: + .byte 0 + .byte 0,0,0 + + .comm sprintf_buffer,32 + +out_of_memory_string_1: + .ascii "Not enough memory to allocate heap and stack" + .byte 10,0 +printf_int_string: + .ascii "%d" + .byte 0 +printf_real_string: + .ascii "%.15g" + .byte 0 +printf_string_string: + .ascii "%s" + .byte 0 +printf_char_string: + .ascii "%c" + .byte 0 +garbage_collect_string_1: + .ascii "A stack: " + .byte 0 +garbage_collect_string_2: + .ascii " bytes. BC stack: " + .byte 0 +garbage_collect_string_3: + .ascii " bytes." + .byte 10,0 +heap_use_after_gc_string_1: + .ascii "Heap use after garbage collection: " + .byte 0 +heap_use_after_compact_gc_string_1: + .ascii "Heap use after compacting garbage collection: " + .byte 0 +heap_use_after_gc_string_2: + .ascii " Bytes." + .byte 10,0 +stack_overflow_string: + .ascii "Stack overflow." + .byte 10,0 +out_of_memory_string_4: + .ascii "Heap full." 
+ .byte 10,0 +time_string_1: + .ascii "Execution: " + .byte 0 +time_string_2: + .ascii " Garbage collection: " + .byte 0 + +time_string_3: + .ascii " " + .byte 0 + +time_string_4: + .ascii " Total: " + .byte 0 +high_index_string: + .ascii "Index too high in UPDATE string." + .byte 10,0 +low_index_string: + .ascii "Index negative in UPDATE string." + .byte 10,0 +IO_error_string: + .ascii "IO error: " + .byte 0 +new_line_string: + .byte 10,0 + +sprintf_time_string: + .ascii "%d.%02d" + .byte 0 + +marked_gc_string_1: + .ascii "Marked: " + .byte 0 + + .if PROFILE + .align 3 +m_system: + .long 6 + .ascii "System" + .byte 0 + .byte 0 + + .long m_system-. +garbage_collector_name: + .quad 0 + .ascii "garbage_collector" + .byte 0 + .align 3 + .endif + + .align 4 + .globl sign_real_mask +sign_real_mask: + .quad 0x8000000000000000,0x8000000000000000 + .globl abs_real_mask +abs_real_mask: + .quad 0x7fffffffffffffff,0x7fffffffffffffff + + .align 3 +NAN_real: + .long 0x0ffffffff,0x7fffffff +one_real: + .long 0x00000000,0x3ff00000 +zero_real: + .long 0x00000000,0x00000000 + + .align 2 +bit_set_table: + .long 0x00000001,0x00000002,0x00000004,0x00000008 + .long 0x00000010,0x00000020,0x00000040,0x00000080 + .long 0x00000100,0x00000200,0x00000400,0x00000800 + .long 0x00001000,0x00002000,0x00004000,0x00008000 + .long 0x00010000,0x00020000,0x00040000,0x00080000 + .long 0x00100000,0x00200000,0x00400000,0x00800000 + .long 0x01000000,0x02000000,0x04000000,0x08000000 + .long 0x10000000,0x20000000,0x40000000,0x80000000 + .long 0 +bit_set_table2: + .long 0x00000001,0,0x00000002,0,0x00000004,0,0x00000008,0 + .long 0x00000010,0,0x00000020,0,0x00000040,0,0x00000080,0 + .long 0x00000100,0,0x00000200,0,0x00000400,0,0x00000800,0 + .long 0x00001000,0,0x00002000,0,0x00004000,0,0x00008000,0 + .long 0x00010000,0,0x00020000,0,0x00040000,0,0x00080000,0 + .long 0x00100000,0,0x00200000,0,0x00400000,0,0x00800000,0 + .long 0x01000000,0,0x02000000,0,0x04000000,0,0x08000000,0 + .long 
0x10000000,0,0x20000000,0,0x40000000,0,0x80000000,0 + .long 0,0 +bit_clear_table: + .long 0x0fffffffe,0x0fffffffd,0x0fffffffb,0x0fffffff7 + .long 0x0ffffffef,0x0ffffffdf,0x0ffffffbf,0x0ffffff7f + .long 0x0fffffeff,0x0fffffdff,0x0fffffbff,0x0fffff7ff + .long 0x0ffffefff,0x0ffffdfff,0x0ffffbfff,0x0ffff7fff + .long 0x0fffeffff,0x0fffdffff,0x0fffbffff,0x0fff7ffff + .long 0x0ffefffff,0x0ffdfffff,0x0ffbfffff,0x0ff7fffff + .long 0x0feffffff,0x0fdffffff,0x0fbffffff,0x0f7ffffff + .long 0x0efffffff,0x0dfffffff,0x0bfffffff,0x7fffffff + .long 0x0ffffffff +bit_clear_table2: + .long 0x0fffffffe,-1,0x0fffffffd,-1,0x0fffffffb,-1,0x0fffffff7,-1 + .long 0x0ffffffef,-1,0x0ffffffdf,-1,0x0ffffffbf,-1,0x0ffffff7f,-1 + .long 0x0fffffeff,-1,0x0fffffdff,-1,0x0fffffbff,-1,0x0fffff7ff,-1 + .long 0x0ffffefff,-1,0x0ffffdfff,-1,0x0ffffbfff,-1,0x0ffff7fff,-1 + .long 0x0fffeffff,-1,0x0fffdffff,-1,0x0fffbffff,-1,0x0fff7ffff,-1 + .long 0x0ffefffff,-1,0x0ffdfffff,-1,0x0ffbfffff,-1,0x0ff7fffff,-1 + .long 0x0feffffff,-1,0x0fdffffff,-1,0x0fbffffff,-1,0x0f7ffffff,-1 + .long 0x0efffffff,-1,0x0dfffffff,-1,0x0bfffffff,-1,0x7fffffff,-1 + .long 0x0ffffffff,-1 +first_one_bit_table: + .byte -1,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 + + .align 2 + .comm sprintf_time_buffer,20 + + .align 3 + +/* .globl small_integers */ + .comm small_integers,33*16 +/* .globl static_characters */ + .comm static_characters,256*16 + +/* .globl 
clean_exception_handler */ +/* .globl clean_unwind_info */ +/*clean_unwind_info: */ +/* DD 000000009H */ +/* DD imagerel(clean_exception_handler) */ + + + .text + + .globl _abc_main + .globl print + .globl print_char + .globl print_int + .globl print_real + .globl print__string__ + .globl print__chars__sc + .globl print_sc + .globl print_symbol + .globl print_symbol_sc + .globl printD + .globl DtoAC + .globl push_t_r_args + .globl push_a_r_args + .globl halt + .globl dump + + .globl catAC + .globl sliceAC + .globl updateAC + .globl eqAC + .globl cmpAC + + .globl string_to_string_node + .globl int_array_to_node + .globl real_array_to_node + + .globl _create_arrayB + .globl _create_arrayC + .globl _create_arrayI + .globl _create_arrayI32 + .globl _create_arrayR + .globl _create_arrayR32 + .globl _create_r_array + .globl create_array + .globl create_arrayB + .globl create_arrayC + .globl create_arrayI + .globl create_arrayI32 + .globl create_arrayR + .globl create_arrayR32 + .globl create_R_array + + .globl BtoAC + .globl ItoAC + .globl RtoAC + .globl eqD + + .globl collect_0 + .globl collect_1 + .globl collect_2 + .globl collect_3 + + .globl yet_args_needed + .globl yet_args_needed_0 + .globl yet_args_needed_1 + .globl yet_args_needed_2 + .globl yet_args_needed_3 + .globl yet_args_needed_4 + + .globl _c3,_c4,_c5,_c6,_c7,_c8,_c9,_c10,_c11,_c12 + .globl _c13,_c14,_c15,_c16,_c17,_c18,_c19,_c20,_c21,_c22 + .globl _c23,_c24,_c25,_c26,_c27,_c28,_c29,_c30,_c31,_c32 + + .globl e__system__nind + .globl e__system__eaind +/* old names of the previous two labels for compatibility, remove later */ + .globl __indirection,__eaind + .globl e__system__dind + .globl eval_fill + + .globl eval_upd_0,eval_upd_1,eval_upd_2,eval_upd_3,eval_upd_4 + .globl eval_upd_5,eval_upd_6,eval_upd_7,eval_upd_8,eval_upd_9 + .globl eval_upd_10,eval_upd_11,eval_upd_12,eval_upd_13,eval_upd_14 + .globl eval_upd_15,eval_upd_16,eval_upd_17,eval_upd_18,eval_upd_19 + .globl 
eval_upd_20,eval_upd_21,eval_upd_22,eval_upd_23,eval_upd_24 + .globl eval_upd_25,eval_upd_26,eval_upd_27,eval_upd_28,eval_upd_29 + .globl eval_upd_30,eval_upd_31,eval_upd_32 + + .globl repl_args_b + .globl push_arg_b + .globl del_args + + .globl add_IO_time + .globl add_execute_time + .globl _IO_error + .globl stack_overflow + + .globl out_of_memory_4 + .globl print_error + + .if LINUX + .globl __start + .else + .globl _start + .endif + + .if PROFILE +/* .globl init_profiler */ +/* .globl profile_n */ +/* .globl profile_s */ +/* .globl profile_r */ +/* .globl write_profile_information */ +/* .globl write_profile_stack */ + .endif + + .if USE_LIBM + .globl cos_real + .globl sin_real + .globl tan_real + .globl asin_real + .globl acos_real + .globl atan_real + .globl ln_real + .globl log10_real + .globl exp_real + .globl pow_real + .endif + .globl entier_real + .globl r_to_i_real + .if USE_LIBM + .globl _c_pow + .globl _c_log10 + .globl _c_entier + .endif + + .globl __driver + +/* from system.abc: */ + .globl dINT + .globl INT32 + .globl CHAR + .globl BOOL + .globl REAL + .globl REAL32 + .globl FILE + .globl __STRING__ + .globl __ARRAY__ + .globl __cycle__in__spine + .globl __print__graph + .globl __eval__to__nf + +/* from wcon.c: */ + .globl _w_print_char + .globl _w_print_string + .globl _w_print_text + .globl _w_print_int + .globl _w_print_real + + .globl _ew_print_char + .globl _ew_print_text + .globl _ew_print_string + .globl _ew_print_int + + .globl _ew_print_real + + .globl _ab_stack_size + .globl _heap_size + .globl _flags + +/* from standard c library: */ + + .if ! 
LINUX + .globl allocate_memory + .globl allocate_memory_with_guard_page_at_end + .globl free_memory + .endif + + .globl _heap_size_multiple + .globl _initial_heap_size + + .globl _min_write_heap_size + + .globl __Nil +/* .globl finalizer_list */ + .comm finalizer_list,8 +/* .globl free_finalizer_list */ + .comm free_finalizer_list,8 + +_abc_main: /* Program entry: saves rbx,rcx,rdx,rbp,rsi,rdi, runs init_clean and init_timer, records rsp in halt_sp, runs the program at __start (LINUX) and finally exit_clean. */ + push rbx + push rcx + push rdx + push rbp + push rsi + push rdi + + call init_clean + test rax,rax + jne init_error /* init_clean returned non-zero: skip the program and exit_clean */ + + call init_timer + + mov halt_sp[rip],rsp + + .if PROFILE + call init_profiler + .endif + + .if LINUX + att_call __start + +exit_: + .else + call _start + +exit: + .endif + + call exit_clean + +init_error: /* common epilogue: restores the six registers pushed at _abc_main */ + pop rdi + pop rsi + pop rbp + pop rdx + pop rcx + pop rbx + + .if LINUX + mov eax,dword ptr _return_code[rip] + jne return_code_set_1 /* NOTE(review): neither mov nor pop changes the flags, so this branch tests whatever flags earlier code left behind - confirm whether a test of eax is missing here */ + mov eax,-1 +return_code_set_1: + .endif + ret + + + .globl DllMain +DllMain: /* dispatches on edx: 1 goes to DLL_PROCESS_ATTACH, below 1 (unsigned) goes to DLL_PROCESS_DETACH, any other value returns at once; presumably edx is the Windows DllMain reason code - confirm */ + cmp edx,1 + je DLL_PROCESS_ATTACH + jb DLL_PROCESS_DETACH + ret + +DLL_PROCESS_ATTACH: /* saves rbx,rbp,rsi,rdi and r12-r15, initialises the runtime, stores rdi, r15 and rsi in saved_heap_p, saved_heap_p+8 and saved_a_stack_p; rax = 1 on success, 0 when init_clean failed */ + push rbx + push rbp + push rsi + push rdi + .if ! LINUX + .byte 0x49 /* NOTE(review): 0x49 looks like a REX prefix that turns the next push into push r12 (and likewise r13-r15 below), matching the .else branch - confirm */ + push rsp + .byte 0x49 + push rbp + .byte 0x49 + push rsi + .byte 0x49 + push rdi + .else + push r12 + push r13 + push r14 + push r15 + .endif + mov qword ptr dll_initisialised[rip],1 + + att_call init_clean + test rax,rax + jne init_dll_error + + att_call init_timer + + mov halt_sp[rip],rsp + + .if PROFILE + att_call init_profiler + .endif + + mov qword ptr saved_heap_p[rip],rdi + mov qword ptr saved_heap_p+8[rip],r15 + mov saved_a_stack_p[rip],rsi + + mov rax,1 + jmp exit_dll_init + +init_dll_error: + xor rax,rax + att_jmp exit_dll_init + +DLL_PROCESS_DETACH: /* saves the same registers as DLL_PROCESS_ATTACH, reloads rdi, r15 and rsi from the saved slots and runs exit_clean */ + push rbx + push rbp + push rsi + push rdi + .if ! LINUX + .byte 0x49 + push rsp + .byte 0x49 + push rbp + .byte 0x49 + push rsi + .byte 0x49 + push rdi + .else + push r12 + push r13 + push r14 + push r15 + .endif + + mov rdi,qword ptr saved_heap_p[rip] + mov r15,qword ptr saved_heap_p+8[rip] + mov rsi,saved_a_stack_p[rip] + + att_call exit_clean + +exit_dll_init: /* shared epilogue of the attach and detach paths: pops r15-r12, rdi, rsi, rbp, rbx and returns */ + .if ! 
LINUX + .byte 0x49 + pop rdi + .byte 0x49 + pop rsi + .byte 0x49 + pop rbp + .byte 0x49 + pop rsp + .else + pop r15 + pop r14 + pop r13 + pop r12 + .endif + pop rdi + pop rsi + pop rbp + pop rbx + ret + +init_clean: /* Sets up the runtime: derives the heap bookkeeping sizes from _heap_size, allocates the heap and the A stack, and fills the small_integers and static_characters tables. Returns rax = 0 on success with rdi = start of the usable heap, rsi = stack_p and r15 = the word count used for heap_end_after_gc. Returns rax = 1 after printing out_of_memory_string_1 and setting _execution_aborted when an allocation fails. Uses rbx, rcx, rdx, rbp and (LINUX) r14 as scratch. */ + lea rax,128[rsp] + sub rsp,32+8 /* 40 bytes are reserved here; every return path must release exactly 32+8 */ + + sub rax,qword ptr _ab_stack_size[rip] + mov end_b_stack[rip],rax + + mov rax,qword ptr _flags[rip] + and rax,1 + mov basic_only[rip],rax + +/* call allow_prefetch_for_athlon */ + + mov rax,qword ptr _heap_size[rip] + sub rax,7 + xor rdx,rdx + mov rbx,65 + div rbx + mov qword ptr heap_size_65[rip],rax /* heap_size_65 = (_heap_size-7)/65 */ + + mov rax,qword ptr _heap_size[rip] + sub rax,7 + xor rdx,rdx + mov rbx,257 + div rbx + mov heap_size_257[rip],rax /* heap_size_257 = (_heap_size-7)/257 */ + add rax,7 + and rax,-8 + mov qword ptr heap_copied_vector_size[rip],rax + mov qword ptr heap_end_after_copy_gc[rip],0 + + mov rax,qword ptr _heap_size[rip] + add rax,7 + and rax,-8 + mov qword ptr _heap_size[rip],rax /* _heap_size rounded up to a multiple of 8 */ + add rax,7 + + mov rbp,rsp + and rsp,-16 /* align the stack for the C call; rbp keeps the old rsp */ + .if LINUX + mov rdi,rax + att_call _malloc /* att_call, like the second _malloc call below */ + .else + mov rcx,rax + call allocate_memory + .endif + mov rsp,rbp + + test rax,rax + je no_memory_2 + + mov heap_mbp[rip],rax + lea rdi,7[rax] + and rdi,-8 + mov heap_p[rip],rdi + + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r14,rdi + mov rdi,qword ptr _ab_stack_size[rip] + add rdi,7 + att_call _malloc /* NOTE(review): only _ab_stack_size+7 bytes are requested, yet the stack start computed below may be moved up by as much as a page for the guard-page alignment - confirm the block is always large enough */ + mov rdi,r14 + .else + mov rcx,qword ptr _ab_stack_size + add rcx,7 + call allocate_memory_with_guard_page_at_end + .endif + mov rsp,rbp + + test rax,rax + je no_memory_3 + + mov stack_mbp[rip],rax + + add rax,qword ptr _ab_stack_size[rip] + add rax,7+4095 + and rax,-4096 + mov qword ptr a_stack_guard_page[rip],rax + sub rax,qword ptr _ab_stack_size[rip] + + add rax,7 + and rax,-8 + + mov rsi,rax + mov stack_p[rip],rax + + add rax,qword ptr _ab_stack_size[rip] + sub rax,64 + mov qword ptr end_a_stack[rip],rax + + lea rcx,small_integers[rip] + xor rax,rax + lea rbx,(dINT+2)[rip] + +make_small_integers_lp: /* 33 entries of 16 bytes: dINT+2 followed by the values 0..32 */ + mov [rcx],rbx + mov 8[rcx],rax + inc rax + add rcx,16 + cmp rax,33 + att_jne make_small_integers_lp + 
+ lea rcx,static_characters[rip] + xor rax,rax + lea rbx,(CHAR+2)[rip] + +make_static_characters_lp: /* 256 entries of 16 bytes: CHAR+2 followed by the values 0..255 */ + mov [rcx],rbx + mov 8[rcx],rax + inc rax + add rcx,16 + cmp rax,256 + att_jne make_static_characters_lp + + lea rcx,(caf_list+8[rip]) + mov qword ptr caf_listp[rip],rcx + + lea rcx,__Nil-8[rip] + mov qword ptr finalizer_list[rip],rcx + mov qword ptr free_finalizer_list[rip],rcx + + mov heap_p1[rip],rdi + + mov rbp,qword ptr heap_size_257[rip] + shl rbp,4 + lea rax,[rdi+rbp*8] + mov heap_copied_vector[rip],rax + add rax,heap_copied_vector_size[rip] + mov heap_p2[rip],rax + + mov byte ptr garbage_collect_flag[rip],0 + + test byte ptr _flags[rip],64 + je no_mark1 + + mov rax,qword ptr heap_size_65[rip] + mov qword ptr heap_vector[rip],rdi + add rdi,rax + + add rdi,7 + and rdi,-8 + + mov qword ptr heap_p3[rip],rdi + lea rbp,[rax*8] + mov byte ptr garbage_collect_flag[rip],-1 + +no_mark1: + mov rax,qword ptr _initial_heap_size[rip] + + mov rbx,4000 + test byte ptr _flags[rip],64 + jne no_mark9 + add rbx,rbx +no_mark9: + + cmp rax,rbx + jle too_large_or_too_small + shr rax,3 + cmp rax,rbp + att_jge too_large_or_too_small + mov rbp,rax +too_large_or_too_small: + + lea rax,[rdi+rbp*8] + mov heap_end_after_gc[rip],rax + + test byte ptr _flags[rip],64 + att_je no_mark2 + mov qword ptr bit_vector_size[rip],rbp +no_mark2: + + mov r15,rbp + + add rsp,32+8 + xor rax,rax + ret + +no_memory_2: /* heap allocation failed: report it and return rax = 1 */ + mov rbp,rsp + and rsp,-16 + .if LINUX + lea rdi,out_of_memory_string_1[rip] + .else + lea rcx,out_of_memory_string_1 + .endif + att_call _ew_print_string + mov rsp,rbp + + mov qword ptr _execution_aborted[rip],1 + + add rsp,32+8 /* was 32: init_clean reserved 32+8, so ret popped a wrong return address */ + mov rax,1 + ret + +no_memory_3: /* stack allocation failed: report it, free the heap block and return rax = 1 */ + mov rbp,rsp + and rsp,-16 + + .if LINUX + lea rdi,out_of_memory_string_1[rip] + .else + lea rcx,out_of_memory_string_1 /* was lea ecx, which truncates the pointer; no_memory_2 uses rcx */ + .endif + att_call _ew_print_string + + mov qword ptr _execution_aborted[rip],1 + + .if LINUX + mov rdi,heap_mbp[rip] + att_call _free + .else + mov rcx,heap_mbp + call free_memory + .endif + + mov rsp,rbp + + 
add rsp,32+8 /* was 32: must match the sub rsp,32+8 at init_clean */ + mov rax,1 + ret + +exit_clean: /* Adds the execute time, prints the timing report on the error console when mask 8 of _flags is set, then frees the stack and heap blocks. */ + att_call add_execute_time + + mov rax,qword ptr _flags[rip] + test al,8 + je no_print_execution_time + + mov rbp,rsp + and rsp,-16 + .if ! LINUX + sub rsp,32 + .endif + + .if LINUX + lea rdi,time_string_1[rip] + .else + lea rcx,time_string_1 + .endif + att_call _ew_print_string + + mov rax,execute_time[rip] + att_call print_time /* att_call, like the other print_time calls in this function */ + + .if LINUX + lea rdi,time_string_2[rip] + .else + lea rcx,time_string_2 + .endif + att_call _ew_print_string + + mov rax,garbage_collect_time[rip] + .if MEASURE_GC + .else + add rax,mark_compact_garbage_collect_time[rip] + add rax,compact_garbage_collect_time[rip] + .endif + att_call print_time + + .if MEASURE_GC + + .if LINUX + lea rdi,time_string_3 + .else + lea rcx,time_string_3 + .endif + call _ew_print_string + + mov rax,mark_compact_garbage_collect_time + call print_time + + .if LINUX + lea rdi,time_string_3 + .else + lea rcx,time_string_3 + .endif + call _ew_print_string + + mov rax,compact_garbage_collect_time + call print_time + + .endif + + .if LINUX + lea rdi,time_string_4[rip] + .else + lea rcx,time_string_4 + .endif + att_call _ew_print_string + + mov rax,execute_time[rip] + add rax,garbage_collect_time[rip] + add rax,IO_time[rip] + + add rax,mark_compact_garbage_collect_time[rip] + add rax,compact_garbage_collect_time[rip] + + att_call print_time + + .if LINUX + mov rdi,10 + .else + mov rcx,10 + .endif + att_call _ew_print_char + + .if MEASURE_GC + + .if LINUX + mov rdi,total_gc_bytes + .else + mov rcx,total_gc_bytes + .endif + call _ew_print_int + + .if LINUX + mov rdi,32 + .else + mov rcx,32 + .endif + call _ew_print_char + + .if LINUX + mov rdi,total_compact_gc_bytes + .else + mov rcx,total_compact_gc_bytes + .endif + call _ew_print_int + + .if LINUX + mov rdi,32 + .else + mov rcx,32 + .endif + call _ew_print_char + + mov rax,1000 + cvtsi2sd xmm1,rax + cvtsi2sd xmm0,qword ptr garbage_collect_time + divsd xmm0,xmm1 + call _ew_print_real + + .if LINUX + mov rdi,32 + .else + mov 
rcx,32 + .endif + call _ew_print_char /* NOTE(review): the code from here to the closing .endif of the MEASURE_GC block is only assembled when MEASURE_GC is non-zero; unlike the rest of the file it addresses globals without [rip] and uses plain call - confirm before enabling MEASURE_GC */ + + mov rax,1000 + cvtsi2sd xmm1,rax + cvtsi2sd xmm0,qword ptr mark_compact_garbage_collect_time + divsd xmm0,xmm1 /* xmm0 = mark_compact_garbage_collect_time / 1000.0 */ + call _ew_print_real + + .if LINUX + mov rdi,32 + .else + mov rcx,32 + .endif + call _ew_print_char + + mov rax,1000 + cvtsi2sd xmm1,rax + cvtsi2sd xmm0,qword ptr compact_garbage_collect_time + divsd xmm0,xmm1 /* xmm0 = compact_garbage_collect_time / 1000.0 */ + call _ew_print_real + + .if LINUX + mov rdi,10 + .else + mov rcx,10 + .endif + call _ew_print_char + + mov rax,1000 + cvtsi2sd xmm1,rax + cvtsi2sd xmm2,qword ptr garbage_collect_time + divsd xmm2,xmm1 + mov rax,qword ptr total_gc_bytes + cvtsi2sd xmm0,rax + divsd xmm0,xmm2 /* xmm0 = total_gc_bytes / (garbage_collect_time / 1000.0) */ + call _ew_print_real + + .if LINUX + mov rdi,32 + .else + mov rcx,32 + .endif + call _ew_print_char + + mov rax,1000 + cvtsi2sd xmm1,rax + cvtsi2sd xmm2,qword ptr mark_compact_garbage_collect_time + divsd xmm2,xmm1 + mov rax,qword ptr total_compact_gc_bytes + cvtsi2sd xmm0,rax + divsd xmm0,xmm2 /* xmm0 = total_compact_gc_bytes / (mark_compact_garbage_collect_time / 1000.0) */ + call _ew_print_real + + .if LINUX + mov rdi,32 + .else + mov rcx,32 + .endif + call _ew_print_char + + mov rax,1000 + cvtsi2sd xmm1,rax + cvtsi2sd xmm2,qword ptr compact_garbage_collect_time + divsd xmm2,xmm1 + mov rax,qword ptr total_compact_gc_bytes + cvtsi2sd xmm0,rax + divsd xmm0,xmm2 /* xmm0 = total_compact_gc_bytes / (compact_garbage_collect_time / 1000.0) */ + call _ew_print_real + + .if LINUX + mov rdi,32 + .else + mov rcx,32 + .endif + call _ew_print_char + + mov rax,1000 + cvtsi2sd xmm1,rax + cvtsi2sd xmm2,qword ptr mark_compact_garbage_collect_time + cvtsi2sd xmm3,qword ptr compact_garbage_collect_time + addsd xmm2,xmm3 + divsd xmm2,xmm1 + mov rax,qword ptr total_compact_gc_bytes + cvtsi2sd xmm0,rax + divsd xmm0,xmm2 /* xmm0 = total_compact_gc_bytes / ((mark_compact + compact time) / 1000.0) */ + call _ew_print_real + + .if LINUX + mov rdi,10 + .else + mov rcx,10 + .endif + call _ew_print_char + + .endif + + mov rsp,rbp /* undo the alignment made before the timing report */ + +no_print_execution_time: /* frees the blocks recorded in stack_mbp and heap_mbp, with the stack aligned to 16 for the calls */ + mov rbp,rsp + and rsp,-16 + .if LINUX + mov rdi,stack_mbp[rip] + att_call _free + + mov rdi,heap_mbp[rip] + att_call _free + .else + mov rcx,stack_mbp + sub rsp,32 + call free_memory + + mov rcx,heap_mbp + call free_memory + add rsp,32 + 
.endif + mov rsp,rbp + + .if PROFILE + .if ! TRACE + call write_profile_information + .endif + .endif + + ret + +__driver: /* jumps to __print__graph when mask 16 of _flags is clear, otherwise to __eval__to__nf */ + mov rbp,qword ptr _flags[rip] + test rbp,16 + att_je __print__graph + att_jmp __eval__to__nf + +print_time: /* In: rax = time value. Prints rax/1000 with _ew_print_int, then a dot and the two decimal digits of (rax mod 1000)/10 with _ew_print_text. Preserves rbp; clobbers rax, rbx, rcx, rdx and (LINUX) rdi, rsi. */ + push rbp + + xor rdx,rdx + mov rbx,1000 + div rbx + mov rcx,rax /* rcx = rax / 1000 */ + mov rax,rdx + xor rdx,rdx + mov rbx,10 + div rbx + + push rax /* (rax mod 1000) / 10, kept on the stack across the C call */ + + mov rbp,rsp + and rsp,-16 + .if LINUX + mov rdi,rcx + .else + sub rsp,32 + .endif + att_call _ew_print_int + mov rsp,rbp + + lea rcx,sprintf_time_buffer[rip] + + xor rdx,rdx + mov rbx,10 + +/* movb $'.',(%rcx ) */ + mov byte ptr [rcx],46 + + pop rax + + div rbx + add rax,48 /* tens digit as ASCII */ + add rdx,48 /* units digit as ASCII */ + mov byte ptr 1[rcx],al + mov byte ptr 2[rcx],dl + + mov rbp,rsp + and rsp,-16 + .if LINUX + mov rsi,3 + mov rdi,rcx + .else + mov rdx,3 + sub rsp,32 + .endif + att_call _ew_print_text + mov rsp,rbp + + pop rbp + ret + +print_sc: /* same as print, but returns without printing when basic_only is non-zero */ + mov rbp,basic_only[rip] + test rbp,rbp + jne end_print + +print: /* In: rax = argument handed to _w_print_string. rsi and rdi are kept in r13/r14 across the call (LINUX); rbp is used to restore rsp. */ + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + mov rdi,rax + .else + mov rcx,rax + sub rsp,32 + .endif + att_call _w_print_string + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + +end_print: + ret + +dump: /* prints via print, then jumps to halt */ + att_call print + att_jmp halt + +printD: test al,2 /* In: rax = descriptor. With bit 1 set control goes to printD_; otherwise rax points at a dword length followed by the characters, which are printed with _w_print_text. */ + jne printD_ + + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + + lea rdi,4[rax] + mov esi,0[rax] + .else + lea rcx,4[rax] + mov edx,dword ptr [rax] + sub rsp,32 + .endif + att_call _w_print_text + .if LINUX + mov rsi,r13 + mov rdi,r14 /* was mov rsi,r14: rdi was never restored and rsi ended up holding the old rdi */ + .endif + mov rsp,rbp + ret + +DtoAC_record: /* record descriptor: locate its name string and continue at DtoAC_string_a2 */ + .if NEW_DESCRIPTORS + movsxd rbp,dword ptr (-6)[rax] + lea rbp,-6[rax+rbp] /* the dword is an offset relative to rax-6 (print_record resolves it the same way); without this DtoAC_string_a2 dereferenced the bare offset */ + .else + movsx rbp,dword ptr (-4)[rbp] + .endif + att_jmp DtoAC_string_a2 + +DtoAC: test al,2 /* In: rax = descriptor. With bit 1 clear rax itself is the string (length dword, then characters); otherwise DtoAC_ locates the string. */ + jne DtoAC_ + + mov rbp,rax + att_jmp DtoAC_string_a2 + +DtoAC_: + .if NEW_DESCRIPTORS + cmp word ptr (-2)[rax],256 + att_jae DtoAC_record + + movzx rbx,word ptr [rax] + lea rbp,10[rax+rbx] + .else + lea rbp,(-2)[rax] + movsx rbx,word ptr [rbp] + cmp rbx,256 + jae DtoAC_record + + shl rbx,3 + sub 
rbp,rbx + + movzx rbx,word ptr (-2)[rbp] + lea rbp,4[rbp+rbx*8] + .endif + +DtoAC_string_a2: + mov eax,dword ptr [rbp] + lea rcx,4[rbp] + jmp build_string + +print_symbol: + xor rbx,rbx + jmp print_symbol_2 + +print_symbol_sc: + mov rbx,basic_only[rip] +print_symbol_2: + mov rax,[rcx] + + lea rbp,dINT+2[rip] + cmp rax,rbp + je print_int_node + + lea rbp,CHAR+2[rip] + cmp rax,rbp + je print_char_denotation + + lea rbp,BOOL+2[rip] + cmp rax,rbp + je print_bool + + lea rbp,REAL+2[rip] + cmp rax,rbp + je print_real_node + + test rbx,rbx + jne end_print_symbol + +printD_: + cmp word ptr (-2)[rax],256 + jae print_record + + movzx rbx,word ptr [rax] + lea rbp,10[rax+rbx] + jmp print_string_a2 + +print_record: + movsxd rbp,dword ptr (-6)[rax] + lea rbp,-6[rax+rbp] + att_jmp print_string_a2 + +end_print_symbol: + ret + +print_int_node: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + mov rdi,8[rcx] + .else + sub rsp,32 + mov rcx,8[rcx] + .endif + att_call _w_print_int + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_int: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + mov rdi,rax + .else + mov rcx,rax + sub rsp,32 + .endif + att_call _w_print_int + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_char_denotation: + test rbx,rbx + jne print_char_node + + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + .else + sub rsp,32 + .endif + mov rbx,8[rcx] + + .if LINUX + mov rdi,0x27 + .else + mov rcx,0x27 + .endif + att_call _w_print_char + + .if LINUX + mov rdi,rbx + .else + mov rcx,rbx + .endif + att_call _w_print_char + + .if LINUX + mov rdi,0x27 + .else + mov rcx,0x27 + .endif + att_call _w_print_char + + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_char_node: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + + mov rdi,8[rcx] +.else + mov rcx,8[rcx] + sub rsp,32 + .endif + att_call _w_print_char + .if LINUX + mov 
rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_char: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + + mov rdi,rax + .else + mov rcx,rax + sub rsp,32 + .endif + att_call _w_print_char + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_bool: + movsx rcx,byte ptr 8[rcx] + test rcx,rcx + je print_false + +print_true: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + lea rdi,true_c_string[rip] + .else + lea rcx,true_c_string + sub rsp,32 + .endif + att_call _w_print_string + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_false: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + lea rdi,false_c_string[rip] + .else + lea rcx,false_c_string + sub rsp,32 + .endif + att_call _w_print_string + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_real_node: + movlpd xmm0,qword ptr 8[rcx] +print_real: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + .else + sub rsp,32 + .endif + att_call _w_print_real + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print_string_a2: + .if LINUX + mov r13,rsi + mov r14,rdi + lea rdi,4[rbp] + mov esi,0[rbp] + mov rbp,rsp + and rsp,-16 + .else + lea rcx,4[rbp] + mov edx,0[rbp] + mov rbp,rsp + and rsp,-16 + sub rsp,32 + .endif + att_call _w_print_text + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + ret + +print__chars__sc: + mov rbp,basic_only[rip] + test rbp,rbp + jne no_print_chars + +print__string__: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + mov rsi,8[rcx] + lea rdi,16[rcx] + .else + mov rdx,8[rcx] + lea rcx,16[rcx] + sub rsp,32 + .endif + att_call _w_print_text + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp +no_print_chars: + ret + +push_a_r_args: + push rdi + + mov rdx,qword ptr 16[rcx] + sub rdx,2 + movzx rdi,word ptr [rdx] + sub rdi,256 + movzx rbx,word ptr 2[rdx] + add rdx,4 + push rdx 
+ + mov rdx,rdi + sub rdx,rbx + + shl rax,3 + lea rcx,24[rcx+rbx*8] + dec rdi +mul_array_size_lp: + add rcx,rax + sub rdi,1 + att_jnc mul_array_size_lp + + lea rdi,[rcx+rdx*8] + jmp push_a_elements +push_a_elements_lp: + mov rax,qword ptr (-8)[rcx] + sub rcx,8 + mov qword ptr [rsi],rax + add rsi,8 +push_a_elements: + sub rbx,1 + att_jnc push_a_elements_lp + + mov rcx,rdi + pop rax + pop rdi + + pop rbp + jmp push_b_elements +push_b_elements_lp: + push (-8)[rcx] + sub rcx,8 +push_b_elements: + sub rdx,1 + att_jnc push_b_elements_lp + + jmp rbp + +push_t_r_args: + pop rbp + + mov rdx,qword ptr [rcx] + add rcx,8 + sub rdx,2 + movzx rax,word ptr [rdx] + sub rax,256 + movzx rbx,word ptr 2[rdx] + add rdx,4 + + mov qword ptr [rsi],rdx + mov qword ptr 8[rsi],rbx + + sub rbx,rax + neg rbx + + lea rdx,[rcx+rax*8] + cmp rax,2 + jbe small_record + mov rdx,qword ptr 8[rcx] + lea rdx,(-8)[rdx+rax*8] +small_record: + jmp push_r_b_elements + +push_r_b_elements_lp: + dec rax + jne not_first_arg_b + + push [rcx] + att_jmp push_r_b_elements +not_first_arg_b: + push (-8)[rdx] + sub rdx,8 +push_r_b_elements: + sub rbx,1 + att_jnc push_r_b_elements_lp + + mov rbx,qword ptr 8[rsi] + push rbp + push [rsi] + jmp push_r_a_elements + +push_r_a_elements_lp: + dec rax + jne not_first_arg_a + + mov rbp,qword ptr [rcx] + mov qword ptr [rsi],rbp + add rsi,8 + att_jmp push_r_a_elements +not_first_arg_a: + mov rbp,qword ptr (-8)[rdx] + sub rdx,8 + mov qword ptr [rsi],rbp + add rsi,8 +push_r_a_elements: + sub rbx,1 + att_jnc push_r_a_elements_lp + + pop rax + ret + +BtoAC: + test al,al + je BtoAC_false +BtoAC_true: + lea rcx,true_string[rip] + ret +BtoAC_false: + lea rcx,false_string[rip] + ret + +RtoAC: + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + lea rsi,printf_real_string[rip] + lea rdi,sprintf_buffer[rip] + mov rax,1 + call _sprintf + mov rsi,r13 + mov rdi,r14 + .else + lea rdx,sprintf_buffer + sub rsp,32 + call convert_real_to_string + .endif + mov rsp,rbp + jmp 
return_sprintf_buffer

/* ItoAC: convert the signed integer in rax to a string node. */
/* The digits are produced in sprintf_buffer by int_to_string, which */
/* returns rcx = address of the terminating 0; the length is therefore */
/* rcx - sprintf_buffer. Result node is returned in rcx by build_string. */
ItoAC:
	lea	rcx,sprintf_buffer[rip]
	call	int_to_string

	mov	rax,rcx
	lea	rcx,sprintf_buffer[rip]
	sub	rax,rcx			/* rax = number of characters written */

	jmp	sprintf_buffer_to_string

/* convert_int_to_string: exported wrapper around int_to_string. */
/* Takes the buffer in rcx and the value in rdx, returns in rax the address */
/* just past the last character. rbp and rbx are preserved. */
/* NOTE(review): rcx/rdx are the Win64 argument registers; there is no */
/* LINUX variant here - confirm who calls this on non-Windows builds. */
	.globl	convert_int_to_string
convert_int_to_string:
	push	rbp
	push	rbx
	mov	rax,rdx
	att_call	int_to_string
	mov	rax,rcx
	pop	rbx
	pop	rbp
	ret

/* int_to_string: write the decimal text of the signed integer rax at rcx, */
/* followed by a 0 byte. */
/* In:    rax = value, rcx = destination buffer */
/* Out:   rcx = address of the terminating 0 byte */
/* Clobb: rax, rbx, rdx, rbp, flags */
/* Digits are emitted least-significant first and then reversed in place. */
int_to_string:
	test	rax,rax
	jns	no_minus
	mov	byte ptr [rcx],45	/* '-' */
	inc	rcx
	neg	rax
no_minus:
	mov	rbp,rcx			/* rbp = first digit position (after any '-') */

	je	zero_digit		/* flags still from test/neg: value is 0 */

calculate_digits:
	cmp	rax,10
	jb	last_digit

	/* divide by 10 with a multiply: high half of n*0xCCCCCCCCCCCCCCCD is 8*(n/10)+r, r<8 */
	mov	rdx,0x0cccccccccccccccd
	mov	rbx,rax			/* rbx = n */

	mul	rdx

	mov	rax,rdx
	and	rdx,-8			/* rdx = 8*(n/10) */
	add	rbx,48			/* rbx = n + '0' */

	shr	rax,3			/* rax = n/10, the next n */
	sub	rbx,rdx
	shr	rdx,2			/* rdx = 2*(n/10) */

	sub	rbx,rdx			/* rbx = '0' + n - 10*(n/10) */
	mov	byte ptr [rcx],bl

	inc	rcx
	att_jmp	calculate_digits

last_digit:
	test	rax,rax
	je	no_zero
zero_digit:
	add	rax,48
	mov	byte ptr [rcx],al
	inc	rcx
no_zero:
	mov	rdx,rcx			/* rdx = one past the last digit */

/* swap bytes from both ends (rbp upwards, rdx downwards) until they meet */
reverse_digits:
	dec	rdx
	cmp	rbp,rdx
	jae	end_reverse_digits
	mov	bl,byte ptr [rbp]
	mov	al,byte ptr [rdx]
	mov	byte ptr [rdx],bl
	mov	byte ptr [rbp],al
	inc	rbp
	att_jmp	reverse_digits

end_reverse_digits:
	mov	byte ptr [rcx],0
	ret

/* return_sprintf_buffer: measure the 0-terminated text in sprintf_buffer */
/* (rax = its length) and fall through to build a string node from it. */
return_sprintf_buffer:
	lea	rax,(sprintf_buffer-1)[rip]
skip_characters:
	inc	rax
	cmp	byte ptr [rax],0
	att_jne	skip_characters

	lea	rcx,sprintf_buffer[rip]
	sub	rax,rcx

/* sprintf_buffer_to_string: rax = length; source is sprintf_buffer. */
sprintf_buffer_to_string:
	lea	rcx,sprintf_buffer[rip]

/* build_string: allocate a __STRING__ node at rdi and copy the text into it. */
/* In:    rax = length in bytes, rcx = address of the characters */
/* Out:   rcx = address of the new node; rdi advanced past it */
/* The node is (16 + length) bytes rounded up to whole 8-byte words; that */
/* word count is subtracted from r15 and collect_0 is called if r15 goes */
/* negative (rcx is saved across that call). The copy moves whole words. */
build_string:

	lea	rbx,16+7[rax]
	shr	rbx,3			/* rbx = node size in words */

	sub	r15,rbx
	jge	D_to_S_no_gc

	push	rcx
	att_call	collect_0
	pop	rcx

D_to_S_no_gc:
	sub	rbx,2			/* words of character data to copy */
	mov	rbp,rdi			/* rbp = start of the node */
	lea	r9,__STRING__+2[rip]
	mov	qword ptr [rdi],r9
	mov	8[rdi],rax		/* store the length */
	add	rdi,16
	jmp	D_to_S_cp_str_2

D_to_S_cp_str_1:
	mov	rax,[rcx]
	add	rcx,8
	mov	[rdi],rax
	add	rdi,8
D_to_S_cp_str_2:
	sub	rbx,1
	att_jnc	D_to_S_cp_str_1

	mov	rcx,rbp
	ret

/* eqD: compare the nodes at rcx and rdx. Different first words -> false; */
/* for the dINT, CHAR, BOOL and REAL descriptors the values are compared. */
eqD:	mov	rax,[rcx]
	cmp	rax,[rdx]
	jne	eqD_false

	lea	rbp,dINT+2[rip]
	cmp	rax,rbp
	je	eqD_INT
	lea	rbp,CHAR+2[rip]
	cmp	rax,rbp
	je	eqD_CHAR
	lea	rbp,BOOL+2[rip]
	cmp	rax,rbp
	je	eqD_BOOL
	lea
rbp,REAL+2[rip]
	cmp	rax,rbp
	je	eqD_REAL

	mov	rax,1			/* same descriptor, not one of the four above */
	ret

/* eqD_CHAR / eqD_INT: rax = 1 if the 64-bit values at offset 8 are equal, */
/* else 0. Clobbers rbx. */
eqD_CHAR:
eqD_INT:
	mov	rbx,8[rcx]
	xor	rax,rax
	cmp	rbx,8[rdx]
	sete	al
	ret

/* eqD_BOOL: as above, but only the byte at offset 8 is compared. */
eqD_BOOL:
	mov	bl,byte ptr 8[rcx]
	xor	rax,rax
	cmp	bl,byte ptr 8[rdx]
	sete	al
	ret

/* eqD_REAL: rax = 1 if the doubles at offset 8 compare equal and ordered, */
/* else 0. */
/* comisd reports its result in EFLAGS (ZF=1 for equal or unordered, PF=1 */
/* only for unordered). The previous code read the x87 status word with */
/* fnstsw, which comisd does not write, so the answer depended on stale */
/* x87 state. mov and setnp leave EFLAGS untouched, so the branch below */
/* still sees the comisd result. */
eqD_REAL:
	movlpd	xmm0,qword ptr 8[rcx]
	comisd	xmm0,qword ptr 8[rdx]
	mov	eax,0			/* not xor: the flags must survive */
	setnp	al			/* al = 1 unless the operands were unordered */
	att_jne	eqD_false		/* ZF = 0: ordered and different */
	ret				/* ZF = 1: rax = 1 if equal, 0 if unordered */

eqD_false:
	xor	rax,rax
	ret
/* */
/* the timer */
/* */


/* init_timer: store the current time in last_time and zero the five */
/* accumulated-time counters. */
/* LINUX: calls _times with a 32-byte buffer on the stack, takes the first */
/* 32-bit word of the buffer and multiplies it by 10 */
/* (presumably user-time clock ticks scaled to milliseconds - confirm). */
/* Otherwise: calls GetTickCount. Clobbers rax, rbp (and r13, r14 on LINUX). */
init_timer:
	mov	rbp,rsp
	and	rsp,-16
	sub	rsp,32
	.if LINUX
	mov	r13,rsi
	mov	r14,rdi
	mov	rdi,rsp
	att_call	_times
	mov	rsi,r13
	mov	rdi,r14
	mov	eax,[rsp]
	imul	eax,10
	.else
	call	GetTickCount
	.endif
	mov	rsp,rbp

	mov	last_time[rip],rax
	xor	rax,rax
	mov	execute_time[rip],rax
	mov	garbage_collect_time[rip],rax
	mov	IO_time[rip],rax

	mov	mark_compact_garbage_collect_time[rip],rax
	mov	compact_garbage_collect_time[rip],rax

	ret

/* get_time_diff: read the clock the same way as init_timer, return in rax */
/* the difference with last_time, and store the new reading in last_time. */
/* Clobbers rcx, rdx, rbp (and r13, r14 on LINUX). */
get_time_diff:
	mov	rbp,rsp
	and	rsp,-16
	sub	rsp,32
	.if LINUX
	mov	r13,rsi
	mov	r14,rdi
	mov	rdi,rsp
	att_call	_times
	mov	rsi,r13
	mov	rdi,r14
	mov	eax,[rsp]
	imul	eax,10
	.else
	call	GetTickCount
	.endif
	mov	rsp,rbp

	lea	rcx,last_time[rip]
	mov	rdx,[rcx]		/* previous reading */
	mov	[rcx],rax
	sub	rax,rdx
	ret

/* add_execute_time: add the time elapsed since the last reading to */
/* execute_time (falls through into add_time). */
add_execute_time:
	att_call	get_time_diff
	lea	rcx,execute_time[rip]

/* add_time: [rcx] += rax; the new total is also left in rax. */
add_time:
	add	rax,[rcx]
	mov	[rcx],rax
	ret

/* The four routines below do the same for their own counter. */
add_garbage_collect_time:
	att_call	get_time_diff
	lea	rcx,garbage_collect_time[rip]
	att_jmp	add_time

add_IO_time:
	att_call	get_time_diff
	lea	rcx,IO_time[rip]
	att_jmp	add_time

add_mark_compact_garbage_collect_time:
	att_call	get_time_diff
	lea	rcx,mark_compact_garbage_collect_time[rip]
	att_jmp	add_time

add_compact_garbage_collect_time:
	att_call	get_time_diff
	lea	rcx,compact_garbage_collect_time[rip]
	att_jmp	add_time
/* */
/* the garbage collector */
/* */

/* collect_3: store rcx, rdx and r8 at [rsi] upwards around the collector */
/* call so that they are reloaded afterwards. */
collect_3:
	.if PROFILE
	lea	rbp,garbage_collector_name[rip]
	att_call	profile_s
	.endif
	mov	[rsi],rcx
	mov	8[rsi],rdx
	mov
16[rsi],r8 + add rsi,24 + call collect_0_ + mov r8,(-8)[rsi] + mov rdx,(-16)[rsi] + mov rcx,(-24)[rsi] + sub rsi,24 + .if PROFILE + jmp profile_r + .else + ret + .endif + +collect_2: + .if PROFILE + lea rbp,garbage_collector_name[rip] + att_call profile_s + .endif + mov [rsi],rcx + mov 8[rsi],rdx + add rsi,16 + att_call collect_0_ + mov rdx,(-8)[rsi] + mov rcx,(-16)[rsi] + sub rsi,16 + .if PROFILE + att_jmp profile_r + .else + ret + .endif + +collect_1: + .if PROFILE + lea rbp,garbage_collector_name[rip] + att_call profile_s + .endif + mov [rsi],rcx + add rsi,8 + att_call collect_0_ + mov rcx,(-8)[rsi] + sub rsi,8 + .if PROFILE + att_jmp profile_r + .else + ret + .endif + +collect_0: + .if PROFILE + lea rbp,garbage_collector_name[rip] + att_call profile_s + .endif + att_call collect_0_ + .if PROFILE + att_jmp profile_r + .else + ret + .endif + +collect_0_: + mov rbp,rdi + + push rax + push rbx + + mov rbx,qword ptr heap_end_after_gc[rip] + sub rbx,rdi + + shr rbx,3 + sub rbx,r15 + mov qword ptr n_allocated_words[rip],rbx + + test byte ptr _flags[rip],64 + je no_mark3 + + mov rbp,qword ptr bit_counter[rip] + test rbp,rbp + je no_scan + + push rsi + mov rsi,rbx + + xor rbx,rbx + mov rcx,qword ptr bit_vector_p[rip] + +scan_bits: + cmp ebx,dword ptr[rcx] + je zero_bits + mov dword ptr [rcx],ebx + add rcx,4 + sub rbp,1 + att_jne scan_bits + + jmp end_scan + +zero_bits: + lea rdx,4[rcx] + add rcx,4 + sub rbp,1 + jne skip_zero_bits_lp1 + jmp end_bits + +skip_zero_bits_lp: + test rax,rax + jne end_zero_bits +skip_zero_bits_lp1: + mov eax,dword ptr [rcx] + add rcx,4 + sub rbp,1 + att_jne skip_zero_bits_lp + + test rax,rax + att_je end_bits + mov dword ptr (-4)[rcx],ebx + mov rax,rcx + sub rax,rdx + jmp end_bits2 + +end_zero_bits: + mov rax,rcx + sub rax,rdx + shl rax,3 + add qword ptr n_free_words_after_mark[rip],rax + mov dword ptr (-4)[rcx],ebx + + cmp rax,rsi + att_jb scan_bits + +found_free_memory: + mov qword ptr bit_counter[rip],rbp + mov qword ptr 
bit_vector_p[rip],rcx + + lea rbp,(-4)[rdx] + sub rbp,qword ptr heap_vector[rip] + shl rbp,6 + mov rdi,qword ptr heap_p3[rip] + add rdi,rbp + + lea rbp,[rdi+rax*8] + mov qword ptr heap_end_after_gc[rip],rbp + + mov r15,rax + sub r15,rsi + + pop rsi + pop rbx + pop rax + ret + +end_bits: + mov rax,rcx + sub rax,rdx + add rax,4 +end_bits2: + shl rax,3 + add qword ptr n_free_words_after_mark[rip],rax + cmp rax,rsi + att_jae found_free_memory + +end_scan: + pop rsi + mov qword ptr bit_counter[rip],rbp + +no_scan: + +no_mark3: + movsx rax,byte ptr garbage_collect_flag[rip] + test rax,rax + jle collect + + sub rax,2 + mov byte ptr garbage_collect_flag[rip],al + + mov rbp,qword ptr extra_heap_size[rip] + cmp rbx,rbp + att_ja collect + + mov rdi,qword ptr extra_heap[rip] + + mov r15,rbp + + lea rbp,[rdi+rbp*8] + mov qword ptr heap_end_after_gc[rip],rbp + + sub r15,rbx + + pop rbx + pop rax + ret + +collect: + .if LINUX + sub rsp,104 + .else + sub rsp,88 + .endif + mov 32[rsp],r10 + mov 24[rsp],r11 + mov 16[rsp],r12 + mov 8[rsp],r13 + mov [rsp],r14 + movsd 40[rsp],xmm0 + movsd 48[rsp],xmm1 + movsd 56[rsp],xmm2 + movsd 64[rsp],xmm3 + movsd 72[rsp],xmm4 + movsd 80[rsp],xmm5 + .if LINUX + movsd 88[rsp],xmm6 + movsd 96[rsp],xmm7 + .endif + + att_call add_execute_time + + test qword ptr _flags[rip],4 + je no_print_stack_sizes + + mov rbp,rsp + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + .else + sub rsp,32 + .endif + + .if 0 + .if LINUX + mov rdi,qword ptr 64[rsp] + .else + mov rcx,qword ptr 96[rsp] + .endif + call _ew_print_int + + .if LINUX + mov rdi,32 + .else + mov rcx,32 + .endif + att_call _ew_print_char + .endif + + .if LINUX + lea rdi,garbage_collect_string_1[rip] + .else + lea rcx,garbage_collect_string_1 + .endif + att_call _ew_print_string + + .if LINUX + mov rdi,r13 + sub rdi,stack_p[rip] + .else + mov rcx,rsi + sub rcx,stack_p + .endif + att_call _ew_print_int + + .if LINUX + lea rdi,garbage_collect_string_2[rip] + .else + lea rcx,garbage_collect_string_2 + 
.endif + att_call _ew_print_string + + .if LINUX + mov rdi,halt_sp[rip] + sub rdi,rsp + .else + mov rcx,halt_sp + sub rcx,rsp + .endif + att_call _ew_print_int + + .if LINUX + lea rdi,garbage_collect_string_3[rip] + .else + lea rcx,garbage_collect_string_3 + .endif + att_call _ew_print_string + + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + mov rsp,rbp + +no_print_stack_sizes: + mov rax,stack_p[rip] + add rax,qword ptr _ab_stack_size[rip] + cmp rsi,rax + att_ja stack_overflow + + test byte ptr _flags[rip],64 + jne compacting_collector + + cmp byte ptr garbage_collect_flag[rip],0 + att_jne compacting_collector + + mov rbp,heap_copied_vector[rip] + + cmp qword ptr heap_end_after_copy_gc[rip],0 + je zero_all + + mov rax,rdi + sub rax,qword ptr heap_p1[rip] + add rax,127*8 + shr rax,9 + call zero_bit_vector + + mov rdx,qword ptr heap_end_after_copy_gc[rip] + sub rdx,qword ptr heap_p1[rip] + shr rdx,7 + and rdx,-4 + + mov rbp,qword ptr heap_copied_vector[rip] + mov rax,qword ptr heap_copied_vector_size[rip] + add rbp,rdx + sub rax,rdx + shr rax,2 + + mov qword ptr heap_end_after_copy_gc[rip],0 + + att_call zero_bit_vector + jmp end_zero_bit_vector + +zero_all: + mov rax,heap_copied_vector_size[rip] + shr rax,2 + att_call zero_bit_vector + +end_zero_bit_vector: + + .include "acopy.s" + + mov qword ptr heap2_begin_and_end[rip],rsi + + mov r15,rsi + sub r15,rdi + + mov rax,heap_size_257[rip] + shl rax,7 + sub rax,r15 + add qword ptr total_gc_bytes[rip],rax + + shr r15,3 + + pop rsi + + att_call add_garbage_collect_time + + sub r15,qword ptr n_allocated_words[rip] + jc switch_to_mark_scan + + lea rax,[r15+r15*4] + shl rax,6 + mov rbx,qword ptr _heap_size[rip] + mov rcx,rbx + shl rbx,2 + add rbx,rcx + add rbx,rbx + add rbx,rcx + cmp rax,rbx + jnc no_mark_scan + +switch_to_mark_scan: + mov rax,qword ptr heap_size_65[rip] + shl rax,6 + mov rbx,qword ptr heap_p[rip] + + mov rcx,qword ptr heap_p1[rip] + cmp rcx,qword ptr heap_p2[rip] + jc vector_at_begin + +vector_at_end: + 
mov qword ptr heap_p3[rip],rbx + add rbx,rax + mov qword ptr heap_vector[rip],rbx + + mov rax,qword ptr heap_p1[rip] + mov qword ptr extra_heap[rip],rax + sub rbx,rax + shr rbx,3 + mov qword ptr extra_heap_size[rip],rbx + jmp switch_to_mark_scan_2 + +vector_at_begin: + mov qword ptr heap_vector[rip],rbx + add rbx,qword ptr _heap_size[rip] + sub rbx,rax + mov qword ptr heap_p3[rip],rbx + + mov qword ptr extra_heap[rip],rbx + mov rcx,qword ptr heap_p2[rip] + sub rcx,rbx + shr rcx,3 + mov qword ptr extra_heap_size[rip],rcx + +switch_to_mark_scan_2: + mov rax,heap_size_257[rip] + shl rax,7-3 + sub rax,r15 + shl rax,3 + + mov byte ptr garbage_collect_flag[rip],1 + + lea rcx,heap_use_after_gc_string_1[rip] + + test r15,r15 + jns end_garbage_collect + + mov byte ptr garbage_collect_flag[rip],-1 + + mov rbx,qword ptr extra_heap_size[rip] + mov r15,rbx + sub r15,qword ptr n_allocated_words[rip] + js out_of_memory_4_3 + + mov rdi,qword ptr extra_heap[rip] + shl rbx,3 + add rbx,rdi + mov qword ptr heap_end_after_gc[rip],rbx + + mov qword ptr heap_end_write_heap[rip],rdi + + mov qword ptr d3_flag_write_heap[rip],1 + jmp end_garbage_collect_ + +no_mark_scan: +/* exchange the semi_spaces */ + mov rax,heap_p1[rip] + mov rbx,heap_p2[rip] + mov heap_p2[rip],rax + mov heap_p1[rip],rbx + + mov rax,heap_size_257[rip] + shl rax,7-3 + mov rbx,rax + sub rax,r15 + + mov rcx,rax + imul qword ptr _heap_size_multiple[rip] + shrd rax,rdx,9 + shr rdx,9 + jne no_small_heap1 + + cmp rax,4000 + jge not_too_small1 + mov rax,4000 +not_too_small1: + sub rbx,rax + att_jb no_small_heap1 + + sub r15,rbx + shl rbx,3 + mov rbp,qword ptr heap_end_after_gc[rip] + mov qword ptr heap_end_after_copy_gc[rip],rbp + sub rbp,rbx + mov qword ptr heap_end_after_gc[rip],rbp + +no_small_heap1: + mov rax,rcx + shl rax,3 + + lea rcx,heap_use_after_gc_string_1[rip] + +end_garbage_collect: + + mov qword ptr heap_end_write_heap[rip],rdi + mov qword ptr d3_flag_write_heap[rip],0 + +end_garbage_collect_: + test qword ptr 
_flags[rip],2 + je no_heap_use_message + + push rax + + mov rbp,rsp + and rsp,-16 + + .if LINUX + mov r13,rsi + mov r14,rdi + + mov rdi,rcx + .else + sub rsp,32 + .endif + att_call _ew_print_string + + .if LINUX + mov rdi,[rbp] + .else + mov rcx,[rbp] + .endif + att_call _ew_print_int + + .if LINUX + lea rdi,heap_use_after_gc_string_2[rip] + .else + lea rcx,heap_use_after_gc_string_2 + .endif + att_call _ew_print_string + + .if LINUX + mov rsi,r13 + mov rdi,r14 + .else + add rsp,32 + .endif + mov rsp,rbp + + pop rax + +no_heap_use_message: + call call_finalizers + + test byte ptr _flags[rip],32 + je no_write_heap + + cmp rax,qword ptr _min_write_heap_size[rip] + att_jb no_write_heap + + push rcx + push rdx + push rbp + push rsi + push rdi + + sub rsp,128 + + mov rax,qword ptr d3_flag_write_heap[rip] + test rax,rax + jne copy_to_compact_with_alloc_in_extra_heap + + movsx rax,byte ptr garbage_collect_flag[rip] + + mov rcx,qword ptr heap2_begin_and_end[rip] + mov rdx,qword ptr (heap2_begin_and_end+8)[rip] + + lea rbx,heap_p1[rip] + + test rax,rax + je gc0 + + lea rbx,heap_p2[rip] + jg gc1 + + lea rbx,heap_p3[rip] + xor rcx,rcx + xor rdx,rdx + +gc0: +gc1: + mov rbx,qword ptr [rbx] + + mov rax,rsp + + mov qword ptr [rax],rbx + mov qword ptr 8[rax],rdi + + mov qword ptr 16[rax],rcx + mov qword ptr 24[rax],rdx + + mov rbx ,qword ptr stack_p[rip] + + mov qword ptr 32[rax],rbx + + mov qword ptr 40[rax],rsi + mov qword ptr 48[rax],0 + mov qword ptr 56[rax],0 + + lea rbp,small_integers[rip] + mov qword ptr 64[rax],rbp + lea rbp,static_characters[rip] + mov qword ptr 72[rax],rbp + + lea rbp,dINT+2[rip] + mov qword ptr 80[rax],rbp + lea rbp,CHAR+2[rip] + mov qword ptr 88[rax],rbp + lea rbp,REAL+2[rip] + mov qword ptr 96[rax],rbp + lea rbp,BOOL+2[rip] + mov qword ptr 104[rax],rbp + lea rbp,__STRING__+2[rip] + mov qword ptr 112[rax],rbp + lea rbp,__ARRAY__+2[rip] + mov qword ptr 120[rax],rbp + + mov rbp,rsp + and rsp,-16 + .if LINUX + mov rdi,rax + .else + mov rcx,rax + sub 
rsp,32 + .endif + .if ! LINUX + call write_heap + .endif + mov rsp,rbp + + add rsp,128 + + pop rdi + pop rsi + pop rbp + pop rdx + pop rcx +no_write_heap: + +restore_registers_after_gc_and_return: + mov r10,32[rsp] + mov r11,24[rsp] + mov r12,16[rsp] + mov r13,8[rsp] + mov r14,[rsp] + movlpd xmm0,40[rsp] + movlpd xmm1,48[rsp] + movlpd xmm2,56[rsp] + movlpd xmm3,64[rsp] + movlpd xmm4,72[rsp] + movlpd xmm5,80[rsp] + .if LINUX + movlpd xmm6,88[rsp] + movlpd xmm7,96[rsp] + add rsp,104 + .else + add rsp,88 + .endif + pop rbx + pop rax + ret + +call_finalizers: + mov rax,qword ptr free_finalizer_list[rip] + +call_finalizers_lp: + lea r9,__Nil-8[rip] + cmp rax,r9 + je end_call_finalizers + push 8[rax] + mov rbx,qword ptr 16[rax] + push 8[rbx] + call qword ptr [rbx] + add rsp,8 + pop rax + att_jmp call_finalizers_lp +end_call_finalizers: + + lea r9,__Nil-8[rip] + mov qword ptr free_finalizer_list[rip],r9 + ret + +copy_to_compact_with_alloc_in_extra_heap: + mov rcx,qword ptr heap2_begin_and_end[rip] + mov rdx,qword ptr (heap2_begin_and_end+8)[rip] + lea rbx,heap_p2[rip] + att_jmp gc1 + +allow_prefetch_for_athlon: + test qword ptr _flags[rip],4096 + jne no_prefetch_flag + + xor rax,rax + cpuid + test rax,rax + jz disable_prefetch_flag + + .if LINUX + cmp rbx,'A+('u*0x100)+('t*0x10000)+('h*0x1000000) + att_jne disable_prefetch_flag + cmp rdx,'e+('n*0x100)+('t*0x10000)+('i*0x1000000) + att_jne disable_prefetch_flag + cmp rcx,'c+('A*0x100)+('M*0x10000)+('D*0x1000000) + att_jne disable_prefetch_flag + .else + cmp rbx,'A'+('u' shl 8)+('t' shl 16)+('h' shl 24) + jne disable_prefetch_flag + cmp rdx,'e'+('n' shl 8)+('t' shl 16)+('i' shl 24) + jne disable_prefetch_flag + cmp rcx,'c'+('A' shl 8)+('M' shl 16)+('D' shl 24) + jne disable_prefetch_flag + .endif + +/* mov rax,1 */ +/* cpuid */ +/* and rax,0x0f00 */ +/* cmp rax,0x600 */ +/* je keep_prefetch_flag */ + + ret + +disable_prefetch_flag: + and qword ptr _flags[rip],-4097 +keep_prefetch_flag: +no_prefetch_flag: + ret + 
+out_of_memory_4_3: +out_of_memory_4_2: +out_of_memory_4_1: +out_of_memory_4: + att_call add_garbage_collect_time + + lea rbp,out_of_memory_string_4[rip] + att_jmp print_error + +zero_bit_vector: + xor rdx,rdx + test al,1 + je zero_bits1_1 + mov dword ptr [rbp],edx + add rbp,4 +zero_bits1_1: + shr rax,1 + + mov rbx,rax + shr rax,1 + test bl,1 + je zero_bits1_5 + + sub rbp,8 + jmp zero_bits1_2 + +zero_bits1_4: + mov dword ptr [rbp],edx + mov dword ptr 4[rbp],edx +zero_bits1_2: + mov dword ptr 8[rbp],edx + mov dword ptr 12[rbp],edx + add rbp,16 +zero_bits1_5: + sub rax,1 + att_jae zero_bits1_4 + ret + +reorder: + push rsi + push rbp + + mov rbp,rax + shl rbp,3 + mov rsi,rbx + shl rsi,3 + add rcx,rsi + sub rdx,rbp + + push rsi + push rbp + push rbx + push rax + jmp st_reorder_lp + +reorder_lp: + mov rbp,qword ptr [rcx] + mov rsi,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rbp + sub rdx,8 + mov qword ptr [rcx],rsi + add rcx,8 + + dec rax + jne next_b_in_element + mov rax,qword ptr [rsp] + add rcx,qword ptr 24[rsp] +next_b_in_element: + dec rbx + jne next_a_in_element + mov rbx,qword ptr 8[rsp] + sub rdx,qword ptr 16[rsp] +next_a_in_element: +st_reorder_lp: + cmp rdx,rcx + att_ja reorder_lp + + pop rax + pop rbx + add rsp,16 + pop rbp + pop rsi + ret + +/* */ +/* the sliding compacting garbage collector */ +/* */ + +compacting_collector: +/* zero all mark bits */ + + mov rax,qword ptr heap_p3[rip] + neg rax + mov qword ptr neg_heap_p3[rip],rax + + mov qword ptr stack_top[rip],rsi + + mov rdi,qword ptr heap_vector[rip] + + test byte ptr _flags[rip],64 + je no_mark4 + + cmp qword ptr zero_bits_before_mark[rip],0 + je no_zero_bits + + mov qword ptr zero_bits_before_mark[rip],0 + +no_mark4: + mov rbp,rdi + mov rax,qword ptr heap_size_65[rip] + add rax,3 + shr rax,2 + + xor rbx,rbx + + test al,1 + je zero_bits_1 + mov dword ptr [rbp],ebx + add rbp,4 +zero_bits_1: + mov rcx,rax + shr rax,2 + + test cl,2 + je zero_bits_5 + + sub rbp,8 + jmp zero_bits_2 + +zero_bits_4: + mov 
dword ptr [rbp],ebx + mov dword ptr 4[rbp],ebx +zero_bits_2: + mov dword ptr 8[rbp],ebx + mov dword ptr 12[rbp],ebx + add rbp,16 +zero_bits_5: + sub rax,1 + att_jnc zero_bits_4 + + test byte ptr _flags[rip],64 + je no_mark5 + +no_zero_bits: + mov rax,qword ptr n_last_heap_free_bytes[rip] + mov rbx,qword ptr n_free_words_after_mark[rip] + shl rbx,3 + + mov rbp,rbx + shl rbp,3 + add rbp,rbx + shr rbp,2 + + cmp rax,rbp + jg compact_gc + + mov rbx,qword ptr bit_vector_size[rip] + shl rbx,3 + + sub rax,rbx + neg rax + + imul qword ptr _heap_size_multiple[rip] + shrd rax,rdx,7 + shr rdx,7 + jne no_smaller_heap + + cmp rax,rbx + att_jae no_smaller_heap + + cmp rbx,8000 + att_jbe no_smaller_heap + + att_jmp compact_gc +no_smaller_heap: + test qword ptr _flags[rip],4096 + jne pmark + + .include "amark.s" + + .include "amark_prefetch.s" + +compact_gc: + mov qword ptr zero_bits_before_mark[rip],1 + mov qword ptr n_last_heap_free_bytes[rip],0 + mov qword ptr n_free_words_after_mark[rip],1000 + +no_mark5: + + .include "acompact.s" + + mov rsi,qword ptr stack_top[rip] + + mov rbx,qword ptr heap_size_65[rip] + shl rbx,6 + add rbx,qword ptr heap_p3[rip] + + mov qword ptr heap_end_after_gc[rip],rbx + + sub rbx,rdi + shr rbx,3 + + sub rbx,qword ptr n_allocated_words[rip] + mov r15,rbx + att_jc out_of_memory_4_1 + + mov rax,rbx + shl rax,2 + add rax,rbx + shl rax,4 + cmp rax,qword ptr _heap_size[rip] + att_jc out_of_memory_4_2 + + test byte ptr _flags[rip],64 + je no_mark_6 + + mov rax,qword ptr neg_heap_p3[rip] + add rax,rdi + mov rbx,qword ptr n_allocated_words[rip] + lea rax,[rax+rbx*8] + + mov rbx,qword ptr heap_size_65[rip] + shl rbx,6 + + imul qword ptr _heap_size_multiple[rip] + shrd rax,rdx,8 + shr rdx,8 + jne no_small_heap2 + + and rax,-4 + + cmp rax,8000 + jae not_too_small2 + mov rax,8000 +not_too_small2: + mov rcx,rbx + sub rcx,rax + att_jb no_small_heap2 + + sub qword ptr heap_end_after_gc[rip],rcx + shr rcx,3 + sub r15,rcx + + mov rbx,rax + +no_small_heap2: + shr rbx,3 
/* rbx (shifted right by 3 just before this point) becomes the new bit_vector_size */
	mov	qword ptr bit_vector_size[rip],rbx

no_mark_6:
	jmp	no_copy_garbage_collection

/* no_copy_garbage_collection: bookkeeping after a compacting collection. */
/* Adds the elapsed time to compact_garbage_collect_time, adds the bytes in */
/* use (rdi - heap_p3) to total_compact_gc_bytes, then continues at */
/* end_garbage_collect with */
/*   rax = (rdi - heap_p3) + 8*n_allocated_words */
/*   rcx = heap_use_after_compact_gc_string_1 */
no_copy_garbage_collection:
	att_call	add_compact_garbage_collect_time

	mov	rax,rdi
	sub	rax,qword ptr heap_p3[rip]

	add	qword ptr total_compact_gc_bytes[rip],rax

	mov	rax,rdi
	sub	rax,qword ptr heap_p3[rip]
	mov	rbx,qword ptr n_allocated_words[rip]
	lea	rax,[rax+rbx*8]

	lea	rcx,heap_use_after_compact_gc_string_1[rip]
	att_jmp	end_garbage_collect

	.globl	clean_exception_handler_

/* clean_exception_handler_: exception filter; rcx points at a pair of */
/* pointers, [rcx] = exception record, 8[rcx] = thread context. */
/* Returns rax = 0 when the exception is not recognised as a stack */
/* overflow, or rax = -1 after redirecting the context to stack_overflow. */
/* NOTE(review): the first instruction jumps to the routine's own entry, */
/* so as written the handler spins and the code below it is unreachable. */
/* This looks like a deliberate stub for this port - confirm before */
/* removing it. */
clean_exception_handler_:

	att_jmp	clean_exception_handler_

	mov	rax,qword ptr [rcx]
	cmp	dword ptr [rax],0x0c00000fd	/* stack overflow code (per the label) */
	je	stack_overflow_exception

	cmp	dword ptr [rax],0x80000001
	je	guard_page_or_access_violation_exception

	cmp	dword ptr [rax] ,0x0c0000005
	att_je	guard_page_or_access_violation_exception

no_stack_overflow_exception:
	mov	rax,0
	ret

/* Treat a guard-page or access violation as a stack overflow only when the */
/* page-aligned address read from offset 16 of the record equals */
/* a_stack_guard_page and a_stack_guard_page is non-zero. */
guard_page_or_access_violation_exception:
	mov	rax,qword ptr 16[rax]
	and	rax,-4096
	cmp	qword ptr a_stack_guard_page[rip],rax
	att_jne	no_stack_overflow_exception

	cmp	qword ptr a_stack_guard_page[rip],0
	att_je	no_stack_overflow_exception

/* Store the address of stack_overflow at offset 0xF8 of the context */
/* (presumably the saved instruction pointer - confirm against the CONTEXT */
/* layout). Fix: the address is now formed in rdx; previously it was loaded */
/* into rax, overwriting the context pointer that had just been read, so */
/* the store went to stack_overflow+0xF8 instead of into the context. */
stack_overflow_exception:
	mov	rax,qword ptr 8[rcx]
	lea	rdx,stack_overflow[rip]
	mov	qword ptr (0x0F8)[rax],rdx

	mov	rax,-1
	ret

/* stack_overflow: account the execution time, print the stack overflow */
/* message and halt (through print_error). */
stack_overflow:
	att_call	add_execute_time

	lea	rbp,stack_overflow_string[rip]
	att_jmp	print_error

/* _IO_error: print IO_error_string, the string passed in rcx and a new */
/* line with _ew_print_string, then halt. rbx keeps the argument across */
/* the first call; rbp keeps the unaligned rsp. */
_IO_error:
	mov	rbp,rsp
	and	rsp,-16

	mov	rbx,rcx
	.if LINUX
	lea	rdi,IO_error_string[rip]
	.else
	sub	rsp,32
	lea	rcx,IO_error_string
	.endif
	att_call	_ew_print_string

	.if LINUX
	mov	rdi,rbx
	.else
	mov	rcx,rbx
	.endif
	att_call	_ew_print_string

	.if LINUX
	lea	rdi,new_line_string[rip]
	.else
	lea	rcx,new_line_string
	.endif
	att_call	_ew_print_string

	mov	rsp,rbp

	att_jmp	halt

/* print_error: print the string whose address is in rbp with */
/* _ew_print_string, then fall through into halt. */
print_error:
	.if LINUX
	mov	rdi,rbp
	.else
	mov	rcx,rbp
	.endif
	mov	rbp,rsp
	and	rsp,-16
	att_call	_ew_print_string
	mov	rsp,rbp

/* halt: reset rsp to halt_sp and shut the program down. */
halt:
	mov	rsp,halt_sp[rip]

	.if PROFILE
	call
write_profile_stack + .endif + + mov qword ptr _execution_aborted[rip],1 + + cmp qword ptr dll_initisialised[rip],0 + .if LINUX + att_je exit_ + .else + je exit + .endif + .if LINUX + cmp dword ptr _return_code[rip],0 + .else + cmp qword ptr return_code,0 + .endif + jne return_code_set + .if LINUX + mov dword ptr _return_code[rip],-1 + .else + mov qword ptr return_code,-1 + .endif +return_code_set: + .if LINUX + mov edi,dword ptr _return_code[rip] + and rsp,-16 + att_call _exit + .else + push qword ptr return_code + call (ExitProcess) + .endif + att_jmp return_code_set + +e__system__eaind: +__eaind: +eval_fill: + mov [rsi],rcx + add rsi,8 + mov rcx,rdx + call qword ptr [rdx] + mov rdx,rcx + mov rcx,(-8)[rsi] + sub rsi,8 + + mov rbp,[rdx] + mov [rcx],rbp + mov rbp,8[rdx] + mov 8[rcx],rbp + mov rbp,16[rdx] + mov 16[rcx],rbp + ret + + .align 2 + lea rax,e__system__eaind[rip] + jmp rax + .byte 0,0,0 + .long 0x80000000 /* e__system__dind */ + .long -2 +e__system__nind: +__indirection: + mov rdx,8[rcx] + mov rax,[rdx] + test al,2 + + je eval_fill2 + + mov [rcx],rax + mov rbp,8[rdx] + mov 8[rcx],rbp + mov rbp,16[rdx] + mov 16[rcx],rbp + ret + +eval_fill2: + lea r9,__cycle__in__spine[rip] + mov qword ptr [rcx],r9 + mov qword ptr [rsi],rcx + + test byte ptr _flags[rip],64 + att_je __cycle__in__spine + + add rsi,8 + mov rcx,rdx + call rax + mov rdx,rcx + mov rcx,qword ptr (-8)[rsi] + sub rsi,8 + + mov rbp,[rdx] + mov [rcx],rbp + mov rbp,8[rdx] + mov 8[rcx],rbp + mov rbp,16[rdx] + mov 16[rcx],rbp + ret + + .if PROFILE + call profile_n + mov rbp,rax + .endif +eval_upd_0: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov 8[rdx],rcx + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_1: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov rax,8[rdx] + mov 8[rdx],rcx + mov rdx,rax + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_2: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov r8,8[rdx] + mov 
8[rdx],rcx + mov rdx,16[rdx] + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_3: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov r8,8[rdx] + mov 8[rdx],rcx + mov [rsi],rcx + mov rcx,24[rdx] + add rsi,8 + mov rdx,16[rdx] + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_4: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov r8,8[rdx] + mov 8[rdx],rcx + mov [rsi],rcx + mov rbx,32[rdx] + mov 8[rsi],rbx + mov rcx,24[rdx] + add rsi,16 + mov rdx,16[rdx] + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_5: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov r8,8[rdx] + mov [rsi],rcx + mov 8[rdx],rcx + mov rbx,40[rdx] + mov 8[rsi],rbx + mov rbx,32[rdx] + mov 16[rsi],rbx + mov rcx,24[rdx] + add rsi,24 + mov rdx,16[rdx] + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_6: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov r8,8[rdx] + mov [rsi],rcx + mov 8[rdx],rcx + mov rbx,48[rdx] + mov 8[rsi],rbx + mov rbx,40[rdx] + mov 16[rsi],rbx + mov rbx,32[rdx] + mov 24[rsi],rbx + mov rcx,24[rdx] + add rsi,32 + mov rdx,16[rdx] + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_7: + mov rax,0 + mov rbx,40 +eval_upd_n: + lea r8,__indirection[rip] + mov qword ptr [rdx],r8 + mov r8,8[rdx] + mov [rsi],rcx + mov 8[rdx],rcx + add rdx,rbx + mov rbx,16[rdx ] + mov 8[rsi],rbx + mov rbx,8[rdx] + mov 16[rsi],rbx + mov rbx,[rdx] + mov 24[rsi],rbx + add rsi,32 + +eval_upd_n_lp: + mov rbx,(-8)[rdx] + sub rdx,8 + mov [rsi],rbx + add rsi,8 + sub rax,1 + att_jnc eval_upd_n_lp + + mov rcx,(-8)[rdx] + mov rdx,(-16)[rdx ] + jmp rbp + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_8: + mov rax,1 + mov rbx,48 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_9: + mov rax,2 + mov rbx,56 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif 
+eval_upd_10: + mov rax,3 + mov rbx,64 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_11: + mov rax,4 + mov rbx,72 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_12: + mov rax,5 + mov rbx,80 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_13: + mov rax,6 + mov rbx,88 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_14: + mov rax,7 + mov rbx,96 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_15: + mov rax,8 + mov rbx,104 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_16: + mov rax,9 + mov rbx,112 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_17: + mov rax,10 + mov rbx,120 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_18: + mov rax,11 + mov rbx,128 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_19: + mov rax,12 + mov rbx,136 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_20: + mov rax,13 + mov rbx,144 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_21: + mov rax,14 + mov rbx,152 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_22: + mov rax,15 + mov rbx,160 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_23: + mov rax,16 + mov rbx,168 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_24: + mov rax,17 + mov rbx,176 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_25: + mov rax,18 + mov rbx,184 + att_jmp eval_upd_n + + .if PROFILE + att_call profile_n + mov rbp,rax + .endif +eval_upd_26: + mov rax,19 + mov rbx,192 + att_jmp 
eval_upd_n
+	/* eval_upd_27 .. eval_upd_32 (below): every entry loads rax and rbx with
+	 * its own pair of constants and transfers to eval_upd_n; from one entry
+	 * to the next rax grows by 1 and rbx by 8. When PROFILE is set, each
+	 * label is preceded by a call of profile_n followed by mov rbp,rax.
+	 */
+	.if PROFILE
+	att_call profile_n
+	mov rbp,rax
+	.endif
+eval_upd_27:
+	mov rax,20
+	mov rbx,200
+	att_jmp eval_upd_n
+
+	.if PROFILE
+	att_call profile_n
+	mov rbp,rax
+	.endif
+eval_upd_28:
+	mov rax,21
+	mov rbx,208
+	att_jmp eval_upd_n
+
+	.if PROFILE
+	att_call profile_n
+	mov rbp,rax
+	.endif
+eval_upd_29:
+	mov rax,22
+	mov rbx,216
+	att_jmp eval_upd_n
+
+	.if PROFILE
+	att_call profile_n
+	mov rbp,rax
+	.endif
+eval_upd_30:
+	mov rax,23
+	mov rbx,224
+	att_jmp eval_upd_n
+
+	.if PROFILE
+	att_call profile_n
+	mov rbp,rax
+	.endif
+eval_upd_31:
+	mov rax,24
+	mov rbx,232
+	att_jmp eval_upd_n
+
+	.if PROFILE
+	att_call profile_n
+	mov rbp,rax
+	.endif
+eval_upd_32:
+	mov rax,25
+	mov rbx,240
+	att_jmp eval_upd_n
+
+/* */
+/* STRINGS */
+/* */
+	/* catAC: build a new __STRING__ node that holds the bytes of the string
+	 * node in rdx followed by the bytes of the string node in rcx. Both
+	 * inputs keep their length at offset 8 and their bytes from offset 16.
+	 * Result: rcx = new node (it starts at the rdi value on entry); rdi is
+	 * left rounded up to a multiple of 8 behind the node.
+	 * r15 is lowered by the node size in 8-byte words; when it becomes
+	 * negative, gc_3 calls collect_2 and resumes at gc_r_3.
+	 * Overwrites rax, rbx, rbp and r8.
+	 */
+catAC:
+	mov rax,8[rcx]
+	mov rbx,8[rdx]
+	lea rbp,16+7[rax+rbx]	/* 16 header bytes + both lengths, +7 to round up */
+	shr rbp,3	/* bytes -> 8-byte words */
+	sub r15,rbp
+	jl gc_3
+gc_r_3:
+	add rcx,16
+	add rdx,16
+
+/* fill_node */
+
+	mov r8,rdi	/* remember the node start for the result */
+	lea rbp,__STRING__+2[rip]
+	mov qword ptr [rdi],rbp
+
+/* store length */
+
+	lea rbp,[rax+rbx]
+	mov 8[rdi],rbp
+	add rdi,16
+
+/* copy string 1 */
+
+	lea rbp,7[rbx]
+	shr rbp,3	/* words occupied by the bytes of the rdx string */
+	add rbx,rdi	/* rbx = address right behind those bytes in the new node */
+
+	xchg rcx,rbp	/* rcx = word count for rep movsq, rbp keeps the rcx string pointer */
+	xchg rsi,rdx	/* rsi = copy source, rdx keeps the previous rsi */
+	cld
+	rep movsq
+	mov rsi,rdx	/* restore rsi */
+	mov rcx,rbp
+
+	mov rdi,rbx	/* continue writing directly behind the first string */
+
+/* copy_string 2 */
+
+cat_string_6:
+	mov rbp,rax
+	shr rbp,3	/* whole 8-byte words of the rcx string */
+	je cat_string_9
+
+cat_string_7:
+	mov rbx,[rcx]
+	add rcx,8
+	mov [rdi],rbx
+	add rdi,8
+	dec rbp
+	att_jne cat_string_7
+
+cat_string_9:	/* bits 2, 1 and 0 of the length select a trailing 4, 2 and 1 byte copy */
+	test al,4
+	je cat_string_10
+	mov ebx,dword ptr [rcx]
+	add rcx,4
+	mov dword ptr [rdi],ebx
+	add rdi,4
+cat_string_10:
+	test al,2
+	je cat_string_11
+	mov bx,word ptr [rcx]
+	add rcx,2
+	mov word ptr [rdi],bx
+	add rdi,2
+cat_string_11:
+	test al,1
+	je cat_string_12
+	mov bl,byte ptr [rcx]
+	mov byte ptr [rdi],bl
+	inc rdi
+cat_string_12:
+
+	mov rcx,r8
+/* .align heap pointer */
+	add rdi,7
+	and rdi,-8
+	ret
+
+gc_3: att_call collect_2
+	att_jmp gc_r_3
+	/* empty_string: returns rcx = address of zero_length_string */
+empty_string:
+	lea rcx,zero_length_string[rip]
+	ret
+
+sliceAC:
+	mov rbp,8[rcx]	/* rbp = length of the string in rcx */
+
test rbx,rbx	/* sliceAC continued (rbx = first index, rax = last index): a negative first index counts as 0 */
+	jns slice_string_1
+	xor rbx,rbx
+slice_string_1:
+	cmp rbx,rbp
+	att_jge empty_string	/* first index at or behind the end of the string */
+	cmp rax,rbx
+	att_jl empty_string	/* last index before the first index */
+	inc rax
+	cmp rax,rbp
+	jle slice_string_2
+	mov rax,rbp	/* clamp the end of the slice to the string length */
+slice_string_2:
+	sub rax,rbx	/* rax = number of bytes in the slice */
+
+	lea rbp,(16+7)[rax]
+	shr rbp,3	/* node size in 8-byte words (2 header words + data) */
+
+	sub r15,rbp
+	jl gc_4
+r_gc_4:
+	sub rbp,2	/* data words only */
+	lea rdx,16[rcx+rbx]	/* address of the first byte to copy */
+
+	lea rcx,__STRING__+2[rip]
+	mov qword ptr [rdi],rcx
+	mov 8[rdi],rax
+
+/* copy part of string */
+	mov rcx,rbp
+	mov rbp,rdi	/* keep the node start for the result */
+	add rdi,16
+
+	xchg rsi,rdx	/* rsi = copy source, rdx keeps the previous rsi */
+	cld
+	rep movsq
+	mov rsi,rdx	/* restore rsi */
+	mov rcx,rbp	/* result: the new node */
+	ret
+
+gc_4:
+	mov rbp,rdx
+	att_call collect_1
+	lea rbp,(16+7)[rax]	/* recompute the node size before resuming */
+	shr rbp,3
+	att_jmp r_gc_4
+	/* updateAC: copy the string node in rcx into a new __STRING__ node and
+	 * store the byte al at index rbx of the copy.
+	 * In:  rcx = string node (length at offset 8, bytes from offset 16),
+	 *      rbx = index, al = byte to store.
+	 * Out: rcx = new node.
+	 * An index that is not below the length in an unsigned compare (so a
+	 * negative index as well) is sent to update_string_error.
+	 * r15 is lowered by the node size in words; gc_5 calls collect_1 when it
+	 * becomes negative. Overwrites rbx, rbp, rdx and r8.
+	 */
+updateAC:
+	mov rbp,8[rcx]
+	cmp rbx,rbp
+	jae update_string_error
+
+	add rbp,16+7
+	shr rbp,3
+
+	sub r15,rbp
+	jl gc_5
+r_gc_5:
+	mov rbp,8[rcx]
+	add rbp,7
+	shr rbp,3	/* data words to copy */
+
+	mov rdx,rcx
+	mov r8,rdi	/* keep the node start for the result */
+	lea rcx,__STRING__+2[rip]
+	mov qword ptr [rdi],rcx
+	mov rcx,8[rdx]
+	add rdx,16
+	mov 8[rdi],rcx
+	add rdi,16
+
+	add rbx,rdi	/* rbx = address of the byte to replace in the copy */
+
+	mov rcx,rbp
+	xchg rsi,rdx	/* rsi = copy source, rdx keeps the previous rsi */
+	cld
+	rep movsq
+	mov rsi,rdx	/* restore rsi */
+
+	mov byte ptr [rbx],al
+	mov rcx,r8
+	ret
+
+gc_5: att_call collect_1
+	att_jmp r_gc_5
+	/* update_string_error: choose the message from the sign of the rejected
+	 * index, which updateAC keeps in rbx (rax only carries the byte that was
+	 * to be stored), then continue in print_error with rbp = message.
+	 */
+update_string_error:
+	lea rbp,high_index_string[rip]
+	test rbx,rbx
+	jns update_string_error_2
+	lea rbp,low_index_string[rip]
+update_string_error_2:
+	att_jmp print_error
+	/* eqAC: compare the string nodes in rcx and rdx for equality.
+	 * Out: rax = 1 when length and all bytes agree, otherwise 0 (set at
+	 * equal_string_eq / equal_string_ne). Overwrites rbx, rbp, rcx and rdx.
+	 */
+eqAC:
+	mov rax,8[rcx]
+	cmp rax,8[rdx]
+	jne equal_string_ne
+	add rcx,16
+	add rdx,16
+	mov rbx,rax
+	and rbx,7	/* bytes left after the whole 8-byte words */
+	shr rax,3	/* number of whole 8-byte words */
+	je equal_string_d
+equal_string_1:
+	mov rbp,[rcx]
+	cmp rbp,[rdx]
+	att_jne equal_string_ne
+	add rcx,8
+	add rdx,8
+	dec rax
+	att_jne equal_string_1
+equal_string_d:	/* bits 2, 1 and 0 of bl select a trailing 4, 2 and 1 byte compare */
+	test bl,4
+	je equal_string_w
+	mov eax,dword ptr [rcx]
+	cmp eax,dword ptr [rdx]
+	att_jne equal_string_ne
+	add rcx,4
+	add rdx,4
+equal_string_w:
+	test bl,2
+	je equal_string_b
+	mov ax,word ptr [rcx]
+	cmp ax,word ptr [rdx]
+	att_jne equal_string_ne
+	add rcx,2
+	add rdx,2
+equal_string_b:
+	test bl,1
+	je equal_string_eq
+	mov bl,byte ptr [rcx]
+	cmp bl,byte ptr [rdx]
+	att_jne
equal_string_ne
+equal_string_eq:	/* eqAC result: the strings are equal */
+	mov rax,1
+	ret
+equal_string_ne:	/* eqAC result: the strings differ */
+	xor rax,rax
+	ret
+	/* cmpAC: three-way comparison of the string node in rdx with the string
+	 * node in rcx (length at offset 8, bytes from offset 16).
+	 * Out: rax = 1 when the rdx string is greater, -1 when it is smaller and
+	 * 0 when both are equal. The bytes of the common prefix decide first
+	 * (unsigned, in memory order); when they all agree the result is the one
+	 * chosen from the lengths, i.e. the longer string is the greater one.
+	 * Overwrites rbx, rbp, r10, rcx and rdx.
+	 */
+cmpAC:
+	mov rbx,8[rcx]
+	mov rbp,8[rdx]
+	add rcx,16
+	add rdx,16
+	cmp rbp,rbx
+	jb cmp_string_less
+	ja cmp_string_more
+	xor rax,rax
+	jmp cmp_string_chars
+cmp_string_more:
+	mov rax,1
+	att_jmp cmp_string_chars
+cmp_string_less:
+	mov rax,-1
+	mov rbx,rbp	/* rbx = the shorter of both lengths */
+	att_jmp cmp_string_chars
+
+cmp_string_1:
+	mov rbp,[rdx]
+	cmp rbp,[rcx]
+	jne cmp_string_ne_q
+	add rdx,8
+	add rcx,8
+cmp_string_chars:
+	sub rbx,8	/* no borrow: at least 8 more bytes to compare */
+	att_jnc cmp_string_1
+cmp_string_d:	/* the low 3 bits of rbx still equal those of the byte count */
+	test bl,4
+	je cmp_string_w
+	mov ebp,dword ptr [rdx]
+	cmp ebp,dword ptr [rcx]
+	jne cmp_string_ne_d
+	add rdx,4
+	add rcx,4
+cmp_string_w:
+	test bl,2
+	je cmp_string_b
+	mov bpl,byte ptr [rdx]
+	cmp bpl,byte ptr [rcx]
+	jne cmp_string_ne
+	mov bpl,byte ptr 1[rdx]
+	cmp bpl,byte ptr 1[rcx]
+	att_jne cmp_string_ne
+	add rdx,2
+	add rcx,2
+cmp_string_b:
+	test bl,1
+	je cmp_string_eq
+	mov bl,byte ptr [rdx]
+	cmp bl,byte ptr [rcx]
+	att_jne cmp_string_ne
+cmp_string_eq:	/* all compared bytes agree: rax still holds the length-based result */
+	ret
+cmp_string_ne_d:	/* byte-swap both dwords so the first differing byte in memory decides */
+	mov r10d,[rcx]
+	bswap ebp
+	bswap r10d
+	cmp ebp,r10d
+	att_jmp cmp_string_ne
+cmp_string_ne_q:	/* same for a differing 8-byte word */
+	mov r10,[rcx]
+	bswap rbp
+	bswap r10
+	cmp rbp,r10
+cmp_string_ne:	/* expects the flags of an unsigned compare (rdx side, rcx side) */
+	ja cmp_string_r1
+	mov rax,-1
+	ret
+cmp_string_r1:
+	mov rax,1
+	ret
+	/* string_to_string_node: copy a length-prefixed byte sequence into a new
+	 * __STRING__ node.
+	 * In:  rcx = address of an 8-byte length followed by the bytes.
+	 * Out: rcx = new node. The data is copied in whole 8-byte words.
+	 * r15 is lowered by the node size in words; when it becomes negative,
+	 * string_to_string_node_gc calls collect_0 with rcx saved on the stack.
+	 * Overwrites rax, rbx and rbp.
+	 */
+string_to_string_node:
+	mov rax,qword ptr [rcx]
+	add rcx,8
+
+	lea rbx,16+7[rax]
+	shr rbx,3	/* node size in 8-byte words */
+
+	sub r15,rbx
+	jl string_to_string_node_gc
+
+string_to_string_node_r:
+	sub rbx,2	/* data words only */
+	lea rbp,__STRING__+2[rip]
+	mov qword ptr [rdi],rbp
+	mov qword ptr 8[rdi],rax
+	mov rbp,rdi	/* keep the node start for the result */
+	add rdi,16
+	jmp string_to_string_node_4
+
+string_to_string_node_2:
+	mov rax,qword ptr [rcx]
+	add rcx,8
+	mov qword ptr [rdi],rax
+	add rdi,8
+string_to_string_node_4:
+	sub rbx,1
+	att_jge string_to_string_node_2
+
+	mov rcx,rbp
+	ret
+
+string_to_string_node_gc:
+	push rcx
+	att_call collect_0
+	pop rcx
+	att_jmp string_to_string_node_r
+
+
+int_array_to_node:	/* rax = word stored 16 bytes below rcx, used as element count; rax+3 words are reserved */
+	mov rax,qword ptr -16[rcx]
+	lea rbx,3[rax]
+	sub r15,rbx
+
jl int_array_to_node_gc + +int_array_to_node_r: + lea rbx,__ARRAY__+2[rip] + mov qword ptr [rdi],rbx + mov rdx,rcx + mov qword ptr 8[rdi],rax + mov rcx,rdi + lea rbx,dINT+2[rip] + mov qword ptr 16[rdi],rbx + add rdi,24 + jmp int_or_real_array_to_node_4 + +int_or_real_array_to_node_2: + mov rbx,qword ptr [rdx] + add rdx,8 + mov qword ptr [rdi],rbx + add rdi,8 +int_or_real_array_to_node_4: + sub rax,1 + att_jge int_or_real_array_to_node_2 + + ret + +int_array_to_node_gc: + push rcx + att_call collect_0 + pop rcx + att_jmp int_array_to_node_r + + +real_array_to_node: + mov rax,qword ptr -16[rcx] + lea rbx,3[rax] + sub r15,rbx + jl real_array_to_node_gc + +real_array_to_node_r: + lea rbx,__ARRAY__+2[rip] + mov qword ptr [rdi],rbx + mov rdx,rcx + mov qword ptr 8[rdi],rax + mov rcx,rdi + lea rbx,REAL+2[rip] + mov qword ptr 16[rdi],rbx + add rdi,24 + att_jmp int_or_real_array_to_node_4 + +real_array_to_node_gc: + push rcx + att_call collect_0 + pop rcx + att_jmp real_array_to_node_r + + + .align 2 + .long 3 +_c3: att_jmp __cycle__in__spine + .align 2 + + .long 4 +_c4: att_jmp __cycle__in__spine + .align 2 + .long 5 +_c5: att_jmp __cycle__in__spine + .align 2 + .long 6 +_c6: att_jmp __cycle__in__spine + .align 2 + .long 7 +_c7: att_jmp __cycle__in__spine + .align 2 + .long 8 +_c8: att_jmp __cycle__in__spine + .align 2 + .long 9 +_c9: att_jmp __cycle__in__spine + .align 2 + .long 10 +_c10: att_jmp __cycle__in__spine + .align 2 + .long 11 +_c11: att_jmp __cycle__in__spine + .align 2 + .long 12 +_c12: att_jmp __cycle__in__spine + .align 2 + .long 13 +_c13: att_jmp __cycle__in__spine + .align 2 + .long 14 +_c14: att_jmp __cycle__in__spine + .align 2 + .long 15 +_c15: att_jmp __cycle__in__spine + .align 2 + .long 16 +_c16: att_jmp __cycle__in__spine + .align 2 + .long 17 +_c17: att_jmp __cycle__in__spine + .align 2 + .long 18 +_c18: att_jmp __cycle__in__spine + .align 2 + .long 19 +_c19: att_jmp __cycle__in__spine + .align 2 + .long 20 +_c20: att_jmp __cycle__in__spine + .align 
2	/* table continued: every entry is .align 2, .long N and a label _cN that transfers to __cycle__in__spine */
+	.long 21
+_c21: att_jmp __cycle__in__spine
+	.align 2
+	.long 22
+_c22: att_jmp __cycle__in__spine
+	.align 2
+	.long 23
+_c23: att_jmp __cycle__in__spine
+	.align 2
+	.long 24
+_c24: att_jmp __cycle__in__spine
+	.align 2
+	.long 25
+_c25: att_jmp __cycle__in__spine
+	.align 2
+	.long 26
+_c26: att_jmp __cycle__in__spine
+	.align 2
+	.long 27
+_c27: att_jmp __cycle__in__spine
+	.align 2
+	.long 28
+_c28: att_jmp __cycle__in__spine
+	.align 2
+	.long 29
+_c29: att_jmp __cycle__in__spine
+	.align 2
+	.long 30
+_c30: att_jmp __cycle__in__spine
+	.align 2
+	.long 31
+_c31: att_jmp __cycle__in__spine
+	.align 2
+	.long 32
+_c32: att_jmp __cycle__in__spine
+
+/* */
+/* ARRAYS */
+/* */
+	/* The _create_array* routines below only write the node header and move
+	 * rdi behind the node; the element area is not written here.
+	 * Common pattern: rax = number of elements on entry, rcx = new node on
+	 * return, r15 is lowered by the node size in 8-byte words and collect_0
+	 * is called when the result is negative.
+	 *
+	 * _create_arrayB: one byte per element, 3 header words
+	 * (__ARRAY__+2, element count, BOOL+2). Overwrites rax, rbx and rdx.
+	 */
+_create_arrayB:
+	mov rbx,rax	/* keep the element count for the header */
+	add rax,24+7
+	shr rax,3	/* rax = node size in words */
+	sub r15,rax
+	jge no_collect_4574
+	att_call collect_0
+no_collect_4574:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rbx
+	lea rdx,BOOL+2[rip]
+	mov qword ptr 16[rdi],rdx
+	lea rdi,[rdi+rax*8]
+	ret
+	/* _create_arrayC: one byte per element, 2 header words
+	 * (__STRING__+2, element count). Overwrites rax, rbx and rdx.
+	 */
+_create_arrayC:
+	mov rbx,rax
+	add rax,16+7
+	shr rax,3
+	sub r15,rax
+	jge no_collect_4573
+	att_call collect_0
+no_collect_4573:
+	mov rcx,rdi
+	lea rdx,__STRING__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rbx
+	lea rdi,[rdi+rax*8]
+	ret
+	/* _create_arrayI: one 8-byte word per element, 3 header words
+	 * (__ARRAY__+2, element count, dINT+2). Overwrites rbp and rdx.
+	 */
+_create_arrayI:
+	lea rbp,3[rax]
+	sub r15,rbp
+	jge no_collect_4572
+	att_call collect_0
+no_collect_4572:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	lea rbp,dINT+2[rip]
+	mov qword ptr 16[rdi],rbp
+	lea rdi,24[rdi+rax*8]
+	ret
+	/* _create_arrayI32: two elements per 8-byte word, 3 header words
+	 * (__ARRAY__+2, element count, INT32+2); (n+6+1)>>1 is the node size in
+	 * words. Overwrites rax, rbx and rdx.
+	 */
+_create_arrayI32:
+	mov rbx,rax	/* keep the element count for the header */
+	add rax,6+1
+	shr rax,1
+	sub r15,rax
+	jge no_collect_3572
+	att_call collect_0
+no_collect_3572:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rbx
+	lea rdx,INT32+2[rip]
+	mov qword ptr 16[rdi],rdx
+	lea rdi,[rdi+rax*8]
+	ret
+	/* _create_arrayR: one 8-byte word per element, 3 header words */
+_create_arrayR:
+	lea rbp,3[rax]
+	sub r15,rbp
+	jge no_collect_4580
+	att_call collect_0
+no_collect_4580:
+	mov rcx,rdi
+	lea
rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	lea rdx,REAL+2[rip]
+	mov qword ptr 16[rdi],rdx
+	lea rdi,24[rdi+rax*8]
+	ret
+	/* _create_arrayR32: write the header of an array with rax elements that
+	 * are packed two per 8-byte word and move rdi behind the node; the
+	 * element area is not written here.
+	 * In:  rax = number of elements.
+	 * Out: rcx = new node with header (__ARRAY__+2, element count, REAL32+2).
+	 * r15 is lowered by the node size in words ((n+6+1)>>1); collect_0 is
+	 * called when the result is negative. Overwrites rax, rbx and rdx.
+	 */
+_create_arrayR32:
+	mov rbx,rax	/* keep the element count: rax becomes the size in words */
+	add rax,6+1
+	shr rax,1
+	sub r15,rax
+	jge no_collect_3580
+	att_call collect_0
+no_collect_3580:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rbx	/* size field = element count, as in _create_arrayI32 */
+	lea rdx,REAL32+2[rip]
+	mov qword ptr 16[rdi],rdx
+	lea rdi,[rdi+rax*8]
+	ret
+
+/* rax : number of elements, rbx: element descriptor */
+/* r10 : element size, r11 : element a size, rcx :a_element-> rcx : array */
+	/* _create_r_array: write the 3 header words, then store the value passed
+	 * in rcx into the first r11 words of every element; the remaining
+	 * r10-r11 words of each element are skipped, not written.
+	 * r15 is lowered by rax*r10+3 words; collect_1 is called when the
+	 * result is negative. Overwrites rax, rdx, rbp, r10 and r11 (and rbx in
+	 * _create_r_array_5).
+	 */
+_create_r_array:
+	mov rbp,rax
+	imul rbp,r10
+	add rbp,3
+	sub r15,rbp
+	jge no_collect_4586
+	att_call collect_1
+no_collect_4586:
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov rdx,rcx	/* rdx = value stored into the elements */
+	mov qword ptr 8[rdi],rax
+	mov rcx,rdi
+	mov qword ptr 16[rdi],rbx
+	add rdi,24
+
+	test r11,r11	/* dispatch on r11 = 0, 1, 2, 3, 4 or more */
+	je _create_r_array_0
+	sub r11,2
+	jc _create_r_array_1
+	je _create_r_array_2
+	sub r11,2
+	jc _create_r_array_3
+	je _create_r_array_4
+	jmp _create_r_array_5
+
+_create_r_array_0:	/* nothing to store: only move rdi behind all elements */
+	imul r10,rax
+	lea rdi,[rdi+r10*8]
+	ret
+
+_create_r_array_1:
+	shl r10,3	/* element size in bytes */
+	jmp _st_fillr1_array
+_fillr1_array:
+	mov qword ptr [rdi],rdx
+	add rdi,r10
+_st_fillr1_array:
+	sub rax,1	/* a borrow ends the loop after rax elements */
+	att_jnc _fillr1_array
+	ret
+
+_create_r_array_2:
+	shl r10,3
+	jmp _st_fillr2_array
+_fillr2_array:
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rdx
+	add rdi,r10
+_st_fillr2_array:
+	sub rax,1
+	att_jnc _fillr2_array
+	ret
+
+_create_r_array_3:
+	shl r10,3
+	jmp _st_fillr3_array
+_fillr3_array:
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rdx
+	mov qword ptr 16[rdi],rdx
+	add rdi,r10
+_st_fillr3_array:
+	sub rax,1
+	att_jnc _fillr3_array
+	ret
+
+_create_r_array_4:
+	shl r10,3
+	jmp _st_fillr4_array
+_fillr4_array:
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rdx
+	mov qword ptr 16[rdi],rdx
+	mov qword ptr 24[rdi],rdx
+	add rdi,r10
+_st_fillr4_array:
+	sub rax,1
+	att_jnc _fillr4_array
+	ret
+
+_create_r_array_5:	/* case of _create_r_array with 5 or more words to store per element;
+	 * it is entered with r11 already lowered by 4. Afterwards r10 = bytes
+	 * to skip behind the stored words and r11 = stored words - 5, the
+	 * start value of the inner loop counter.
+	 */
+	sub r10,4
+	sub r10,r11
+	sub r11,1
+	shl r10,3
+	jmp _st_fillr5_array
+
+_fillr5_array:
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rdx
+	mov qword ptr 16[rdi],rdx
+	mov qword ptr 24[rdi],rdx
+	add rdi,32
+
+	mov rbx,r11
+_copy_elem_5_lp:
+	mov qword ptr [rdi],rdx
+	add rdi,8
+	sub rbx,1
+	att_jnc _copy_elem_5_lp
+
+	add rdi,r10	/* skip the words of the element that are not written */
+_st_fillr5_array:
+	sub rax,1	/* a borrow ends the loop after rax elements */
+	att_jnc _fillr5_array
+
+	ret
+	/* create_arrayB: new array of rbx one-byte elements, every byte set to
+	 * the low byte of rax.
+	 * NOTE(review): the shift/or sequence only replicates the byte cleanly
+	 * if the upper 56 bits of rax are zero on entry - confirm with callers.
+	 * Out: rcx = new node with header (__ARRAY__+2, element count, BOOL+2).
+	 * r15 is lowered by the node size in words; collect_0 is called when the
+	 * result is negative. The data words are written by create_arrayBCI.
+	 */
+create_arrayB:
+	mov r10,rbx	/* keep the element count for the header */
+	add rbx,24+7
+	shr rbx,3	/* node size in words */
+	sub r15,rbx
+	jge no_collect_4575
+	att_call collect_0
+no_collect_4575:
+	mov rbp,rax
+	sub rbx,3	/* rbx = number of data words to fill */
+	shl rbp,8
+	or rax,rbp	/* byte -> 2 bytes */
+	mov rbp,rax
+	shl rbp,16
+	or rax,rbp	/* -> 4 bytes */
+	mov rbp,rax
+	shl rbp,32
+	or rax,rbp	/* -> 8 bytes */
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],r10
+	lea rdx,BOOL+2[rip]
+	mov qword ptr 16[rdi],rdx
+	add rdi,24
+	jmp create_arrayBCI
+	/* create_arrayC: like create_arrayB, but the node has the 2-word header
+	 * (__STRING__+2, element count).
+	 */
+create_arrayC:
+	mov r10,rbx
+	add rbx,16+7
+	shr rbx,3
+	sub r15,rbx
+	jge no_collect_4578
+	att_call collect_0
+no_collect_4578:
+	mov rbp,rax
+	sub rbx,2	/* rbx = number of data words to fill */
+	shl rbp,8
+	or rax,rbp
+	mov rbp,rax
+	shl rbp,16
+	or rax,rbp
+	mov rbp,rax
+	shl rbp,32
+	or rax,rbp
+	mov rcx,rdi
+	lea rdx,__STRING__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],r10
+	add rdi,16
+	att_jmp create_arrayBCI
+	/* create_arrayI32: new array of rbx 32-bit elements, each set to eax;
+	 * two elements share one 8-byte word. Header: (__ARRAY__+2, element
+	 * count, INT32+2).
+	 */
+create_arrayI32:
+	mov r10,rbx
+	add rbx,6+1
+	shr rbx,1	/* node size in words */
+	sub r15,rbx
+	jge no_collect_3577
+	att_call collect_0
+no_collect_3577:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],r10
+	lea rdx,INT32+2[rip]
+	mov qword ptr 16[rdi],rdx
+	add rdi,24
+
+	sub rbx,3	/* rbx = number of data words to fill */
+
+	mov ebp,eax	/* rbp = low 32 bits of the value, upper half zero */
+	shl rax,32
+	or rax,rbp	/* the value in both halves of the word */
+	att_jmp create_arrayBCI
+	/* create_arrayI: new array of rbx 8-byte elements, each set to rax.
+	 * Header: (__ARRAY__+2, element count, dINT+2). Falls through into
+	 * create_arrayBCI.
+	 */
+create_arrayI:
+	lea rbp,3[rbx]
+	sub r15,rbp
+	jge no_collect_4577
+	att_call collect_0
+no_collect_4577:
+	mov rcx,rdi
+	lea rbp,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rbp
+	mov qword ptr 8[rdi],rbx
+	lea rbp,dINT+2[rip]
+	mov qword ptr 16[rdi],rbp
+	add rdi,24
+create_arrayBCI:	/* shared fill: store rax into rbx words at rdi, one odd word first, then pairs */
+	mov rdx,rbx
+	shr rbx,1
+	test dl,1
+	je st_filli_array
+
+	mov qword
ptr [rdi],rax
+	add rdi,8
+	att_jmp st_filli_array
+
+filli_array:
+	mov qword ptr [rdi],rax
+	mov qword ptr 8[rdi],rax
+	add rdi,16
+st_filli_array:
+	sub rbx,1	/* a borrow ends the loop after rbx pairs */
+	att_jnc filli_array
+
+	ret
+	/* create_arrayR32: new array of rax elements, each set to the value of
+	 * xmm0 converted to single precision; two elements share one 8-byte word.
+	 * Out: rcx = new node with header (__ARRAY__+2, element count, REAL32+2).
+	 * The value is moved to ebx through the 8 bytes below rsp before
+	 * collect_0 can be called. Overwrites rax, rbx, rdx, r10 and xmm0.
+	 */
+create_arrayR32:
+	cvtsd2ss xmm0,xmm0
+	movss dword ptr (-8)[rsp],xmm0
+	mov r10,rax	/* keep the element count for the header */
+	add rax,6+1
+	shr rax,1	/* node size in words */
+	mov ebx,dword ptr (-8)[rsp]
+	sub r15,rax
+	jge no_collect_3579
+	att_call collect_0
+no_collect_3579:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],r10
+	lea rdx,REAL32+2[rip]
+	mov qword ptr 16[rdi],rdx
+	add rdi,24
+
+	sub rax,3	/* rax = number of data words to fill */
+
+	mov edx,ebx
+	shl rbx,32
+	or rbx,rdx	/* the 32-bit pattern in both halves of the word */
+	jmp st_fillr_array
+	/* create_arrayR: new array of rax 8-byte elements, each set to the bit
+	 * pattern of xmm0. Header: (__ARRAY__+2, element count, REAL+2).
+	 * Out: rcx = new node. Overwrites rax, rbx, rbp and rdx.
+	 */
+create_arrayR:
+	movsd qword ptr (-8)[rsp],xmm0
+	lea rbp,3[rax]
+
+	mov rbx,qword ptr (-8)[rsp]
+
+	sub r15,rbp
+	jge no_collect_4579
+	att_call collect_0
+no_collect_4579:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	lea rdx,REAL+2[rip]
+	mov qword ptr 16[rdi],rdx
+	add rdi,24
+	att_jmp st_fillr_array
+fillr_array:
+	mov qword ptr [rdi],rbx
+	add rdi,8
+st_fillr_array:
+	sub rax,1	/* a borrow ends the loop after rax words */
+	att_jnc fillr_array
+
+	ret
+	/* create_array: new array of rax one-word elements, each set to the
+	 * value passed in rcx. Header: (__ARRAY__+2, element count, 0).
+	 * Out: rcx = new node. collect_1 is called when r15 would go negative.
+	 * The elements are written by fillr1_array. Overwrites rax, rbx, rbp
+	 * and rdx.
+	 */
+create_array:
+	lea rbp,3[rax]
+	sub r15,rbp
+	jge no_collect_4576
+	att_call collect_1
+no_collect_4576:
+	mov rbx,rcx	/* rbx = fill value expected by fillr1_array */
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	mov qword ptr 16[rdi],0
+	add rdi,24
+
+	jmp fillr1_array
+
+
+
+
+/* in rax: number of elements, rbx: element descriptor */
+/* r10 : element size, r11 : element a size -> rcx : array */
+	/* create_R_array: dispatch on the element size r10 (in words):
+	 * 1, 2, 3, 4, or 5 and more. In the last three cases r10 has been
+	 * lowered by 4 when the target is reached.
+	 */
+create_R_array:
+	sub r10,2
+	jc create_R_array_1
+	je create_R_array_2
+	sub r10,2
+	jc create_R_array_3
+	je create_R_array_4
+	jmp create_R_array_5
+
+create_R_array_1:	/* one word per element: rax+3 words */
+	lea rbp,3[rax]
+	sub r15,rbp
+	jge no_collect_4581
+	att_call collect_0
+no_collect_4581:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	mov qword ptr 16[rdi],rbx
+	add rdi,24
+
+	test r11,r11	/* r11 = 0: take the element word from the machine stack instead */
+	je r_array_1_b
+
+	mov rbx,qword ptr (-8)[rsi]
+
att_jmp fillr1_array
+
+r_array_1_b:
+	mov rbx,qword ptr 8[rsp]	/* element word stored just above the return address */
+	/* fillr1_array: store rbx into rax consecutive words at rdi, one odd
+	 * word first and then two per iteration. Overwrites rax and rdx.
+	 */
+fillr1_array:
+	mov rdx,rax
+	shr rax,1
+	test dl,1
+	je st_fillr1_array_1
+
+	mov qword ptr [rdi],rbx
+	add rdi,8
+	att_jmp st_fillr1_array_1
+
+fillr1_array_lp:
+	mov qword ptr [rdi],rbx
+	mov qword ptr 8[rdi],rbx
+	add rdi,16
+st_fillr1_array_1:
+	sub rax,1	/* a borrow ends the loop after rax pairs */
+	att_jnc fillr1_array_lp
+
+	ret
+	/* create_R_array_2: elements of 2 words, 3 + 2*rax words in total.
+	 * r11 selects where the two element words come from: 2 or more = both
+	 * from below rsi, 1 = one from below rsi and one from the machine
+	 * stack, 0 = both from the machine stack.
+	 */
+create_R_array_2:
+	lea rbp,3[rax*2]
+	sub r15,rbp
+	jge no_collect_4582
+	att_call collect_0
+no_collect_4582:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	mov qword ptr 16[rdi],rbx
+	add rdi,24
+
+	sub r11,1
+	jc r_array_2_bb
+	je r_array_2_ab
+r_array_2_aa:
+	mov rbx,qword ptr (-8)[rsi]
+	mov rbp,qword ptr (-16)[rsi]
+	jmp st_fillr2_array
+r_array_2_ab:
+	mov rbx,qword ptr (-8)[rsi]
+	mov rbp,qword ptr 8[rsp]
+	att_jmp st_fillr2_array
+r_array_2_bb:
+	mov rbx,qword ptr 8[rsp]
+	mov rbp,qword ptr 16[rsp]
+	att_jmp st_fillr2_array
+
+fillr2_array_1:
+	mov qword ptr [rdi],rbx
+	mov qword ptr 8[rdi],rbp
+	add rdi,16
+st_fillr2_array:
+	sub rax,1
+	att_jnc fillr2_array_1
+
+	ret
+	/* create_R_array_3: elements of 3 words, 3 + 3*rax words in total.
+	 * The return address is popped into rdx and rsp is saved in r12; the r11
+	 * words below rsi are pushed on the machine stack so that the complete
+	 * element can be read from [rsp]; then rsp and the return address are
+	 * put back. Overwrites rbx, rbp, rdx, r11, r12 and r13.
+	 */
+create_R_array_3:
+	lea rbp,3[rax+rax*2]
+	sub r15,rbp
+	jge no_collect_4583
+	att_call collect_0
+no_collect_4583:
+	mov rcx,rdi
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	mov qword ptr 16[rdi],rbx
+	add rdi,24
+
+	pop rdx	/* return address */
+	mov r12,rsp
+
+	test r11,r11
+	je r_array_3
+
+	lea r13,0[r11*8]
+	mov rbp,rsi
+	sub rbp,r13	/* rbp = rsi - 8*r11 */
+
+	sub r11,1
+
+copy_a_to_b_lp3:
+	push [rbp]
+	add rbp,8
+	sub r11,1
+	att_jnc copy_a_to_b_lp3
+
+r_array_3:
+	mov rbx,qword ptr [rsp]
+	mov r13,qword ptr 8[rsp]
+	mov rbp,qword ptr 16[rsp]
+
+	mov rsp,r12
+	push rdx	/* put the return address back for ret */
+
+	jmp st_fillr3_array
+
+fillr3_array_1:
+	mov qword ptr [rdi],rbx
+	mov qword ptr 8[rdi],r13
+	mov qword ptr 16[rdi],rbp
+	add rdi,24
+st_fillr3_array:
+	sub rax,1
+	att_jnc fillr3_array_1
+
+	ret
+	/* create_R_array_4: elements of 4 words. The node needs 3 header words
+	 * plus 4 words per element (the fill loop stores 32 bytes per element),
+	 * so the amount taken from r15 must be 3 + 4*rax.
+	 */
+create_R_array_4:
+	lea rbp,3[rax*4]
+	sub r15,rbp
+	jge no_collect_4584
+	att_call collect_0
+no_collect_4584:
+
mov rcx,rdi	/* create_R_array_4 continued: rcx = new node, then the 3 header words */
+	lea rdx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rdx
+	mov qword ptr 8[rdi],rax
+	mov qword ptr 16[rdi],rbx
+	add rdi,24
+
+	pop rdx	/* return address */
+	mov r12,rsp	/* rsp to restore before returning */
+
+	test r11,r11
+	je r_array_4
+
+	lea r13,0[r11*8]
+	mov rbp,rsi
+	sub rbp,r13	/* rbp = rsi - 8*r11 */
+	sub r11,1
+
+copy_a_to_b_lp4:	/* push the r11 words found below rsi */
+	push [rbp]
+	add rbp,8
+	sub r11,1
+	att_jnc copy_a_to_b_lp4
+
+r_array_4:	/* the 4 words of one element are now read from [rsp] */
+	mov rbx,qword ptr [rsp]
+	mov r13,qword ptr 8[rsp]
+	mov r14,qword ptr 16[rsp]
+	mov rbp,qword ptr 24[rsp]
+
+	mov rsp,r12
+	push rdx	/* put the return address back for ret */
+
+	jmp st_fillr4_array
+
+fillr4_array:
+	mov qword ptr [rdi],rbx
+	mov qword ptr 8[rdi],r13
+	mov qword ptr 16[rdi],r14
+	mov qword ptr 24[rdi],rbp
+	add rdi,32
+st_fillr4_array:
+	sub rax,1	/* a borrow ends the loop after rax elements */
+	att_jnc fillr4_array
+
+	ret
+	/* create_R_array_5: elements of 5 or more words. It is entered from
+	 * create_R_array with r10 = element size - 4, so r12 = 4[r10] is the
+	 * element size and rax*r12+3 words are taken from r15.
+	 * As in the smaller cases the return address is popped into rdx, rsp is
+	 * saved in r12 (reusing the register) and the r11 words below rsi are
+	 * pushed. The first 4 element words are kept in r13, r14, r8 and r9; the
+	 * remaining ones stay on the machine stack and are copied from there for
+	 * every element. The routine returns with jmp rdx after restoring rsp.
+	 * Out: rcx = new node.
+	 */
+create_R_array_5:
+	lea r12,4[r10]
+	mov rbp,rax
+	imul rbp,r12
+	add rbp,3
+	sub r15,rbp
+	jge no_collect_4585
+	att_call collect_0
+no_collect_4585:
+	lea rcx,__ARRAY__+2[rip]
+	mov qword ptr [rdi],rcx
+	mov qword ptr 8[rdi],rax
+	mov qword ptr 16[rdi],rbx
+	mov rcx,rdi
+	add rdi,24
+
+	pop rdx	/* return address */
+	mov r12,rsp
+
+	test r11,r11
+	je r_array_5
+
+	lea r13,0[r11*8]
+	mov rbp,rsi
+	sub rbp,r13	/* rbp = rsi - 8*r11 */
+	sub r11,1
+
+copy_a_to_b_lp5:
+	push [rbp]
+	add rbp,8
+	sub r11,1
+	att_jnc copy_a_to_b_lp5
+
+r_array_5:
+	mov r13,qword ptr [rsp]
+	mov r14,qword ptr 8[rsp]
+	mov r8,qword ptr 16[rsp]
+	mov r9,qword ptr 24[rsp]
+	add rsp,32	/* rsp now addresses the fifth element word */
+
+	sub r10,1	/* start value of the inner counter: copies r10+1 words */
+	jmp st_fillr5_array
+
+fillr5_array_1:
+	mov qword ptr [rdi],r13
+	mov qword ptr 8[rdi],r14
+
+	mov r11,rsp	/* restart at the fifth element word */
+	mov rbx,r10
+
+	mov qword ptr 16[rdi],r8
+	mov qword ptr 24[rdi],r9
+	add rdi,32
+
+copy_elem_lp5:
+	mov rbp,qword ptr [r11]
+	add r11,8
+	mov qword ptr [rdi],rbp
+	add rdi,8
+	sub rbx,1
+	att_jnc copy_elem_lp5
+
+st_fillr5_array:
+	sub rax,1	/* a borrow ends the loop after rax elements */
+	att_jnc fillr5_array_1
+
+	mov rsp,r12
+	jmp rdx	/* return: the return address was popped into rdx above */
+
+	.if !
NEW_DESCRIPTORS +yet_args_needed: +/* for more than 4 arguments */ + mov r10,[rdx] + movzx rax,word ptr (-2)[r10] + add rax,3 + sub r15,rax + jl gc_1 +gc_r_1: sub rax,3+1+4 + mov rbx,8[rdx] + add r10,8 + mov rdx,16[rdx] + mov rbp,rdi + mov r8,[rdx] + mov [rdi],r8 + mov r8,8[rdx] + mov 8[rdi],r8 + mov r8,16[rdx] + mov 16[rdi],r8 + add rdx,24 + add rdi,24 + +cp_a: mov r8,[rdx] + add rdx,8 + mov [rdi],r8 + add rdi,8 + sub rax,1 + jge cp_a + + mov [rdi],rcx + mov 8[rdi],r10 + lea rcx,8[rdi] + mov 16[rdi],rbx + mov 24[rdi],rbp + add rdi,32 + ret + +gc_1: + call collect_2 + jmp gc_r_1 + +yet_args_needed_0: + sub r15,2 + jl gc_20 +gc_r_20: + mov 8[rdi],rcx + mov rax,[rdx] + mov rcx,rdi + add rax,8 + mov [rdi],rax + add rdi,16 + ret + +gc_20: + call collect_2 + jmp gc_r_20 + +yet_args_needed_1: + sub r15,3 + jl gc_21 +gc_r_21: + mov 16[rdi],rcx + mov rax,[rdx] + mov rcx,rdi + add rax,8 + mov [rdi],rax + mov rbx,8[rdx] + mov 8[rdi],rbx + add rdi,24 + ret + +gc_21: + call collect_2 + jmp gc_r_21 + +yet_args_needed_2: + sub r15,5 + jl gc_22 +gc_r_22: + mov rax,[rdx] + mov 8[rdi],rcx + add rax,8 + mov rbp,8[rdx] + mov 16[rdi],rax + lea rcx,16[rdi] + mov 24[rdi],rbp + mov rbp,16[rdx] + mov [rdi],rbp + mov 32[rdi],rdi + add rdi,40 + ret + +gc_22: + call collect_2 + jmp gc_r_22 + +yet_args_needed_3: + sub r15,6 + jl gc_23 +gc_r_23: + mov rax,[rdx] + mov 16[rdi],rcx + add rax,8 + mov rbp,8[rdx] + mov 24[rdi],rax + mov rdx,16[rdx] + mov 32[rdi],rbp + mov rbp,[rdx] + mov 40[rdi],rdi + mov [rdi],rbp + mov rbp,8[rdx] + lea rcx,24[rdi] + mov 8[rdi],rbp + add rdi,48 + ret + +gc_23: + call collect_2 + jmp gc_r_23 + +yet_args_needed_4: + sub r15,7 + jl gc_24 +gc_r_24: + mov rax,[rdx] + mov 24[rdi],rcx + add rax,8 + mov rbp,8[rdx] + mov 32[rdi],rax + mov rdx,16[rdx] + mov 40[rdi],rbp + mov rbp,[rdx] + mov 48[rdi],rdi + mov [rdi],rbp + mov rbp,8[rdx] + lea rcx,32[rdi] + mov 8[rdi],rbp + mov rbp,16[rdx ] + mov 16[rdi],rbp + add rdi,56 + ret + +gc_24: + call collect_2 + jmp gc_r_24 + .endif + 
+repl_args_b: + test rax,rax + jle repl_args_b_1 + + dec rax + je repl_args_b_4 + + mov rdx,16[rcx] + sub rbx,2 + jne repl_args_b_2 + + mov [rsi],rdx + add rsi,8 + att_jmp repl_args_b_4 + +repl_args_b_2: + lea rdx,[rdx+rax*8] + +repl_args_b_3: + mov rbp,(-8)[rdx] + sub rdx,8 + mov [rsi],rbp + add rsi,8 + dec rax + att_jne repl_args_b_3 + +repl_args_b_4: + mov rbp,8[rcx] + mov [rsi],rbp + add rsi,8 +repl_args_b_1: + ret + +push_arg_b: + cmp rbx,2 + jb push_arg_b_1 + jne push_arg_b_2 + cmp rbx,rax + att_je push_arg_b_1 +push_arg_b_2: + mov rcx,16[rcx] + sub rbx,2 +push_arg_b_1: + mov rcx,[rcx+rbx*8] + ret + +del_args: + mov rbx,[rcx] + sub rbx,rax + movsx rax,word ptr (-2)[rbx] + sub rax,2 + jge del_args_2 + + mov [rdx],rbx + mov rbp,8[rcx] + mov 8[rdx],rbp + mov rbp,16[rcx] + mov 16[rdx],rbp + ret + +del_args_2: + jne del_args_3 + + mov [rdx],rbx + mov rbp,8[rcx] + mov 8[rdx],rbp + mov rbp,16[rcx] + mov rbp,[rbp] + mov 16[rdx],rbp + ret + +del_args_3: + sub r15,rax + jl del_args_gc +del_args_r_gc: + mov [rdx],rbx + mov 16[rdx],rdi + mov rbp,8[rcx] + mov rcx,16[rcx] + mov 8[rdx],rbp + +del_args_copy_args: + mov rbp,[rcx] + add rcx,8 + mov [rdi],rbp + add rdi,8 + sub rax,1 + att_jg del_args_copy_args + + ret + +del_args_gc: + att_call collect_2 + att_jmp del_args_r_gc + + .if USE_LIBM +cos_real: + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + call cos + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + ret + +sin_real: + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + call sin + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + ret + +tan_real: + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + call tan + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + ret + +atan_real: + mov rbp,rsp + and rsp,-16 + mov r13,rsi + mov r14,rdi + call atan + mov rsp,rbp + mov rsi,r13 + mov rdi,r14 + ret + +asin_real: +acos_real: +ln_real: +log10_real: +exp_real: +pow_real: +exp2_real_: +_c_log10: +_c_pow: +_c_entier: + int 3 + ret + .endif + +entier_real: + cvttsd2si rax,xmm0 + 
ucomisd xmm0,qword ptr real_0_0[rip] + jb entier_real_m + ret + +entier_real_m: + movsd qword ptr (-8)[rsp],xmm0 + mov rcx,qword ptr (-8)[rsp] + mov rbx,rcx + shr rcx,52 + cmp rcx,0x0bff + jb entier_real_m_small + cmp rcx,0x0bff+52 + jae entier_real_m_large + sub rcx,0x0bff-12 + shl rbx,cl + je entier_m_exact +entier_real_m_small: + sub rax,1 +entier_real_m_large: +entier_m_exact: + ret + +r_to_i_real: + cvtsd2si rax,xmm0 + ret + + .globl getheapend + +getheapend: + lea rbx,[rdi+r15*8] + mov rax,heap_end_after_gc[rip] + ret + + .include "areals.s" + + .if PROFILE + .if TRACE + .include "atrace.s" + .else + .include "aprofile.s" + .endif + .endif + + .if NEW_DESCRIPTORS + .include "aap.s" + .endif + diff --git a/macho64/atrace.s b/macho64/atrace.s new file mode 100644 index 0000000..4f9bf6d --- /dev/null +++ b/macho64/atrace.s @@ -0,0 +1,474 @@ + + .text + + .globl init_profiler + .globl profile_r + .globl profile_l + .globl profile_l2 + .globl profile_n + .globl profile_n2 + .globl profile_s + .globl profile_s2 + .globl profile_t + .globl write_profile_stack + .globl stack_trace_depth + + .if ! 
LINUX + .globl allocate_memory + .endif + .globl __STRING__ + .globl _ab_stack_size + .globl _ew_print_string + .globl _ew_print_char + .globl _ew_print_text +/* .globl print_error */ +/* .globl profile_stack_pointer */ + +next = 0 +name_ = 8 +FunctionProfile = 16 + +profile_t: + sub qword ptr profile_stack_pointer[rip],8 + ret + +profile_r: + sub qword ptr profile_stack_pointer[rip],8 + ret + +profile_l: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_l +allocate_function_profile_record_lr: + mov rbp,qword ptr profile_stack_pointer[rip] + + mov qword ptr [rbp],rbx + add rbp,8 + mov qword ptr profile_stack_pointer[rip],rbp + + pop rbx + ret + +allocate_function_profile_record_l: + call allocate_function_profile_record + att_jmp allocate_function_profile_record_lr + +profile_l2: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_l2 +allocate_function_profile_record_l2r: + mov rbp,qword ptr profile_stack_pointer[rip] + + mov qword ptr [rbp],rbx + mov qword ptr 8[rbp],rbx + add rbp,16 + mov qword ptr profile_stack_pointer[rip],rbp + + pop rbx + ret + +allocate_function_profile_record_l2: + att_call allocate_function_profile_record + att_jmp allocate_function_profile_record_l2r + +profile_n: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_n +allocate_function_profile_record_nr: + mov rbp,qword ptr profile_stack_pointer[rip] + + mov qword ptr [rbp],rbx + add rbp,8 + mov qword ptr profile_stack_pointer[rip],rbp + + pop rbx + ret + +allocate_function_profile_record_n: + att_call allocate_function_profile_record + att_jmp allocate_function_profile_record_nr + +profile_n2: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_n2 +allocate_function_profile_record_n2r: + mov rbp,qword ptr profile_stack_pointer[rip] + + mov qword ptr [rbp],rbx + mov qword ptr 8[rbp],rbx + add rbp,16 + mov qword ptr 
profile_stack_pointer[rip],rbp + + pop rbx + ret + +allocate_function_profile_record_n2: + att_call allocate_function_profile_record + att_jmp allocate_function_profile_record_n2r + +profile_s2: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_s2 +allocate_function_profile_record_s2r: + mov rbp,qword ptr profile_stack_pointer[rip] + + mov qword ptr [rbp],rbx + mov qword ptr 8[rbp],rbx + add rbp,16 + mov qword ptr profile_stack_pointer[rip],rbp + + pop rbx + ret + +allocate_function_profile_record_s2: + att_call allocate_function_profile_record + att_jmp allocate_function_profile_record_s2r + +profile_s: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_s +allocate_function_profile_record_sr: + mov rbp,qword ptr profile_stack_pointer[rip] + + mov qword ptr [rbp],rbx + add rbp,8 + mov qword ptr profile_stack_pointer[rip],rbp + + pop rbx + ret + +allocate_function_profile_record_s: + att_call allocate_function_profile_record + att_jmp allocate_function_profile_record_sr + +/* argument: rbp: function name adress-4 */ +/* result: rbx: function profile record adress */ + +allocate_function_profile_record: + push rax + mov rax,qword ptr global_n_free_records_in_block[rip] + mov rbx,qword ptr global_last_allocated_block[rip] + + test rax,rax + jne no_alloc + + push rcx + push rdx + push rbp + + .if LINUX + sub rsp,104 + mov qword ptr [rsp],rsi + mov qword ptr 8[rsp],rdi + mov qword ptr 16[rsp],r8 + mov qword ptr 24[rsp],r10 + mov qword ptr 32[rsp],r11 + movsd qword ptr 40[rsp],xmm0 + movsd qword ptr 48[rsp],xmm1 + movsd qword ptr 56[rsp],xmm2 + movsd qword ptr 64[rsp],xmm3 + movsd qword ptr 72[rsp],xmm4 + movsd qword ptr 80[rsp],xmm5 + movsd qword ptr 88[rsp],xmm6 + movsd qword ptr 96[rsp],xmm7 + .else + sub rsp,72 + mov qword ptr [rsp],r8 + mov qword ptr 8[rsp],r10 + mov qword ptr 16[rsp],r11 + movsd qword ptr 24[rsp],xmm0 + movsd qword ptr 32[rsp],xmm1 + movsd qword ptr 40[rsp],xmm2 + 
movsd qword ptr 48[rsp],xmm3 + movsd qword ptr 56[rsp],xmm4 + movsd qword ptr 64[rsp],xmm5 + .endif + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + .if LINUX + mov rdi,8192 + /* 512*FunctionProfile */ + att_call _malloc + .else + mov rcx,512*FunctionProfile + call allocate_memory + .endif + mov rsp,rbp + + .if LINUX + mov rsi,qword ptr [rsp] + mov rdi,qword ptr 8[rsp] + mov r8,qword ptr 16[rsp] + mov r10,qword ptr 24[rsp] + mov r11,qword ptr 32[rsp] + movlpd xmm0,qword ptr 40[rsp] + movlpd xmm1,qword ptr 48[rsp] + movlpd xmm2,qword ptr 56[rsp] + movlpd xmm3,qword ptr 64[rsp] + movlpd xmm4,qword ptr 72[rsp] + movlpd xmm5,qword ptr 80[rsp] + movlpd xmm6,qword ptr 88[rsp] + movlpd xmm7,qword ptr 96[rsp] + add rsp,104 + .else + mov r8,qword ptr [rsp] + mov r10,qword ptr 8[rsp] + mov r11,qword ptr 16[rsp] + movlpd xmm0,qword ptr 24[rsp] + movlpd xmm1,qword ptr 32[rsp] + movlpd xmm2,qword ptr 40[rsp] + movlpd xmm3,qword ptr 48[rsp] + movlpd xmm4,qword ptr 56[rsp] + movlpd xmm5,qword ptr 64[rsp] + add rsp,72 + .endif + + test rax,rax + + pop rbp + pop rdx + pop rcx + + je no_memory + + mov rbx,rax + mov rax,512 + mov qword ptr global_last_allocated_block[rip],rbx + +no_alloc: + dec rax + mov qword ptr global_n_free_records_in_block[rip],rax + lea rax,FunctionProfile[rbx] + mov qword ptr global_last_allocated_block[rip],rax + + mov rax,qword ptr global_profile_records[rip] + mov qword ptr name_[rbx],rbp + + mov qword ptr next[rbx],rax + mov qword ptr global_profile_records[rip],rbx + + mov qword ptr [rbp],rbx + pop rax + ret + +no_memory: + lea rbp,not_enough_memory_for_profiler[rip] + pop rax + att_jmp print_error + +write_profile_stack: + .if LINUX + mov r13,rsi + mov r14,rdi + .endif + mov rax,qword ptr profile_stack_pointer[rip] + + test rax,rax + je stack_not_initialised + + push rax + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + .if LINUX + lea rdi,stack_trace_string[rip] + .else + lea rcx,stack_trace_string + .endif + att_call _ew_print_string + mov rsp,rbp + + pop rax + 
+/* mov rbp,12 */ + mov rbp,qword ptr stack_trace_depth[rip] +write_functions_on_stack: + mov rbx,qword ptr (-8)[rax] + sub rax,8 + + test rbx,rbx + je end_profile_stack + + push rax + mov rcx,qword ptr name_[rbx] + + push rbp + + .if LINUX + movsx rdx,dword ptr (-4)[rcx] + lea rdx,-4[rcx+rdx] + lea rdi,8[rcx] + mov r12,rdx + .else + mov edx,dword ptr (-4)[rcx] + add rcx,8 + + mov r12d,dword ptr [rdx] + lea r13,4[rdx] + .endif + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + + att_call _ew_print_string + + .if LINUX + lea rdi,module_string[rip] + .else + lea rcx,module_string + .endif + att_call _ew_print_string + + .if LINUX + mov esi,dword ptr [r12] + lea rdi,4[r12] + .else + mov rdx,r12 + mov rcx,r13 + .endif + att_call _ew_print_text + + .if LINUX + mov rdi,'] + .else + mov rcx,'] + .endif + att_call _ew_print_char + + .if LINUX + mov rdi,10 + .else + mov rcx,10 + .endif + att_call _ew_print_char + + mov rsp,rbp + + pop rbp + pop rax + + sub rbp,1 + att_jne write_functions_on_stack + +end_profile_stack: +stack_not_initialised: + .if LINUX + mov rsi,r13 + mov rdi,r14 + .endif + ret + +init_profiler: + mov rbp,rsp + sub rsp,40 + and rsp,-16 + .if LINUX + mov r13,rsi + mov r14,rdi + mov rdi,qword ptr _ab_stack_size[rip] + att_call _malloc + mov rsi,r13 + mov rdi,r14 + .else + mov rcx,qword ptr ab_stack_size + call allocate_memory + .endif + mov rsp,rbp + + test rax,rax + je init_profiler_error + + push rax + + lea rbp,start_string[rip] + att_call allocate_function_profile_record + + pop rdx + + mov qword ptr 8[rdx],rbx + mov qword ptr [rdx],0 + add rdx,16 + mov qword ptr profile_stack_pointer[rip],rdx + ret + +init_profiler_error: + mov qword ptr profile_stack_pointer[rip],0 + lea rbp,not_enough_memory_for_profile_stack[rip] + att_jmp print_error + + + + .data + + .align 8 + +global_n_free_records_in_block: + .quad 0 +/* 0 n free records in block */ +global_last_allocated_block: + .quad 0 +/* 8 latest allocated block */ +global_profile_records: + .quad 0 +/* 16 
profile record list */ + +stack_trace_depth: + .quad 12 + .align 8 + +/* m_system also defined in istartup.s */ +/* +m_system: + .quad 6 + .ascii "System" + .byte 0 + .byte 0 +*/ + .long m_system-. +start_string: + .quad 0 + .ascii "start" + .byte 0 + .align 8 +not_enough_memory_for_profile_stack: + .ascii "not enough memory for profile stack" + .byte 10 + .byte 0 +not_enough_memory_for_profiler: + .ascii "not enough memory for profiler" + .byte 10 + .byte 0 +stack_trace_string: + .ascii "Stack trace:" + .byte 10 + .byte 0 +module_string: + .ascii " [module: " + .byte 0 + .align 8 + + + +/* end */ diff --git a/macho64/startup.s b/macho64/startup.s new file mode 100644 index 0000000..b5713f2 --- /dev/null +++ b/macho64/startup.s @@ -0,0 +1,6 @@ + + .set PROFILE,0 + .set TRACE,0 + + .include "astartup.s" + diff --git a/macho64/startupTrace.s b/macho64/startupTrace.s new file mode 100644 index 0000000..1738171 --- /dev/null +++ b/macho64/startupTrace.s @@ -0,0 +1,6 @@ + + .set PROFILE,1 + .set TRACE,1 + + .include "astartup.s" + |