From 24563026c2d1d63d87844543df0bd22163fee10c Mon Sep 17 00:00:00 2001 From: John van Groningen Date: Fri, 2 Jun 2006 14:26:10 +0000 Subject: add garbage collector for 64 bit windows and linux --- acompact.asm | 1156 ++++++++++++++++++++++++++ acompact_rmark.asm | 956 +++++++++++++++++++++ acompact_rmark_prefetch.asm | 1178 ++++++++++++++++++++++++++ acopy.asm | 1306 +++++++++++++++++++++++++++++ amark.asm | 1927 +++++++++++++++++++++++++++++++++++++++++++ amark_prefetch.asm | 1748 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 8271 insertions(+) create mode 100644 acompact.asm create mode 100644 acompact_rmark.asm create mode 100644 acompact_rmark_prefetch.asm create mode 100644 acopy.asm create mode 100644 amark.asm create mode 100644 amark_prefetch.asm diff --git a/acompact.asm b/acompact.asm new file mode 100644 index 0000000..5db500e --- /dev/null +++ b/acompact.asm @@ -0,0 +1,1156 @@ + +; mark used nodes and pointers in argument parts and link backward pointers + + mov rax,qword ptr heap_size_65 + shl rax,6 + mov qword ptr heap_size_64_65,rax + + mov rax,qword ptr caf_list + + test qword ptr flags,4096 + jne pmarkr + + test rax,rax + je end_mark_cafs + +mark_cafs_lp: + push (-8)[rax] + + lea rsi,8[rax] + mov rax,qword ptr [rax] + lea rcx,[rsi+rax*8] + + mov qword ptr end_vector,rcx + + call rmark_stack_nodes + + pop rax + test rax,rax + jne mark_cafs_lp + +end_mark_cafs: + mov rsi,qword ptr stack_p + + mov rcx,qword ptr stack_top + mov qword ptr end_vector,rcx + + call rmark_stack_nodes + + call add_mark_compact_garbage_collect_time + + jmp compact_heap + +pmarkr: + test rax,rax + je end_rmarkp_cafs + +rmarkp_cafs_lp: + push (-8)[rax] + + lea rsi,8[rax] + mov rax,qword ptr [rax] + lea rcx,[rsi+rax*8] + + mov qword ptr end_vector,rcx + + call rmarkp_stack_nodes + + pop rax + test rax,rax + jne rmarkp_cafs_lp + +end_rmarkp_cafs: + mov rsi,qword ptr stack_p + + mov rcx,qword ptr stack_top + mov qword ptr end_vector,rcx + + call rmarkp_stack_nodes + + 
call	add_mark_compact_garbage_collect_time	; marking is finished: account the mark time
+
+	jmp	compact_heap
+
+	include	acompact_rmark.asm
+
+	include	acompact_rmark_prefetch.asm
+
+; compact the heap
+; first: split finalizer_list into finalizers whose node is still marked and unmarked ones (moved to free_finalizer_list)
+compact_heap:
+; rcx = address of the link word pointing at the current finalizer, rdx = tail link of free_finalizer_list, rbp = current finalizer
+	mov	rcx,offset finalizer_list
+	mov	rdx,offset free_finalizer_list
+
+	mov	rbp,qword ptr [rcx]
+determine_free_finalizers_after_compact1:
+	lea	r9,__Nil-8	; value used as end-of-list marker
+	cmp	rbp,r9
+	je	end_finalizers_after_compact1
+; test the mark bit of the node at rbp (rdi is used as base of the mark bit vector; it is set outside this section)
+	mov	rax,qword ptr neg_heap_p3
+	add	rax,rbp	; rax = rbp+neg_heap_p3 (presumably the byte offset of the node in the heap)
+	mov	rbx,rax
+	and	rax,31*8	; FIX: was 31*9 (=279, bits 0,1,2,4,8); the table offset is bits 3..7 as in all other bit_set_table2 lookups
+	shr	rbx,8	; rbx = index of the 32 bit vector word (one word per 256 heap bytes)
+	mov	esi,dword ptr (bit_set_table2)[rax]
+	test	esi,dword ptr [rdi+rbx*4]
+	je	finalizer_not_used_after_compact1
+; node is marked: follow the chain of reversed pointers (bit 0 set) to the word that holds the descriptor
+	mov	rax,qword ptr [rbp]
+	mov	rsi,rbp
+	jmp	finalizer_find_descriptor
+
+finalizer_find_descriptor_lp:
+	and	rax,-4	; strip the tag bits to get the address of the next word in the chain
+	mov	rsi,rax
+	mov	rax,qword ptr [rax]
+finalizer_find_descriptor:
+	test	rax,1
+	jne	finalizer_find_descriptor_lp
+; rsi = word holding the descriptor: replace it by the temporary GC descriptor
+	mov	qword ptr [rsi],offset e____system__kFinalizerGCTemp+2
+
+	cmp	rbp,rcx
+	ja	finalizer_no_reverse	; node lies above the link word (unsigned): do not reverse this link here
+; thread the link word at rcx into the reversed pointer chain of the node (tag 1)
+	mov	rax,qword ptr [rbp]
+	lea	rsi,1[rcx]
+	mov	qword ptr [rbp],rsi
+	mov	qword ptr [rcx],rax
+
+finalizer_no_reverse:
+	lea	rcx,8[rbp]	; the next link word is the second word of this finalizer
+	mov	rbp,qword ptr 8[rbp]
+	jmp	determine_free_finalizers_after_compact1
+; node is not marked: append it to free_finalizer_list and unlink it from finalizer_list
+finalizer_not_used_after_compact1:
+	mov	qword ptr [rbp],offset e____system__kFinalizerGCTemp+2
+
+	mov	qword ptr [rdx],rbp	; append to the free list
+	lea	rdx,8[rbp]	; new tail link of the free list
+
+	mov	rbp ,qword ptr 8[rbp]
+	mov	qword ptr [rcx],rbp	; previous link now skips the removed finalizer
+
+	jmp	determine_free_finalizers_after_compact1
+
+end_finalizers_after_compact1:
+	mov	qword ptr [rdx],rbp	; terminate the free list (rbp = end-of-list marker here)
+; reverse the head pointer in finalizer_list, unless the list is empty or the head word already has tag bits set
+	mov	rcx,qword ptr finalizer_list
+	lea	r9,__Nil-8
+	cmp	rcx,r9
+	je	finalizer_list_empty
+	test	rcx,3
+	jne	finalizer_list_already_reversed	; presumably threaded already by the loop above
+	mov	rax ,qword ptr [rcx]
+	mov	qword ptr [rcx],offset finalizer_list+1
+	mov	qword ptr finalizer_list,rax
+finalizer_list_already_reversed:
+finalizer_list_empty:
+; mark the nodes reachable from free_finalizer_list, treated as a vector of one pointer [free_finalizer_list,free_finalizer_list+8)
+	mov	rsi,offset free_finalizer_list	; FIX: was rbp; rmark_stack_nodes reads its cursor from rsi and overwrites rbp (rmarkp_stack_nodes assumed alike)
+	lea	r9,__Nil-8
+	cmp	qword ptr [rsi],r9
+	je	free_finalizer_list_empty	; nothing to mark
+
+	mov	qword ptr end_vector,offset free_finalizer_list+8
+
+	test	qword ptr flags,4096	; same flag bit that selects the rmarkp (prefetch) marker at the start of the mark phase
+	je	no_pmarkr
+	call	rmarkp_stack_nodes
+
jmp free_finalizer_list_empty +no_pmarkr: + call rmark_stack_nodes + +free_finalizer_list_empty: + + mov rax,qword ptr heap_size_65 + mov rbx,rax + shl rbx,6 + + add rbx,qword ptr heap_p3 + + mov qword ptr end_heap_p3,rbx + + add rax,3 + shr rax,2 + mov r12,rax + + mov r8,qword ptr heap_vector + + lea rbx,4[r8] + neg rbx + mov qword ptr neg_heap_vector_plus_4,rbx + + mov rdi,qword ptr heap_p3 + xor rsi,rsi + jmp skip_zeros + +; %rax ,%rcx ,%rbp : free +find_non_zero_long: +skip_zeros: + sub r12,1 + jc end_move + mov esi,dword ptr [r8] + add r8,4 + test rsi,rsi + je skip_zeros +; %rbp : free +end_skip_zeros: + mov rbp,qword ptr neg_heap_vector_plus_4 + + add rbp,r8 + + shl rbp,6 + add rbp,qword ptr heap_p3 + +bsf_and_copy_nodes: + movzx rax,sil + test rax,rax + jne found_bit1 + movzx rcx,si + shr rcx,8 + jne found_bit2 + mov rax,rsi + and rax,0ff0000h + jne found_bit3 + mov rcx,rsi + shr rcx,24 + movzx rcx,byte ptr first_one_bit_table[rcx*1] + add rcx,24 + jmp copy_nodes + +found_bit3: + shr rax,16 + movzx rcx,byte ptr first_one_bit_table[rax*1] + add rcx,16 + jmp copy_nodes + +found_bit2: + movzx rcx,byte ptr first_one_bit_table[rcx*1] + add rcx,8 + jmp copy_nodes + +found_bit1: + movzx rcx,byte ptr first_one_bit_table[rax*1] + +copy_nodes: + mov rax,qword ptr [rbp+rcx*8] + shr esi,1 + lea rbp,8[rbp+rcx*8] + shr esi,cl + mov rcx,rbp + + dec rax + + test rax,2 + je begin_update_list_2 + +move_argument_part: + mov rbx,qword ptr (-18)[rax] + sub rax,2 + + test rbx,1 + je end_list_2 +find_descriptor_2: + and rbx,-4 + mov rbx,qword ptr [rbx] + test rbx,1 + jne find_descriptor_2 + +end_list_2: + mov rdx,rbx + movzx rbx,word ptr (-2)[rbx] + cmp rbx,256 + jb no_record_arguments + + movzx rdx,word ptr (-2+2)[rdx] + sub rdx,2 + jae copy_record_arguments_aa + + sub rbx,256+3 + +copy_record_arguments_all_b: + push rbx + mov rbx,qword ptr heap_vector + +update_up_list_1r: + mov rdx,rax + add rax,qword ptr neg_heap_p3 + + push rcx + + mov rcx,rax + + shr rax,8 + and rcx,31*8 + + 
mov ecx,dword ptr bit_set_table2[rcx*1] + mov eax,dword ptr [rbx+rax*4] + + and rax,rcx + + pop rcx + je copy_argument_part_1r + + mov rax,qword ptr [rdx] + mov qword ptr [rdx],rdi + sub rax,3 + jmp update_up_list_1r + +copy_argument_part_1r: + mov rax,qword ptr [rdx] + mov qword ptr [rdx],rdi + mov qword ptr [rdi],rax + add rdi,8 + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + shr rax,3 + + mov rbx,rax + and rbx,31 + cmp rbx,1 + jae bit_in_this_word + + dec r12 + mov esi,dword ptr [r8] + add r8,4 + + mov rbp,qword ptr neg_heap_vector_plus_4 + add rbp,r8 + shl rbp,6 + add rbp,qword ptr heap_p3 + +bit_in_this_word: + shr esi,1 + add rbp,8 + + pop rbx + +copy_b_record_argument_part_arguments: + mov rax,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 + sub rbx,1 + jnc copy_b_record_argument_part_arguments + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +copy_record_arguments_aa: + sub rbx,256+2 + sub rbx,rdx + + push rbx + push rdx + +update_up_list_2r: + mov rdx,rax + mov rax,qword ptr [rdx] + mov rbx,3 + and rbx,rax + sub rbx,3 + jne copy_argument_part_2r + + mov qword ptr [rdx],rdi + sub rax,3 + jmp update_up_list_2r + +copy_argument_part_2r: + mov qword ptr [rdx],rdi + cmp rax,rcx + jb copy_record_argument_2 + + cmp rax,qword ptr end_heap_p3 + jae copy_record_argument_2 + + mov rdx,rax + mov rax,qword ptr [rdx] + lea rbx,1[rdi] + mov qword ptr [rdx],rbx +copy_record_argument_2: + mov qword ptr [rdi],rax + add rdi,8 + + pop rbx + sub rbx,1 + jc no_pointers_in_record + +copy_record_pointers: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jb copy_record_pointers_2 + + cmp rdx,qword ptr end_heap_p3 + jae copy_record_pointers_2 + + mov rax,qword ptr [rdx] + inc rdi + mov qword ptr [rdx],rdi + dec rdi + mov rdx,rax +copy_record_pointers_2: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + jnc copy_record_pointers + +no_pointers_in_record: + pop rbx + + sub rbx,1 + jc no_non_pointers_in_record + 
+copy_non_pointers_in_record: + mov rax,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 + sub rbx,1 + jnc copy_non_pointers_in_record + +no_non_pointers_in_record: + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +no_record_arguments: + sub rbx,3 +update_up_list_2: + mov rdx,rax + mov rax,qword ptr [rax] + inc rax + mov qword ptr [rdx],rdi + test al,3 + jne copy_argument_part_2 + + sub rax,4 + jmp update_up_list_2 + +copy_argument_part_2: + dec rax + cmp rax,rcx + jc copy_arguments_1 + + cmp rax,qword ptr end_heap_p3 + jnc copy_arguments_1 + + mov rdx,rax + mov rax,qword ptr [rax] + inc rdi + mov qword ptr [rdx],rdi + dec rdi +copy_arguments_1: + mov qword ptr [rdi],rax + add rdi,8 + +copy_argument_part_arguments: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc copy_arguments_2 + + cmp rdx,qword ptr end_heap_p3 + jnc copy_arguments_2 + + mov rax,qword ptr [rdx] + inc rdi + mov qword ptr [rdx],rdi + dec rdi + mov rdx,rax +copy_arguments_2: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + jnc copy_argument_part_arguments + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +update_list_2_: + dec rax + mov qword ptr [rdx],rdi +begin_update_list_2: + mov rdx,rax + mov rax,qword ptr [rax] +update_list__2: + test rax,1 + jz end_update_list_2 + test rax,2 + jz update_list_2_ + lea rdx,(-3)[rax] + mov rax,qword ptr (-3)[rax] + jmp update_list__2 + +end_update_list_2: + mov qword ptr [rdx],rdi + + mov qword ptr [rdi],rax + add rdi,8 + + test al,2 + je move_lazy_node + + movzx rbx,word ptr (-2)[rax] + test rbx,rbx + je move_hnf_0 + + cmp rbx,256 + jae move_record + + sub rbx,2 + jc move_hnf_1 + je move_hnf_2 + +move_hnf_3: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_3_1 + + cmp rdx,qword ptr end_heap_p3 + jnc move_hnf_3_1 + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_3_1: + mov qword ptr [rdi],rdx + + mov rdx,qword ptr [rcx] + add rcx,8 + 
cmp rdx,rcx + jc move_hnf_3_2 + + cmp rdx,qword ptr end_heap_p3 + jnc move_hnf_3_2 + + lea rax,(8+2+1)[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_3_2: + mov qword ptr 8[rdi],rdx + add rdi,16 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_hnf_2: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_2_1 + + cmp rdx,qword ptr end_heap_p3 + jnc move_hnf_2_1 + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_2_1: + mov qword ptr [rdi],rdx + + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_2_2 + + cmp rdx,qword ptr end_heap_p3 + jnc move_hnf_2_2 + + lea rax,(8+1)[rdi] + mov rbx ,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_2_2: + mov qword ptr 8[rdi],rdx + add rdi,16 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_hnf_1: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_hnf_1_ + + cmp rdx,qword ptr end_heap_p3 + jnc move_hnf_1_ + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_hnf_1_: + mov qword ptr [rdi],rdx + add rdi,8 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_record: + sub rbx,258 + jb move_record_1 + je move_record_2 + +move_record_3: + movzx rbx,word ptr (-2+2)[rax] + sub rbx,1 + ja move_hnf_3 + + mov rdx,qword ptr [rcx] + lea rcx,8[rcx] + jb move_record_3_1b + +move_record_3_1a: + cmp rdx,rcx + jb move_record_3_1b + + cmp rdx,qword ptr end_heap_p3 + jae move_record_3_1b + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_record_3_1b: + mov qword ptr [rdi],rdx + add rdi,8 + + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jb move_record_3_2 + + cmp rdx,qword ptr end_heap_p3 + jae move_record_3_2 + + mov rax,qword ptr neg_heap_p3 + + push rbp + + add rax,rdx + + mov rbx,qword ptr heap_vector + add rax,8 + mov rbp,rax + and rbp,31*8 + shr rax,8 + mov 
ebp,dword ptr bit_set_table2[rbp] + test ebp,dword ptr [rbx+rax*4] + je not_linked_record_argument_part_3_b + + mov rax,qword ptr neg_heap_p3 + add rax,rdi + + mov rbp,rax + and rbp,31*8 + shr rax,8 + mov ebp,dword ptr bit_set_table2[rbp] + or dword ptr [rbx+rax*4],ebp + pop rbp + + jmp linked_record_argument_part_3_b + +not_linked_record_argument_part_3_b: + or dword ptr [rbx+rax*4],ebp + + mov rax,qword ptr neg_heap_p3 + add rax,rdi + + mov rbp,rax + and rbp,31*8 + shr rax,8 + mov ebp,dword ptr bit_clear_table2[rbp] + and dword ptr [rbx+rax*4],ebp + pop rbp + +linked_record_argument_part_3_b: + mov rbx,qword ptr [rdx] + lea rax,(2+1)[rdi] + mov qword ptr [rdx],rax + mov rdx,rbx +move_record_3_2: + mov qword ptr [rdi],rdx + add rdi,8 + + mov rbx,qword ptr neg_heap_p3 + add rbx,rcx + shr rbx,3 + dec rbx + and rbx,31 + cmp rbx,2 + jb bit_in_next_word + + shr esi,2 + add rbp,16 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +bit_in_next_word: + dec r12 + mov esi,dword ptr [r8] + add r8,4 + + and esi,dword ptr bit_clear_table[rbx*4] + + test rsi,rsi + je skip_zeros + jmp end_skip_zeros + +move_record_2: + cmp word ptr (-2+2)[rax],1 + ja move_hnf_2 + jb move_real_or_file + +move_record_2_ab: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jb move_record_2_1 + + cmp rdx,qword ptr end_heap_p3 + jae move_record_2_1 + + lea rax,1[rdi] + mov rbx ,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_record_2_1: + mov qword ptr [rdi],rdx + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr 8[rdi],rbx + add rdi,16 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_record_1: + movzx rbx,word ptr (-2+2)[rax] + test rbx,rbx + jne move_hnf_1 + jmp move_int_bool_or_char + +move_real_or_file: + mov rax ,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 +move_int_bool_or_char: + mov rax,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rax + add rdi,8 +copy_normal_hnf_0: + + test rsi,rsi + jne 
bsf_and_copy_nodes + jmp find_non_zero_long + +move_hnf_0: + cmp rax,offset dINT+2 + jb move_real_file_string_or_array + cmp rax,offset CHAR+2 + jbe move_int_bool_or_char + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_real_file_string_or_array: + lea r9,__STRING__+2 + cmp rax,r9 + ja move_real_or_file + jne move_array + + mov rax,qword ptr [rcx] + add rax,7 + shr rax,3 + +cp_s_arg_lp3: + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + jnc cp_s_arg_lp3 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_array: + test rsi,rsi + push rcx + jne bsf_and_end_array_bit + +skip_zeros_a: + sub r12,1 + mov esi,dword ptr [r8] + add r8,4 + test rsi,rsi + je skip_zeros_a + + mov rbp,qword ptr neg_heap_vector_plus_4 + add rbp,r8 + + shl rbp,6 + + add rbp,qword ptr heap_p3 + +bsf_and_end_array_bit: + mov rax,rsi + mov rdx,rsi + and rax,0ffh + jne a_found_bit1 + and rdx,0ff00h + jne a_found_bit2 + mov rax,rsi + mov rdx,rsi + and rax,0ff0000h + jne a_found_bit3 + shr rdx,24 + movzx rcx,byte ptr first_one_bit_table[rdx*1] + add rcx,24 + jmp end_array_bit +a_found_bit3: + shr rax,16 + movzx rcx,byte ptr first_one_bit_table[rax*1] + add rcx,16 + jmp end_array_bit +a_found_bit2: + shr rdx,8 + movzx rcx,byte ptr first_one_bit_table[rdx*1] + add rcx,8 + jmp end_array_bit +a_found_bit1: + movzx rcx,byte ptr first_one_bit_table[rax*1] + +end_array_bit: + lea rbx,[rbp+rcx*8] + shr esi,1 + lea rbp,8[rbp+rcx*8] + shr esi,cl + pop rcx + + cmp rcx,rbx + jne move_a_array + +move_b_array: + mov rdx,qword ptr [rcx] + mov qword ptr [rdi],rdx + mov rbx,qword ptr 8[rcx] + add rcx,8 + + movzx rax,word ptr (-2)[rbx] + add rdi,8 + test rax,rax + je move_strict_basic_array + + sub rax,256 + imul rdx,rax + mov rax,rdx + jmp cp_s_arg_lp3 + +move_strict_basic_array: + mov rax,rdx + cmp rbx,offset BOOL+2 + jne cp_s_arg_lp3 + +move_bool_array: + add rax,7 + shr rax,3 + jmp cp_s_arg_lp3 + +move_a_array: + mov rdx,rbx + 
sub rbx,rcx + shr rbx,3 + + push rsi + sub rbx,1 + jb end_array + mov rsi,qword ptr [rcx] + + mov rax,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rsi + + mov qword ptr [rdi],rax + + mov rax,qword ptr [rdx] + + mov rsi,qword ptr 8[rcx] + add rcx,16 + + mov qword ptr [rdx],rsi + + mov qword ptr 8[rdi],rax + add rdi,16 + + test rax,rax + je st_move_array_lp + + movzx rsi,word ptr (-2+2)[rax] + movzx rax,word ptr (-2)[rax] + sub rax,256 + cmp rax,rsi + je st_move_array_lp + +move_array_ab: + push rcx + + mov rdx,qword ptr (-16)[rdi] + mov rbx,rsi + imul rdx,rax + shl rdx,3 + + sub rax,rbx + add rdx,rcx + call reorder + + pop rcx + sub rbx,1 + sub rax,1 + + push rbx + push rax + push (-16)[rdi] + jmp st_move_array_lp_ab + +move_array_ab_lp1: + mov rax,qword ptr 16[rsp] +move_array_ab_a_elements: + mov rbx,qword ptr [rcx] + add rcx,8 + cmp rbx,rcx + jb move_array_element_ab + + cmp rbx,qword ptr end_heap_p3 + jnc move_array_element_ab + + mov rdx,rbx + mov rbx,qword ptr [rdx] + inc rdi + mov qword ptr [rdx],rdi + dec rdi +move_array_element_ab: + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + jnc move_array_ab_a_elements + + mov rax,qword ptr 8[rsp] +move_array_ab_b_elements: + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + jnc move_array_ab_b_elements + +st_move_array_lp_ab: + sub qword ptr [rsp],1 + jnc move_array_ab_lp1 + + add rsp,24 + jmp end_array + +move_array_lp1: + mov rax,qword ptr [rcx] + add rcx,8 + add rdi,8 + cmp rax,rcx + jb move_array_element + + cmp rax,qword ptr end_heap_p3 + jnc move_array_element + + mov rsi,qword ptr [rax] + mov rdx,rax + mov qword ptr (-8)[rdi],rsi + lea rax,(-8+1)[rdi] + mov qword ptr [rdx],rax + + sub rbx,1 + jnc move_array_lp1 + + jmp end_array + +move_array_element: + mov qword ptr (-8)[rdi],rax +st_move_array_lp: + sub rbx,1 + jnc move_array_lp1 + +end_array: + pop rsi + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_lazy_node: + mov rdx,rax + movsxd 
rbx,dword ptr (-4)[rdx] + test rbx,rbx + je move_lazy_node_0 + + sub rbx,1 + jle move_lazy_node_1 + + cmp rbx,256 + jge move_closure_with_unboxed_arguments + +move_lazy_node_arguments: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_lazy_node_arguments_ + + cmp rdx,qword ptr end_heap_p3 + jnc move_lazy_node_arguments_ + + mov rax,qword ptr [rdx] + mov qword ptr [rdi],rax + lea rax,1[rdi] + add rdi,8 + mov qword ptr [rdx],rax + sub rbx,1 + jnc move_lazy_node_arguments + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_lazy_node_arguments_: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + jnc move_lazy_node_arguments + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_lazy_node_1: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_lazy_node_1_ + + cmp rdx,qword ptr end_heap_p3 + jnc move_lazy_node_1_ + + lea rax,1[rdi] + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + mov rdx,rbx +move_lazy_node_1_: + mov qword ptr [rdi],rdx + add rdi,16 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_lazy_node_0: + add rdi,16 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_closure_with_unboxed_arguments: + je move_closure_with_unboxed_arguments_1 + add rbx,1 + mov rax,rbx + and rbx,255 + shr rax,8 + sub rbx,rax + je move_non_pointers_of_closure + + push rax + +move_closure_with_unboxed_arguments_lp: + mov rdx,qword ptr [rcx] + add rcx,8 + cmp rdx,rcx + jc move_closure_with_unboxed_arguments_ + + cmp rdx,qword ptr end_heap_p3 + jnc move_closure_with_unboxed_arguments_ + + mov rax,qword ptr [rdx] + mov qword ptr [rdi],rax + lea rax,1[rdi] + add rdi,8 + mov qword ptr [rdx],rax + sub rbx,1 + jne move_closure_with_unboxed_arguments_lp + + pop rax + jmp move_non_pointers_of_closure + +move_closure_with_unboxed_arguments_: + mov qword ptr [rdi],rdx + add rdi,8 + sub rbx,1 + jne move_closure_with_unboxed_arguments_lp + + pop rax + 
+move_non_pointers_of_closure: + mov rbx,qword ptr [rcx] + add rcx,8 + mov qword ptr [rdi],rbx + add rdi,8 + sub rax,1 + jne move_non_pointers_of_closure + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +move_closure_with_unboxed_arguments_1: + mov rax,qword ptr [rcx] + mov qword ptr [rdi],rax + add rdi,16 + + test rsi,rsi + jne bsf_and_copy_nodes + jmp find_non_zero_long + +end_move: + + mov rcx,qword ptr finalizer_list + +restore_finalizer_descriptors: + lea r9,__Nil-8 + cmp rcx,r9 + je end_restore_finalizer_descriptors + + mov qword ptr [rcx],offset e____system__kFinalizer+2 + mov rcx,qword ptr 8[rcx] + jmp restore_finalizer_descriptors + +end_restore_finalizer_descriptors: + diff --git a/acompact_rmark.asm b/acompact_rmark.asm new file mode 100644 index 0000000..d173b59 --- /dev/null +++ b/acompact_rmark.asm @@ -0,0 +1,956 @@ + +rmark_stack_nodes1: + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr [rsi],rbx + mov qword ptr [rcx],rax + +rmark_next_stack_node: + add rsi,8 + cmp rsi,qword ptr end_vector + je end_rmark_nodes + +rmark_stack_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmark_next_stack_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + jne rmark_stack_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmark_stack_node + + add rsi,8 + cmp rsi,qword ptr end_vector + jne rmark_stack_nodes + ret + +rmark_stack_node: + sub rsp,16 + mov qword ptr [rsi],rax + lea rbp,1[rsi] + mov qword ptr 8[rsp],rsi + mov rbx,-1 + mov qword ptr [rsp],0 + mov qword ptr [rcx],rbp + jmp rmark_no_reverse + +rmark_node_d1: + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmark_next_node + + jmp rmark_node_ + +rmark_hnf_2: + lea rbx,8[rcx] + mov rax,qword ptr 8[rcx] + sub rsp,16 + + mov rsi,rcx + mov 
rcx,qword ptr [rcx] + + mov qword ptr 8[rsp],rbx + mov qword ptr [rsp],rax + +rmark_node: + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmark_next_node + + mov rbx,rsi + +rmark_node_: + mov rdx,rax + and rax,31*8 + shr rdx,8 + mov eax,dword ptr (bit_set_table2)[rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + jne rmark_reverse_and_mark_next_node + + or rbp,rax + mov dword ptr [rdi+rdx*4],ebp + + mov rax,qword ptr [rcx] +rmark_arguments: + cmp rcx,rbx + ja rmark_no_reverse + + lea rbp,1[rsi] + mov qword ptr [rsi],rax + mov qword ptr [rcx],rbp + +rmark_no_reverse: + test al,2 + je rmark_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je rmark_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae rmark_record + + sub rbp,2 + je rmark_hnf_2 + jc rmark_hnf_1 + +rmark_hnf_3: + mov rdx,qword ptr 8[rcx] +rmark_hnf_3_: + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,[rdi+rbx*4] + jne rmark_shared_argument_part + + or dword ptr [rdi+rbx*4],eax + +rmark_no_shared_argument_part: + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + lea rdx,[rdx+rbp*8] + mov qword ptr [rsp],rcx + +rmark_push_hnf_args: + mov rbx,qword ptr [rdx] + sub rsp,16 + mov qword ptr 8[rsp],rdx + sub rdx,8 + mov qword ptr [rsp],rbx + + sub rbp,1 + jg rmark_push_hnf_args + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + ja rmark_no_reverse_argument_pointer + + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmark_next_node + + mov rbx,rdx + jmp rmark_node_ + +rmark_no_reverse_argument_pointer: + mov rsi,rdx + jmp rmark_node + +rmark_shared_argument_part: + cmp rdx,rcx + ja rmark_hnf_1 + + mov rbx,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr [rdx],rax + mov qword ptr 8[rcx],rbx + jmp rmark_hnf_1 + +rmark_record: + sub 
rbp,258 + je rmark_record_2 + jb rmark_record_1 + +rmark_record_3: + movzx rbp,word ptr (-2+2)[rax] + mov rdx,qword ptr (16-8)[rcx] + sub rbp,1 + jb rmark_record_3_bb + je rmark_record_3_ab + sub rbp,1 + je rmark_record_3_aab + jmp rmark_hnf_3_ + +rmark_record_3_bb: + sub rcx,8 + + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + ja rmark_next_node + + add eax,eax + jne rmark_bit_in_same_word1 + inc rbp + mov rax,1 +rmark_bit_in_same_word1: + test eax,dword ptr [rdi+rbp*4] + je rmark_not_yet_linked_bb + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + add rax,16 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + jmp rmark_next_node + +rmark_not_yet_linked_bb: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + jmp rmark_next_node + +rmark_record_3_ab: + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + ja rmark_hnf_1 + + add eax,eax + jne rmark_bit_in_same_word2 + inc rbp + mov rax,1 +rmark_bit_in_same_word2: + test eax,dword ptr [rdi+rbp*4] + je rmark_not_yet_linked_ab + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + add rax,8 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + jmp rmark_hnf_1 + +rmark_not_yet_linked_ab: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + jmp rmark_hnf_1 + 
+rmark_record_3_aab: + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbp*4] + jne rmark_shared_argument_part + or dword ptr [rdi+rbp*4],eax + + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + mov qword ptr [rsp],rcx + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + ja rmark_no_reverse_argument_pointer + + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmark_next_node + + mov rbx,rdx + jmp rmark_node_ + +rmark_record_2: + cmp word ptr (-2+2)[rax],1 + ja rmark_hnf_2 + je rmark_hnf_1 + jmp rmark_next_node + +rmark_record_1: + cmp word ptr (-2+2)[rax],0 + jne rmark_hnf_1 + jmp rmark_next_node + +rmark_lazy_node_1: +; selectors: + jne rmark_selector_node_1 + +rmark_hnf_1: + mov rsi,rcx + mov rcx,qword ptr [rcx] + jmp rmark_node + +; selectors +rmark_indirection_node: + mov rdx,qword ptr neg_heap_p3 + sub rcx,8 + add rdx,rcx + + mov rbp,rdx + and rbp,31*8 + shr rdx,8 + mov ebp,dword ptr (bit_clear_table2)[rbp] + and dword ptr [rdi+rdx*4],ebp + + mov rdx,rcx + cmp rcx,rbx + mov rcx,qword ptr 8[rcx] + mov qword ptr [rsi],rcx + ja rmark_node_d1 + mov qword ptr [rdx],rax + jmp rmark_node_d1 + +rmark_selector_node_1: + add rbp,3 + je rmark_indirection_node + + mov rdx,qword ptr [rcx] + mov qword ptr pointer_compare_address,rbx + + mov rbx,qword ptr neg_heap_p3 + add rbx,rdx + shr rbx,3 + + add rbp,1 + jle rmark_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmark_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + je rmark_hnf_1 + + cmp word ptr (-2)[rbx],2 + jbe rmark_small_tuple_or_record + +rmark_large_tuple_or_record: + mov d2,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3 + add rbx,d2 + shr rbx,3 + + mov rbp,rbx 
+ shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmark_hnf_1 + + ifdef NEW_DESCRIPTORS + mov rbx,qword ptr neg_heap_p3 + lea rbx,(-8)[rcx+rbx] + + mov eax,dword ptr (-8)[rax] + + mov d3,rbx + and d3,31*8 + shr rbx,8 + mov d3d,dword ptr (bit_clear_table2)[d3] + and dword ptr [rdi+rbx*4],d3d + + movzx eax,word ptr 4[rax] + mov rbx,qword ptr pointer_compare_address + + mov qword ptr (-8)[rcx],offset __indirection + + cmp rax,16 + jl rmark_tuple_or_record_selector_node_2 + + mov rdx,rcx + je rmark_tuple_selector_node_2 + + mov rcx,qword ptr (-24)[d2+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + jmp rmark_node_d1 + +rmark_tuple_selector_node_2: + mov rcx,qword ptr [d2] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + jmp rmark_node_d1 + else +rmark_small_tuple_or_record: + mov rbx,qword ptr neg_heap_p3 + lea rbx,(-8)[rcx+rbx] + + push rcx + + mov rcx,rbx + and rcx,31*8 + shr rbx,8 + mov ecx,dword ptr (bit_clear_table2)[rcx] + and dword ptr [rdi+rbx*4],ecx + + mov eax,(-8)[rax] + + mov rcx,rdx + push rsi + mov eax,4[rax] + call near ptr rax + pop rsi + pop rdx + + mov qword ptr [rsi],rcx + + mov rbx,qword ptr pointer_compare_address + + mov qword ptr (-8)[rdx],offset __indirection + mov qword ptr [rdx],rcx + jmp rmark_node_d1 + endif + +rmark_record_selector_node_1: + je rmark_strict_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmark_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + je rmark_hnf_1 + + cmp word ptr (-2)[rbx],258 + jbe rmark_small_tuple_or_record + + ifdef NEW_DESCRIPTORS + mov d2,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3 + add rbx,d2 + shr rbx,3 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmark_hnf_1 + +rmark_small_tuple_or_record: + mov rbx,qword ptr 
neg_heap_p3 + lea rbx,(-8)[rcx+rbx] + + mov eax,(-8)[rax] + + mov d3,rbx + and d3,31*8 + shr rbx,8 + mov d3d,dword ptr (bit_clear_table2)[d3] + and dword ptr [rdi+rbx*4],d3d + + movzx eax,word ptr 4[rax] + mov rbx,qword ptr pointer_compare_address + + mov qword ptr (-8)[rcx],offset __indirection + + cmp rax,16 + jle rmark_tuple_or_record_selector_node_2 + mov rdx,d2 + sub rax,24 +rmark_tuple_or_record_selector_node_2: + mov rbp,rcx + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rbp],rcx + mov rdx,rbp + jmp rmark_node_d1 + else + jmp rmark_large_tuple_or_record + endif + +rmark_strict_record_selector_node_1: + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmark_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + je rmark_hnf_1 + + cmp word ptr (-2)[rbx],258 + jbe rmark_select_from_small_record + + mov d2,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3 + add rbx,d2 + mov rbp,rbx + + shr rbx,8 + and rbp,31*8 + mov ebp,dword ptr (bit_set_table2)[rbp] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmark_hnf_1 + +rmark_select_from_small_record: + mov ebx,(-8)[rax] + sub rcx,8 + + cmp rcx,qword ptr pointer_compare_address + ja rmark_selector_pointer_not_reversed + + ifdef NEW_DESCRIPTORS + movzx eax,word ptr 4[rbx] + cmp rax,16 + jle rmark_strict_record_selector_node_2 + mov rax,qword ptr (-24)[d2+rax] + jmp rmark_strict_record_selector_node_3 +rmark_strict_record_selector_node_2: + mov rax,qword ptr [rdx+rax] +rmark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rax + + movzx eax,word ptr 6[rbx] + test rax,rax + je rmark_strict_record_selector_node_5 + cmp rax,16 + jle rmark_strict_record_selector_node_4 + mov rdx,d2 + sub rax,24 +rmark_strict_record_selector_node_4: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmark_strict_record_selector_node_5: + + mov rax,qword ptr (-8)[rbx] + else + mov qword ptr [rcx],rax + mov qword ptr 
[rsi],rcx + + push rsi + mov ebx,4[rbx] + call near ptr rbx + pop rsi + + mov rax,qword ptr [rcx] + endif + add rsi,1 + mov qword ptr [rcx],rsi + mov qword ptr (-1)[rsi],rax + jmp rmark_next_node + +rmark_selector_pointer_not_reversed: + ifdef NEW_DESCRIPTORS + movzx eax,word ptr 4[rbx] + cmp rax,16 + jle rmark_strict_record_selector_node_6 + mov rax,qword ptr (-24)[d2+rax] + jmp rmark_strict_record_selector_node_7 +rmark_strict_record_selector_node_6: + mov rax,qword ptr [rdx+rax] +rmark_strict_record_selector_node_7: + mov qword ptr 8[rcx],rax + + movzx eax,word ptr 6[rbx] + test rax,rax + je rmark_strict_record_selector_node_9 + cmp rax,16 + jle rmark_strict_record_selector_node_8 + mov rdx,d2 + sub rax,24 +rmark_strict_record_selector_node_8: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmark_strict_record_selector_node_9: + + mov rax,qword ptr (-8)[rbx] + mov qword ptr [rcx],rax + else + mov ebx,4[rbx] + call near ptr rbx + endif + jmp rmark_next_node + +rmark_reverse_and_mark_next_node: + cmp rcx,rbx + ja rmark_next_node + + mov rax,qword ptr [rcx] + mov qword ptr [rsi],rax + add rsi,1 + mov qword ptr [rcx],rsi + +; %rbp ,%rbx : free + +rmark_next_node: + mov rcx,qword ptr [rsp] + mov rsi,qword ptr 8[rsp] + add rsp,16 + + cmp rcx,1 + ja rmark_node + +end_rmark_nodes: + ret + +rmark_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + je rmark_next_node + + add rcx,8 + + sub rbp,1 + jle rmark_lazy_node_1 + + cmp rbp,255 + jge rmark_closure_with_unboxed_arguments + +rmark_closure_with_unboxed_arguments_: + lea rcx,[rcx+rbp*8] + +rmark_push_lazy_args: + mov rbx,qword ptr [rcx] + sub rsp,16 + mov qword ptr 8[rsp],rcx + sub rcx,8 + mov qword ptr [rsp],rbx + sub rbp,1 + jg rmark_push_lazy_args + + mov rsi,rcx + mov rcx,qword ptr [rcx] + jmp rmark_node + +rmark_closure_with_unboxed_arguments: +; (a_size+b_size)+(b_size<<8) +; addl $1,%rbp + mov rax,rbp + and rbp,255 + shr rax,8 + sub rbp,rax +; subl $1,%rbp + jg 
rmark_closure_with_unboxed_arguments_ + je rmark_hnf_1 + jmp rmark_next_node + +; Node whose 16-bit arity field is 0. dINT+2 and CHAR+2 descriptors get special handling, descriptors below CHAR+2 go to rmark_no_normal_hnf_0. For the rest the node's bit in the bit vector is cleared and [rsi] is redirected to descriptor-10 (NEW_DESCRIPTORS) or descriptor-14. +rmark_hnf_0: + cmp rax,offset dINT+2 + je rmark_int_3 + + cmp rax,offset CHAR+2 + je rmark_char_3 + + jb rmark_no_normal_hnf_0 + + mov rbp,qword ptr neg_heap_p3 + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + mov edx,dword ptr (bit_clear_table2)[rdx] + and dword ptr [rdi+rbp*4],edx + + ifdef NEW_DESCRIPTORS + lea rdx,((-8)-2)[rax] + else + lea rdx,((-12)-2)[rax] + endif + mov qword ptr [rsi],rdx + cmp rcx,rbx + ja rmark_next_node + mov qword ptr [rcx],rax + jmp rmark_next_node + +; Value at 8[rcx] below 33 (unsigned): [rsi] = small_integers + 16*value and the node's bit is cleared; larger values leave the node as it is. +rmark_int_3: + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + jnc rmark_next_node + + shl rbp,4 + lea rdx,(small_integers)[rbp] + mov rbp,qword ptr neg_heap_p3 + mov qword ptr [rsi],rdx + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + mov edx,dword ptr (bit_clear_table2)[rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + ja rmark_next_node + mov qword ptr [rcx],rax + jmp rmark_next_node + +; [rsi] = static_characters + 16*(byte at 8[rcx]); the node's bit is cleared. +rmark_char_3: + movzx rdx,byte ptr 8[rcx] + mov rbp,qword ptr neg_heap_p3 + + shl rdx,4 + add rbp,rcx + add rdx,offset static_characters + mov qword ptr [rsi],rdx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + mov edx,dword ptr (bit_clear_table2)[rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + ja rmark_next_node + mov qword ptr [rcx],rax + jmp rmark_next_node + +; Only __ARRAY__+2 descriptors are processed further. 16[rcx] == 0 selects rmark_lazy_array; otherwise the two 16-bit fields at (-2)[16[rcx]] and 0[16[rcx]] choose between the b, a-record and ab-record array paths. +rmark_no_normal_hnf_0: + lea r9,__ARRAY__+2 + cmp rax,r9 + jne rmark_next_node + + mov rax,qword ptr 16[rcx] + test rax,rax + je rmark_lazy_array + + movzx rdx,word ptr (-2+2)[rax] + test rdx,rdx + je rmark_b_array + + movzx rax,word ptr (-2)[rax] + test rax,rax + je rmark_b_array + + sub rax,256 + cmp rdx,rax + mov rbx,rdx + je rmark_a_record_array + +; The two counts differ: calls reorder with rax = difference of the counts, rbx = second count, rcx = first element address and rdx = end address, then continues with rax = rbx * 8[array]. +; NOTE(review): reorder is defined outside this chunk; its register contract is not visible here. +rmark_ab_record_array: + mov rdx,qword ptr 8[rcx] + add rcx,16 + push rcx + + imul rdx,rax + shl rdx,3 + + sub rax,rbx + add rcx,8 + add rdx,rcx + call reorder + + pop rcx + mov rax,rbx + imul rax,qword ptr (-8)[rcx] + jmp rmark_lr_array + +rmark_b_array: + mov rax,qword ptr neg_heap_p3 + add
rax,rcx + add rax,8 + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + jmp rmark_next_node + +; rax = 8[rcx], multiplied by rbx when rbx >= 2; rcx is advanced by 16. +rmark_a_record_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + cmp rbx,2 + jb rmark_lr_array + + imul rax,rbx + jmp rmark_lr_array + +rmark_lazy_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + +; Sets the bit-vector bit for address rcx+8*rax. For rax > 1 the words at rcx-8 and rcx are exchanged with the words at rcx+8*rax-8 and rcx+8*rax, the end address is pushed, and the words from rcx-8 up to that end are walked by rmark_array_nodes. +rmark_lr_array: + mov rbx,qword ptr neg_heap_p3 + add rbx,rcx + shr rbx,3 + add rbx,rax + + mov rdx,rbx + and rbx,31 + shr rdx,5 + mov ebx,dword ptr (bit_set_table)[rbx*4] + or dword ptr [rdi+rdx*4],ebx + + cmp rax,1 + jbe rmark_array_length_0_1 + + mov rdx,rcx + lea rcx,[rcx+rax*8] + + mov rax,qword ptr [rcx] + + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + + mov qword ptr [rcx],rbx + + mov rax,qword ptr (-8)[rcx] + sub rcx,8 + + mov rbx,qword ptr (-8)[rdx] + + sub rdx,8 + mov qword ptr [rcx],rbx + + mov qword ptr [rdx],rax + + push rcx + mov rsi,rdx + jmp rmark_array_nodes + +; Element already has its bit set: when rcx <= rsi the word at [rcx] is moved to [rsi] and [rcx] receives rsi+1. +rmark_array_nodes1: + cmp rcx,rsi + ja rmark_next_array_node + + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr [rsi],rbx + mov qword ptr [rcx],rax + +rmark_next_array_node: + add rsi,8 + cmp rsi,qword ptr [rsp] + je end_rmark_array_node + +; Loop over the words from rsi up to the end address kept at [rsp]. Values whose offset (value + neg_heap_p3) is not below heap_size_64_65 are skipped; values whose bit is clear get the bit set and are traversed via rmark_array_node. +rmark_array_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmark_next_array_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + jne rmark_array_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmark_array_node + + add rsi,8 + cmp rsi,qword ptr [rsp] + jne rmark_array_nodes + +end_rmark_array_node: + add rsp,8 + jmp rmark_next_node + +; Pushes the pair (1, rsi) and continues at rmark_arguments with rbx = rsi; rmark_next_node returns to the caller when it pops the 1. +rmark_array_node: + sub rsp,16 + mov qword ptr 8[rsp],rsi + mov rbx,rsi + mov qword ptr [rsp],1 + jmp rmark_arguments + +; rax is 0 or 1 (flags still from the cmp rax,1 above; lea leaves them unchanged). 0: nothing to traverse. 1: the three words at 8,16,24[rcx] are rotated and the word then at 8[rcx] is followed via rmark_hnf_1. +rmark_array_length_0_1: + lea rcx,-16[rcx] + jb rmark_next_node + + mov rbx,qword ptr 24[rcx] + mov rbp,qword ptr 16[rcx] + mov qword ptr
24[rcx],rbp + mov rbp,qword ptr 8[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr 8[rcx],rbx + add rcx,8 + jmp rmark_hnf_1 + +; One qword of data; written and read by the selector code as pointer_compare_address. +_TEXT ends + _DATA segment +pointer_compare_address: + dq 0 +_DATA ends + _TEXT segment diff --git a/acompact_rmark_prefetch.asm b/acompact_rmark_prefetch.asm new file mode 100644 index 0000000..376a119 --- /dev/null +++ b/acompact_rmark_prefetch.asm @@ -0,0 +1,1178 @@ + +; Data for the prefetching variant: rmarkp_n_queue_items_16 and rmarkp_queue_first are single qwords, rmarkp_queue reserves 32 zero-initialised qwords. +_TEXT ends + _DATA segment +rmarkp_n_queue_items_16: + dq 0 +rmarkp_queue_first: + dq 0 +rmarkp_queue: + dq 0,0,0,0,0,0,0,0 + dq 0,0,0,0,0,0,0,0 + dq 0,0,0,0,0,0,0,0 + dq 0,0,0,0,0,0,0,0 +_DATA ends + _TEXT segment + +; Value already has its bit set: the word at [rcx] is moved to [rsi] and [rcx] receives rsi+1. +rmarkp_stack_nodes1: + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr [rsi],rbx + mov qword ptr [rcx],rax + +rmarkp_next_stack_node: + add rsi,8 + cmp rsi,qword ptr end_vector + je end_rmarkp_nodes + +; Entry point (called from the code in acompact.asm with rsi = first word, end_vector = end). Walks the words from rsi to end_vector; values whose offset (value + neg_heap_p3) is not below heap_size_64_65 are skipped, values whose bit is clear get it set and are traversed via rmarkp_stack_node. +rmarkp_stack_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_next_stack_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + jne rmarkp_stack_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmarkp_stack_node + + add rsi,8 + cmp rsi,qword ptr end_vector + jne rmarkp_stack_nodes + ret + +; Pushes the pair (0, rsi), stores the descriptor rax in [rsi] and rsi+1 in [rcx], sets rbx = -1 and continues at rmarkp_no_reverse. +rmarkp_stack_node: + sub rsp,16 + mov qword ptr [rsi],rax + lea rbp,1[rsi] + mov qword ptr 8[rsp],rsi + mov rbx,-1 + mov qword ptr [rsp],0 + mov qword ptr [rcx],rbp + jmp rmarkp_no_reverse + +; Like rmarkp_node but keeps the current rbx instead of loading it from rsi. +rmarkp_node_d1: + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_next_node + + jmp rmarkp_node_ + +; Two words at rcx: pushes (8[rcx], rcx+8) for later and follows [rcx] now with rsi = rcx. +rmarkp_hnf_2: + lea rbx,8[rcx] + mov rax,qword ptr 8[rcx] + sub rsp,16 + + mov rsi,rcx + mov rcx,qword ptr [rcx] + + mov qword ptr 8[rsp],rbx + mov qword ptr [rsp],rax + +; rcx = value to follow, rsi = address it was loaded from. Values whose offset is not below heap_size_64_65 are dropped. +rmarkp_node: + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_next_node + + mov rbx,rsi + +rmarkp_node_: + + +
mov rdx,rax + and rax,31*8 + shr rdx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rdx*4] + jne rmarkp_reverse_and_mark_next_node + +; Bit still clear: the node is prefetched and (rcx, rsi, rbx) is stored in the 32-byte queue slot at rmarkp_queue_first. rbx then becomes the slot offset (first + rmarkp_n_queue_items_16) masked with 7*32; first is advanced by 32 with the same mask. When the item count value equals -(4*32) that slot is processed immediately. + mov rbp,qword ptr rmarkp_queue_first + mov rdx,qword ptr rmarkp_n_queue_items_16 + + prefetch [rcx] + mov qword ptr rmarkp_queue[rbp],rcx + mov qword ptr rmarkp_queue+8[rbp],rsi + mov qword ptr rmarkp_queue+16[rbp],rbx + lea rbx,[rbp+rdx] + add rbp,32 + + and rbp,7*32 + and rbx,7*32 + + mov qword ptr rmarkp_queue_first,rbp + + cmp rdx,-(4*32) + je rmarkp_last_item_in_queue + +; Pops further (rcx, rsi) pairs from the machine stack while the top value is above 1. Values outside the heap range are dropped, values whose bit is already set are linked (when rcx <= rsi), the others go to rmarkp_add_item. +rmarkp_add_items: + mov rcx,[rsp] + cmp rcx,1 + jbe rmarkp_add_stacked_item + + mov rsi,8[rsp] + add rsp,16 + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_add_items + + mov rdx,rax + and rax,31*8 + shr rdx,8 + mov eax,dword ptr bit_set_table2[rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + je rmarkp_add_item + + cmp rcx,rsi + ja rmarkp_add_items + + mov rax,[rcx] + mov [rsi],rax + add rsi,1 + mov [rcx],rsi + jmp rmarkp_add_items + +; Reached with [rsp] equal to 0 or 1 (flags from the cmp rcx,1 above). 1 goes straight to rmarkp_last_item_in_queue; 0 falls through and takes further words from the vector that ends at end_vector, advancing the saved pointer at 8[rsp]. +rmarkp_add_stacked_item: + je rmarkp_last_item_in_queue +rmarkp_add_items2: + mov rsi,8[rsp] + add rsi,8 + cmp rsi,qword ptr end_vector + je rmarkp_last_item_in_queue + + mov rcx,[rsi] + mov 8[rsp],rsi + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_add_items2 + + mov rdx,rax + and rax,31*8 + shr rdx,8 + mov eax,dword ptr bit_set_table2[rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + je rmarkp_add_item2 + + mov rax,[rcx] + mov [rsi],rax + add rsi,1 + mov [rcx],rsi + jmp rmarkp_add_items2 + +; Queues (rcx, rsi, -1), advances rmarkp_queue_first by 32 and lowers rmarkp_n_queue_items_16 by 32; keeps adding until that value reaches -(4*32). +rmarkp_add_item2: + prefetch [rcx] + mov rbp,qword ptr rmarkp_queue_first + mov rdx,qword ptr rmarkp_n_queue_items_16 + + mov qword ptr rmarkp_queue[rbp],rcx + mov qword ptr rmarkp_queue+8[rbp],rsi + mov qword ptr rmarkp_queue+16[rbp],-1 + add rbp,32 + and rbp,7*32 + + sub rdx,32 + + mov qword ptr rmarkp_queue_first,rbp + mov qword ptr rmarkp_n_queue_items_16,rdx + + cmp rdx,-(4*32) + jne rmarkp_add_items2 + jmp
rmarkp_last_item_in_queue + +; Same loop as rmarkp_add_items2 but bounded by the value at 24[rsp] instead of end_vector, with the rcx <= rsi check before linking. +; NOTE(review): within this chunk rmarkp_add_items3 is only branched to from its own loop and from rmarkp_add_item3 - confirm whether an external entry exists. +rmarkp_add_items3: + mov rsi,8[rsp] + add rsi,8 + cmp rsi,24[rsp] + je rmarkp_last_item_in_queue + + mov rcx,[rsi] + mov 8[rsp],rsi + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_add_items3 + + mov rdx,rax + and rax,31*8 + shr rdx,8 + mov eax,dword ptr bit_set_table2[rax] + mov ebp,[rdi+rdx*4] + test rbp,rax + je rmarkp_add_item3 + + cmp rcx,rsi + ja rmarkp_add_items3 + + mov rax,[rcx] + mov [rsi],rax + add rsi,1 + mov [rcx],rsi + jmp rmarkp_add_items3 + +rmarkp_add_item3: + prefetch [rcx] + mov rbp,qword ptr rmarkp_queue_first + mov rdx,qword ptr rmarkp_n_queue_items_16 + + mov qword ptr rmarkp_queue[rbp],rcx + mov qword ptr rmarkp_queue+8[rbp],rsi + mov qword ptr rmarkp_queue+16[rbp],rsi + add rbp,32 + and rbp,7*32 + + sub rdx,32 + + mov qword ptr rmarkp_queue_first,rbp + mov qword ptr rmarkp_n_queue_items_16,rdx + + cmp rdx,-(4*32) + jne rmarkp_add_items3 + jmp rmarkp_last_item_in_queue + +; Queues (rcx, rsi, rsi), advances rmarkp_queue_first by 32 and lowers rmarkp_n_queue_items_16 by 32; returns to rmarkp_add_items until that value reaches -(4*32). +rmarkp_add_item: + prefetch [rcx] + mov rbp,qword ptr rmarkp_queue_first + mov rdx,qword ptr rmarkp_n_queue_items_16 + + mov qword ptr rmarkp_queue[rbp],rcx + mov qword ptr rmarkp_queue+8[rbp],rsi + mov qword ptr rmarkp_queue+16[rbp],rsi + add rbp,32 + and rbp,7*32 + + sub rdx,32 + + mov qword ptr rmarkp_queue_first,rbp + mov qword ptr rmarkp_n_queue_items_16,rdx + + cmp rdx,-(4*32) + jne rmarkp_add_items + +; Reloads (rcx, rsi, rbx) from the queue slot at byte offset rbx and computes rax = rcx + neg_heap_p3. +rmarkp_last_item_in_queue: + mov rcx,qword ptr rmarkp_queue[rbx] + + mov rax,qword ptr neg_heap_p3 + + mov rsi,qword ptr rmarkp_queue+8[rbx] + mov rbx,qword ptr rmarkp_queue+16[rbx] + + add rax,rcx + +; Tests the bit for offset rax: already set -> rmarkp_reverse_and_mark_next_node; otherwise the bit is set and the descriptor is loaded into rax. +rmarkp_node_no_prefetch: + + + + mov rdx,rax + and rax,31*8 + shr rdx,8 + mov eax,dword ptr (bit_set_table2)[rax] + mov ebp,dword ptr [rdi+rdx*4] + test rbp,rax + jne rmarkp_reverse_and_mark_next_node + + or rbp,rax + mov dword ptr [rdi+rdx*4],ebp + + mov rax,qword ptr [rcx] +; When rcx <= rbx (unsigned) the descriptor is stored in [rsi] and [rcx] receives rsi+1. +rmarkp_arguments: + cmp rcx,rbx + ja rmarkp_no_reverse + + lea rbp,1[rsi] + mov qword ptr [rsi],rax + mov qword ptr
[rcx],rbp + +; Dispatch on the descriptor in rax: bit 1 clear -> rmarkp_lazy_node. Otherwise rbp = 16-bit field at (-2)[rax]: 0 -> rmarkp_hnf_0, >= 256 -> rmarkp_record, 1 -> rmarkp_hnf_1, 2 -> rmarkp_hnf_2, larger -> rmarkp_hnf_3 with rbp reduced by 2. +rmarkp_no_reverse: + test al,2 + je rmarkp_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je rmarkp_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae rmarkp_record + + sub rbp,2 + je rmarkp_hnf_2 + jc rmarkp_hnf_1 + +; rdx = second word of the node (8[rcx] after the add above). If the bit for address rdx is already set the block is treated as shared; otherwise the bit is set here. +rmarkp_hnf_3: + mov rdx,qword ptr 8[rcx] +rmarkp_hnf_3_: + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,[rdi+rbx*4] + jne rmarkp_shared_argument_part + + or dword ptr [rdi+rbx*4],eax + +; Pushes ([rcx], rcx), then (value, address) pairs for the words from rdx+8*rbp down to rdx+8; the word at rdx is followed directly. +rmarkp_no_shared_argument_part: + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + lea rdx,[rdx+rbp*8] + mov qword ptr [rsp],rcx + +rmarkp_push_hnf_args: + mov rbx,qword ptr [rdx] + sub rsp,16 + mov qword ptr 8[rsp],rdx + sub rdx,8 + mov qword ptr [rsp],rbx + + sub rbp,1 + jg rmarkp_push_hnf_args + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + ja rmarkp_no_reverse_argument_pointer + +; rdx <= rsi: the word at [rdx] is moved to [rsi] and [rdx] receives rsi+3. + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_next_node + + mov rbx,rdx + jmp rmarkp_node_ + +rmarkp_no_reverse_argument_pointer: + mov rsi,rdx + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_next_node + mov rbx,rsi + jmp rmarkp_node_no_prefetch + +; Block at rdx was reached before. When rdx <= rcx the word at [rdx] is moved to 8[rcx] and [rdx] receives rcx+8+3; the first word of the node is then followed via rmarkp_hnf_1. +rmarkp_shared_argument_part: + cmp rdx,rcx + ja rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr [rdx],rax + mov qword ptr 8[rcx],rbx + jmp rmarkp_hnf_1 + +; rbp >= 256: rbp-258 selects rmarkp_record_1 (below), rmarkp_record_2 (equal) or rmarkp_record_3 (above). +rmarkp_record: + sub rbp,258 + je rmarkp_record_2 + jb rmarkp_record_1 + +; rbp = 16-bit field at 0[rax]: 0 -> _bb, 1 -> _ab, 2 -> _aab, larger -> rmarkp_hnf_3_ with rbp reduced by 2. rdx = second word of the node. +rmarkp_record_3: + movzx rbp,word ptr (-2+2)[rax] + mov rdx,qword ptr (16-8)[rcx] + sub rbp,1 + jb rmarkp_record_3_bb + je rmarkp_record_3_ab + sub rbp,1 + je rmarkp_record_3_aab + jmp rmarkp_hnf_3_ + +rmarkp_record_3_bb: + sub rcx,8 + + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr
[rdi+rbp*4],eax + + cmp rdx,rcx + ja rmarkp_next_node + +; eax is shifted to the next bit; when it shifts out of the 32-bit word the next dword (bit 0) is used. That following bit tells whether the block at rdx was linked before. + add eax,eax + jne rmarkp_bit_in_same_word1 + inc rbp + mov rax,1 +rmarkp_bit_in_same_word1: + test eax,dword ptr [rdi+rbp*4] + je rmarkp_not_yet_linked_bb + +; Following bit already set: additionally sets the bit for address rcx+16, then moves the word at [rdx] to 16[rcx] and stores rcx+16+3 in [rdx]. + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + add rax,16 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + jmp rmarkp_next_node + +; Following bit clear: sets it, then moves the word at [rdx] to 16[rcx] and stores rcx+16+3 in [rdx]. +rmarkp_not_yet_linked_bb: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(16+2+1)[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr [rdx],rax + jmp rmarkp_next_node + +; Same scheme as the _bb case with rcx not moved back (offsets 8 instead of 16); afterwards the first word is followed via rmarkp_hnf_1. +rmarkp_record_3_ab: + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + cmp rdx,rcx + ja rmarkp_hnf_1 + + add eax,eax + jne rmarkp_bit_in_same_word2 + inc rbp + mov rax,1 +rmarkp_bit_in_same_word2: + test eax,dword ptr [rdi+rbp*4] + je rmarkp_not_yet_linked_ab + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + add rax,8 + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + jmp rmarkp_hnf_1 + +rmarkp_not_yet_linked_ab: + or dword ptr [rdi+rbp*4],eax + mov rbp,qword ptr [rdx] + lea rax,(8+2+1)[rcx] + mov qword ptr 8[rcx],rbp + mov qword ptr [rdx],rax + jmp rmarkp_hnf_1 + +; If the bit for address rdx is already set the block is handled by rmarkp_shared_argument_part; otherwise the bit is set, ([rcx], rcx) is pushed and the word at [rdx] is followed. +rmarkp_record_3_aab: + mov rax,qword ptr neg_heap_p3 + add rax,rdx + + mov rbp,rax + and rax,31*8 + shr rbp,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbp*4] + jne rmarkp_shared_argument_part + or dword ptr [rdi+rbp*4],eax + + sub rsp,16 + mov qword ptr 8[rsp],rcx + lea rsi,8[rcx] + mov rcx,qword ptr [rcx] + mov qword ptr [rsp],rcx + + mov rcx,qword ptr [rdx] + + cmp rdx,rsi + ja
rmarkp_no_reverse_argument_pointer + + lea rbp,3[rsi] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rbp + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_next_node + + mov rbx,rdx + jmp rmarkp_node_ + +; 16-bit field at 0[rax]: above 1 -> both words are followed (rmarkp_hnf_2), 1 -> only the first (rmarkp_hnf_1), 0 -> none. +rmarkp_record_2: + cmp word ptr (-2+2)[rax],1 + ja rmarkp_hnf_2 + je rmarkp_hnf_1 + jmp rmarkp_next_node + +rmarkp_record_1: + cmp word ptr (-2+2)[rax],0 + jne rmarkp_hnf_1 + jmp rmarkp_next_node + +; Reached from rmarkp_lazy_node with the flags of sub rbp,1: zero falls through to rmarkp_hnf_1, negative goes to the selector code. +rmarkp_lazy_node_1: +; selectors: + jne rmarkp_selector_node_1 + +rmarkp_hnf_1: + mov rsi,rcx + mov rcx,qword ptr [rcx] + jmp rmarkp_node + +; selectors +; Clears the bit for the node at rcx-8, stores the word at 8[rcx-8] in [rsi] and follows it; when the node address is <= rbx the descriptor rax is written back to the node first. +rmarkp_indirection_node: + mov rdx,qword ptr neg_heap_p3 + sub rcx,8 + add rdx,rcx + + mov rbp,rdx + and rbp,31*8 + shr rdx,8 + mov ebp,dword ptr (bit_clear_table2)[rbp] + and dword ptr [rdi+rdx*4],ebp + + mov rdx,rcx + cmp rcx,rbx + mov rcx,qword ptr 8[rcx] + mov qword ptr [rsi],rcx + ja rmarkp_node_d1 + mov qword ptr [rdx],rax + jmp rmarkp_node_d1 + +; rbp+3 == 0 -> indirection. Otherwise rdx = word at [rcx], rbx is saved in pointer_compare_address and replaced by (rdx + neg_heap_p3) >> 3; rbp+4 <= 0 -> record selector paths. The remaining case falls to rmarkp_hnf_1 when the bit for rdx is set or bit 1 of its descriptor is clear. +rmarkp_selector_node_1: + add rbp,3 + je rmarkp_indirection_node + + mov rdx,qword ptr [rcx] + mov qword ptr pointer_compare_address,rbx + + mov rbx,qword ptr neg_heap_p3 + add rbx,rdx + shr rbx,3 + + add rbp,1 + jle rmarkp_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + je rmarkp_hnf_1 + + cmp word ptr (-2)[rbx],2 + jbe rmarkp_small_tuple_or_record + +; d2 = 16[rdx]; falls to rmarkp_hnf_1 when the bit for address d2 is set. +rmarkp_large_tuple_or_record: + mov d2,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3 + add rbx,d2 + shr rbx,3 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmarkp_hnf_1 + + ifdef NEW_DESCRIPTORS + mov rbx,qword ptr neg_heap_p3 + lea rbx,(-8)[rcx+rbx] + + mov eax,(-8)[rax] + + mov d3,rbx + and d3,31*8 + shr rbx,8 + mov d3d,dword ptr (bit_clear_table2)[d3] + and dword ptr [rdi+rbx*4],d3d + + movzx eax,word ptr 4[rax] +
mov rbx,qword ptr pointer_compare_address + +; The node at rcx-8 is overwritten with an __indirection descriptor; rax (16-bit offset from the selector descriptor) picks the word: below 16 relative to rdx, exactly 16 the first word at d2, above 16 relative to d2-24. + mov qword ptr (-8)[rcx],offset __indirection + + cmp rax,16 + jl rmarkp_tuple_or_record_selector_node_2 + + mov rdx,rcx + je rmarkp_tuple_selector_node_2 + + mov rcx,qword ptr (-24)[d2+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + jmp rmarkp_node_d1 + +rmarkp_tuple_selector_node_2: + mov rcx,qword ptr [d2] + mov qword ptr [rsi],rcx + mov qword ptr [rdx],rcx + jmp rmarkp_node_d1 + else +; Without NEW_DESCRIPTORS: clears the bit of the node at rcx-8, calls the code address read from 4[(-8)[rax]] with rcx = rdx, then turns the node into an __indirection to the result in rcx. +rmarkp_small_tuple_or_record: + mov rbx,qword ptr neg_heap_p3 + lea rbx,(-8)[rcx+rbx] + + push rcx + + mov rcx,rbx + and rcx,31*8 + shr rbx,8 + mov ecx,dword ptr (bit_clear_table2)[rcx] + and dword ptr [rdi+rbx*4],ecx + + mov eax,(-8)[rax] + + mov rcx,rdx + push rsi + mov eax,4[rax] + call near ptr rax + pop rsi + pop rdx + + mov qword ptr [rsi],rcx + + mov rbx,qword ptr pointer_compare_address + + mov qword ptr (-8)[rdx],offset __indirection + mov qword ptr [rdx],rcx + jmp rmarkp_node_d1 + endif + +; Reached with the flags of add rbp,1: zero -> strict record selector. Otherwise falls to rmarkp_hnf_1 when the bit selected by rbx is set or bit 1 of the descriptor at [rdx] is clear. +rmarkp_record_selector_node_1: + je rmarkp_strict_record_selector_node_1 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + je rmarkp_hnf_1 + + cmp word ptr (-2)[rbx],258 + jbe rmarkp_small_tuple_or_record + + ifdef NEW_DESCRIPTORS + mov d2,qword ptr 16[rdx] + + mov rbx,qword ptr neg_heap_p3 + add rbx,d2 + shr rbx,3 + + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmarkp_hnf_1 + +; NEW_DESCRIPTORS version: clears the bit of the node at rcx-8, reads the 16-bit offset at 4[(-8)[rax]] and replaces the node by an __indirection. +rmarkp_small_tuple_or_record: + mov rbx,qword ptr neg_heap_p3 + lea rbx,(-8)[rcx+rbx] + + mov eax,(-8)[rax] + + mov d3,rbx + and d3,31*8 + shr rbx,8 + mov d3d,dword ptr (bit_clear_table2)[d3] + and dword ptr [rdi+rbx*4],d3d + + movzx eax,word ptr 4[rax] + mov rbx,qword ptr pointer_compare_address + + mov qword ptr (-8)[rcx],offset __indirection + + cmp rax,16 + jle rmarkp_tuple_or_record_selector_node_2 + mov rdx,d2 + sub rax,24
+rmarkp_tuple_or_record_selector_node_2: + mov rbp,rcx + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rsi],rcx + mov qword ptr [rbp],rcx + mov rdx,rbp + jmp rmarkp_node_d1 + else + jmp rmarkp_large_tuple_or_record + endif + +; Strict record selector. Continues at rmarkp_hnf_1 when the bit selected by rbx (dword rbx>>5, bit rbx&31) is set in the bit vector at rdi, or when bit 1 of the descriptor at [rdx] is clear. +rmarkp_strict_record_selector_node_1: + mov rbp,rbx + shr rbx,5 + and rbp,31 + mov ebp,dword ptr (bit_set_table)[rbp*4] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmarkp_hnf_1 + + mov rbx,qword ptr [rdx] + test bl,2 + je rmarkp_hnf_1 + + cmp word ptr (-2)[rbx],258 + jbe rmarkp_select_from_small_record + +; 16-bit field above 258: d2 = 16[rdx] and the bit for address d2 is tested as well. + mov d2,qword ptr 16[rdx] + +; rbx = d2 + neg_heap_p3. neg_heap_p3 must be loaded into rbx, not d2: d2 has to keep the pointer loaded above because the field copies below read (-24)[d2+rax]. This matches rmark_strict_record_selector_node_1 in acompact_rmark.asm. + mov rbx,qword ptr neg_heap_p3 + add rbx,d2 + mov rbp,rbx + + shr rbx,8 + and rbp,31*8 + mov ebp,dword ptr (bit_set_table2)[rbp] + mov ebx,dword ptr [rdi+rbx*4] + and rbx,rbp + jne rmarkp_hnf_1 + +; ebx = 32-bit value at (-8)[rax]; rcx is moved back by 8. With NEW_DESCRIPTORS the 16-bit offsets at 4[rbx] and 6[rbx] select the words copied to 8[rcx] and 16[rcx]: offsets <= 16 are relative to rdx, larger ones to d2-24. +rmarkp_select_from_small_record: + mov ebx,(-8)[rax] + sub rcx,8 + + cmp rcx,qword ptr pointer_compare_address + ja rmarkp_selector_pointer_not_reversed + + ifdef NEW_DESCRIPTORS + movzx eax,word ptr 4[rbx] + cmp rax,16 + jle rmarkp_strict_record_selector_node_2 + mov rax,qword ptr (-24)[d2+rax] + jmp rmarkp_strict_record_selector_node_3 +rmarkp_strict_record_selector_node_2: + mov rax,qword ptr [rdx+rax] +rmarkp_strict_record_selector_node_3: + mov qword ptr 8[rcx],rax + + movzx eax,word ptr 6[rbx] + test rax,rax + je rmarkp_strict_record_selector_node_5 + cmp rax,16 + jle rmarkp_strict_record_selector_node_4 + mov rdx,d2 + sub rax,24 +rmarkp_strict_record_selector_node_4: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmarkp_strict_record_selector_node_5: + + mov rax,qword ptr (-8)[rbx] + else + mov qword ptr [rcx],rax + mov qword ptr [rsi],rcx + + push rsi + mov ebx,4[rbx] + call near ptr rbx + pop rsi + + mov rax,qword ptr [rcx] + endif +; rcx <= pointer_compare_address: [rcx] receives rsi+1 and the new descriptor in rax is stored at the original [rsi]. + add rsi,1 + mov qword ptr [rcx],rsi + mov qword ptr (-1)[rsi],rax + jmp rmarkp_next_node + +rmarkp_selector_pointer_not_reversed: + ifdef NEW_DESCRIPTORS + movzx eax,word ptr 4[rbx] + cmp rax,16 + jle rmarkp_strict_record_selector_node_6 + mov rax,qword ptr
(-24)[d2+rax] + jmp rmarkp_strict_record_selector_node_7 +rmarkp_strict_record_selector_node_6: + mov rax,qword ptr [rdx+rax] +rmarkp_strict_record_selector_node_7: + mov qword ptr 8[rcx],rax + + movzx eax,word ptr 6[rbx] + test rax,rax + je rmarkp_strict_record_selector_node_9 + cmp rax,16 + jle rmarkp_strict_record_selector_node_8 + mov rdx,d2 + sub rax,24 +rmarkp_strict_record_selector_node_8: + mov rax,qword ptr [rdx+rax] + mov qword ptr 16[rcx],rax +rmarkp_strict_record_selector_node_9: + + mov rax,qword ptr (-8)[rbx] + mov qword ptr [rcx],rax + else + mov ebx,4[rbx] + call near ptr rbx + endif + jmp rmarkp_next_node + +; When rcx <= rbx (unsigned): the word at [rcx] is moved to [rsi] and [rcx] receives rsi+1; then falls through to rmarkp_next_node. +rmarkp_reverse_and_mark_next_node: + cmp rcx,rbx + ja rmarkp_next_node + + mov rax,qword ptr [rcx] + mov qword ptr [rsi],rax + add rsi,1 + mov qword ptr [rcx],rsi + +; %rbp ,%rbx : free + +; Pops the next (rcx, rsi) pair from the machine stack; a popped rcx of 0 or 1 falls through to the queue drain below. +rmarkp_next_node: + mov rcx,qword ptr [rsp] + mov rsi,qword ptr 8[rsp] + add rsp,16 + + cmp rcx,1 + ja rmarkp_node + +; Stack exhausted: returns when rmarkp_n_queue_items_16 is 0. Otherwise the popped pair is put back (sub rsp,16), the slot at (rmarkp_queue_first + count value) masked with 7*32 is selected, the count value is raised by 32 and that slot is processed. +rmarkp_next_node_: + mov rdx,qword ptr rmarkp_n_queue_items_16 + test rdx,rdx + je end_rmarkp_nodes + + sub rsp,16 + + mov rbp,qword ptr rmarkp_queue_first + + lea rbx,[rbp+rdx] + add rdx,32 + + and rbx,7*32 + + mov qword ptr rmarkp_n_queue_items_16,rdx + jmp rmarkp_last_item_in_queue + +end_rmarkp_nodes: + ret + +; rbp = sign-extended 32-bit value at (-4)[rax]: 0 -> nothing to push, <= 1 -> rmarkp_lazy_node_1, >= 256 -> packed (a_size+b_size)+(b_size<<8) form. +rmarkp_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + je rmarkp_next_node + + add rcx,8 + + sub rbp,1 + jle rmarkp_lazy_node_1 + + cmp rbp,255 + jge rmarkp_closure_with_unboxed_arguments + +rmarkp_closure_with_unboxed_arguments_: + lea rcx,[rcx+rbp*8] + +; Pushes (value, address) pairs for the words from rcx downwards, rbp times; the remaining lowest word is followed directly. +rmarkp_push_lazy_args: + mov rbx,qword ptr [rcx] + sub rsp,16 + mov qword ptr 8[rsp],rcx + sub rcx,8 + mov qword ptr [rsp],rbx + sub rbp,1 + jg rmarkp_push_lazy_args + + mov rsi,rcx + mov rcx,qword ptr [rcx] + jmp rmarkp_node + +; rbp = (low byte of rbp) - (rbp >> 8); positive -> push that many words, zero -> single word, negative -> none. +rmarkp_closure_with_unboxed_arguments: +; (a_size+b_size)+(b_size<<8) +; addl $1,%rbp + mov rax,rbp + and rbp,255 + shr rax,8 + sub rbp,rax +; subl $1,%rbp + jg rmarkp_closure_with_unboxed_arguments_ + je rmarkp_hnf_1 + jmp rmarkp_next_node +
+rmarkp_hnf_0: +; Node whose 16-bit arity field is 0. dINT+2 and CHAR+2 descriptors get special handling, descriptors below CHAR+2 go to rmarkp_no_normal_hnf_0. For the rest the node's bit is cleared and [rsi] is redirected to descriptor-10 (NEW_DESCRIPTORS) or descriptor-14. + cmp rax,offset dINT+2 + je rmarkp_int_3 + + cmp rax,offset CHAR+2 + je rmarkp_char_3 + + jb rmarkp_no_normal_hnf_0 + + mov rbp,qword ptr neg_heap_p3 + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + mov edx,dword ptr (bit_clear_table2)[rdx] + and dword ptr [rdi+rbp*4],edx + + ifdef NEW_DESCRIPTORS + lea rdx,((-8)-2)[rax] + else + lea rdx,((-12)-2)[rax] + endif + mov qword ptr [rsi],rdx + cmp rcx,rbx + ja rmarkp_next_node + mov qword ptr [rcx],rax + jmp rmarkp_next_node + +; Value at 8[rcx] below 33 (unsigned): [rsi] = small_integers + 16*value and the node's bit is cleared; larger values leave the node as it is. +rmarkp_int_3: + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + jnc rmarkp_next_node + + shl rbp,4 + lea rdx,(small_integers)[rbp] + mov rbp,qword ptr neg_heap_p3 + mov qword ptr [rsi],rdx + add rbp,rcx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + mov edx,dword ptr (bit_clear_table2)[rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + ja rmarkp_next_node + mov qword ptr [rcx],rax + jmp rmarkp_next_node + +; [rsi] = static_characters + 16*(byte at 8[rcx]); the node's bit is cleared. +rmarkp_char_3: + movzx rdx,byte ptr 8[rcx] + mov rbp,qword ptr neg_heap_p3 + + shl rdx,4 + add rbp,rcx + add rdx,offset static_characters + mov qword ptr [rsi],rdx + + mov rdx,rbp + and rdx,31*8 + shr rbp,8 + mov edx,dword ptr (bit_clear_table2)[rdx] + and dword ptr [rdi+rbp*4],edx + + cmp rcx,rbx + ja rmarkp_next_node + mov qword ptr [rcx],rax + jmp rmarkp_next_node + +; Only __ARRAY__+2 descriptors are processed further. 16[rcx] == 0 selects rmarkp_lazy_array; otherwise the two 16-bit fields at (-2)[16[rcx]] and 0[16[rcx]] choose between the b, a-record and ab-record array paths. +rmarkp_no_normal_hnf_0: + lea r9,__ARRAY__+2 + cmp rax,r9 + jne rmarkp_next_node + + mov rax,qword ptr 16[rcx] + test rax,rax + je rmarkp_lazy_array + + movzx rdx,word ptr (-2+2)[rax] + test rdx,rdx + je rmarkp_b_array + + movzx rax,word ptr (-2)[rax] + test rax,rax + je rmarkp_b_array + + sub rax,256 + cmp rdx,rax + mov rbx,rdx + je rmarkp_a_record_array + +; The two counts differ: calls reorder, then continues with rax = rbx * 8[array]. NOTE(review): reorder is defined outside this chunk; its register contract is not visible here. +rmarkp_ab_record_array: + mov rdx,qword ptr 8[rcx] + add rcx,16 + push rcx + + imul rdx,rax + shl rdx,3 + + sub rax,rbx + add rcx,8 + add rdx,rcx + call reorder + + pop rcx + mov rax,rbx + imul rax,qword ptr (-8)[rcx] + jmp rmarkp_lr_array + +; Sets the bit for address rcx+8 and does not traverse any elements. +rmarkp_b_array: + mov rax,qword ptr neg_heap_p3 + add rax,rcx + add rax,8 + mov rbp,rax + and rax,31*8 + shr rbp,8
+ mov eax,dword ptr (bit_set_table2)[rax] + or dword ptr [rdi+rbp*4],eax + + jmp rmarkp_next_node + +; rax = 8[rcx], multiplied by rbx when rbx >= 2; rcx is advanced by 16. +rmarkp_a_record_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + cmp rbx,2 + jb rmarkp_lr_array + + imul rax,rbx + jmp rmarkp_lr_array + +rmarkp_lazy_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + +; Sets the bit-vector bit for address rcx+8*rax. For rax > 1 the words at rcx-8 and rcx are exchanged with the words at rcx+8*rax-8 and rcx+8*rax, the end address is pushed, and the words from rcx-8 up to that end are walked by rmarkp_array_nodes. +rmarkp_lr_array: + mov rbx,qword ptr neg_heap_p3 + add rbx,rcx + shr rbx,3 + add rbx,rax + + mov rdx,rbx + and rbx,31 + shr rdx,5 + mov ebx,dword ptr (bit_set_table)[rbx*4] + or dword ptr [rdi+rdx*4],ebx + + cmp rax,1 + jbe rmarkp_array_length_0_1 + + mov rdx,rcx + lea rcx,[rcx+rax*8] + + mov rax,qword ptr [rcx] + + mov rbx,qword ptr [rdx] + mov qword ptr [rdx],rax + + mov qword ptr [rcx],rbx + + mov rax,qword ptr (-8)[rcx] + sub rcx,8 + + mov rbx,qword ptr (-8)[rdx] + + sub rdx,8 + mov qword ptr [rcx],rbx + + mov qword ptr [rdx],rax + + push rcx + mov rsi,rdx + jmp rmarkp_array_nodes + +; Element already has its bit set: when rcx <= rsi the word at [rcx] is moved to [rsi] and [rcx] receives rsi+1. +rmarkp_array_nodes1: + cmp rcx,rsi + ja rmarkp_next_array_node + + mov rbx,qword ptr [rcx] + lea rax,1[rsi] + mov qword ptr [rsi],rbx + mov qword ptr [rcx],rax + +rmarkp_next_array_node: + add rsi,8 + cmp rsi,qword ptr [rsp] + je end_rmarkp_array_node + +; Loop over the words from rsi up to the end address kept at [rsp]. Values whose offset (value + neg_heap_p3) is not below heap_size_64_65 are skipped; values whose bit is clear get the bit set and are traversed via rmarkp_array_node. +rmarkp_array_nodes: + mov rcx,qword ptr [rsi] + + mov rax,qword ptr neg_heap_p3 + add rax,rcx + + cmp rax,qword ptr heap_size_64_65 + jnc rmarkp_next_array_node + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + mov ebp,dword ptr [rdi+rbx*4] + test rbp,rax + jne rmarkp_array_nodes1 + + or rbp,rax + mov dword ptr [rdi+rbx*4],ebp + + mov rax,qword ptr [rcx] + call rmarkp_array_node + + add rsi,8 + cmp rsi,qword ptr [rsp] + jne rmarkp_array_nodes + +end_rmarkp_array_node: + add rsp,8 + jmp rmarkp_next_node + +; Pushes the pair (1, rsi) and continues at rmarkp_arguments with rbx = rsi. +rmarkp_array_node: + sub rsp,16 + mov qword ptr 8[rsp],rsi + mov rbx,rsi + mov qword ptr [rsp],1 + jmp rmarkp_arguments + +; rax is 0 or 1 (flags still from the cmp rax,1 above; lea leaves them unchanged). 0: nothing to traverse. 1: the three words at 8,16,24[rcx] are rotated and the word then at 8[rcx] is followed via rmarkp_hnf_1. +rmarkp_array_length_0_1: + lea rcx,-16[rcx] + jb rmarkp_next_node + + mov rbx,qword ptr 24[rcx] + mov rbp,qword ptr 16[rcx] + mov qword ptr 24[rcx],rbp + mov rbp,qword ptr
8[rcx] + mov qword ptr 16[rcx],rbp + mov qword ptr 8[rcx],rbx + add rcx,8 + jmp rmarkp_hnf_1 diff --git a/acopy.asm b/acopy.asm new file mode 100644 index 0000000..fadf47e --- /dev/null +++ b/acopy.asm @@ -0,0 +1,1306 @@ + +; Copying collector. The conditional below selects the variant in which records without pointers are placed at the end of the destination space (growing down from rsi). +COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP = 1 + + push rsi + +; rdi = heap_p2 (allocation pointer in the destination space), rsi = rdi + heap_size_257*128 (its end, also stored in heap2_begin_and_end+8). + mov rdi,heap_p2 + + mov rax,heap_size_257 + shl rax,7 + mov semi_space_size,rax + lea rsi,[rdi+rax] + + mov qword ptr (heap2_begin_and_end+8),rsi + + mov rax,qword ptr caf_list + test rax,rax + je end_copy_cafs + +; For every entry of caf_list: copy_lp2 is called with rbp = entry+8 and rbx = [entry]-1; the next entry comes from (-8)[entry]. +copy_cafs_lp: + push (-8)[rax] + + lea rbp,8[rax] + mov rbx,qword ptr [rax] + sub rbx,1 + call copy_lp2 + + pop rax + test rax,rax + jne copy_cafs_lp + +; Copies the words between stack_p and the value pushed at entry (the saved rsi), unless that range is empty. +end_copy_cafs: + mov rbx,qword ptr [rsp] + mov rbp,stack_p + sub rbx,rbp + shr rbx,3 + + sub rbx,1 + jb end_copy0 + call copy_lp2 +end_copy0: + mov rbp,heap_p2 + + jmp copy_lp1 +; +; Copy all referenced nodes to the other semi space +; + +in_hnf_1_2: + dec rbx +copy_lp2_lp1: + call copy_lp2 +; Scan loop: rbp walks the already copied nodes from heap_p2 up to rdi; for each node copy_lp2 is called with rbx = number of pointer words minus 1. +copy_lp1: + cmp rbp,rdi + jae end_copy1 + + mov rax,[rbp] + add rbp,8 + test al,2 + je not_in_hnf_1 +in_hnf_1: + movzx rbx,word ptr (-2)[rax] + + test rbx,rbx + je copy_array_21 + + cmp rbx,2 + jbe in_hnf_1_2 + + cmp rbx,256 + jae copy_record_21 + + mov rax,8[rbp] + + test al,1 + jne node_without_arguments_part + + push rbx + xor rbx,rbx + + call copy_lp2 + + pop rbx + add rbp,8 + + sub rbx,2 + jmp copy_lp2_lp1 + +; Second word has bit 0 set: the bit is cleared, only the first word is copied and the second is skipped. +node_without_arguments_part: + dec rax + xor rbx,rbx + + mov 8[rbp],rax + call copy_lp2 + + add rbp,8 + jmp copy_lp1 + +; rbx = 16-bit field at (-2)[rax], >= 256 here. Above 258 -> copy_record_arguments_3. Otherwise rbx is reloaded with the 16-bit field at 0[rax]; movzx leaves the flags of the sub intact for the jb that follows. +copy_record_21: + sub rbx,258 + ja copy_record_arguments_3 + + movzx rbx,word ptr (-2+2)[rax] + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + jb in_hnf_1_2 + + sub rbx,1 + ja copy_lp2_lp1 + jmp copy_node_arity1 + else + jb copy_record_arguments_1 + + sub rbx,1 + ja copy_lp2_lp1 + je copy_node_arity1 + add rbp,16 + jmp copy_lp1 + +; One-word record: count 1 -> copy that word (rbx = 0); count 0 -> skip it. The unconditional jmp that used to follow dec made the next three instructions unreachable and entered copy_lp2 with rbx = -1 for count 0. +copy_record_arguments_1: + dec rbx + je copy_lp2_lp1 + add rbp,8 + jmp copy_lp1 + endif + +copy_record_arguments_3: + test byte ptr 8[rbp],1 + jne
record_node_without_arguments_part + + movzx rdx,word ptr (-2+2)[rax] + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + sub rdx,1 + else + test rdx,rdx + je copy_record_arguments_3b + sub rdx,1 + je copy_record_arguments_3abb + endif + +; Saves the address after the node (rbp + 8*rbx + 24) and rdx; copies the first word, then rdx more words starting at the next one, and resumes scanning at the saved address. + lea rcx,(3*8)[rbp+rbx*8] + push rcx + push rdx + + sub rbx,rbx + call copy_lp2 + + add rbp,8 + pop rbx + dec rbx + call copy_lp2 + + pop rbp + jmp copy_lp1 + + ife COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +copy_record_arguments_3abb: + push rbx + sub rbx,rbx + + call copy_lp2 + + pop rbx + + lea rbp,(2*8)[rbp+rbx*8] + jmp copy_lp1 + +copy_record_arguments_3b: + lea rbp,(3*8)[rbp+rbx*8] + jmp copy_lp1 + endif + +; Second word has bit 0 set: the bit is cleared, the first word is copied and the second skipped. +record_node_without_arguments_part: + and qword ptr 8[rbp],-2 + + ife COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[rax],0 + je record_node_without_arguments_part_3b + endif + + sub rbx,rbx + call copy_lp2 + + add rbp,8 + jmp copy_lp1 + + ife COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +record_node_without_arguments_part_3b: + add rbp,16 + jmp copy_lp1 + endif + +; Descriptor with bit 1 clear: rbx = sign-extended 32-bit value at (-4)[rax]. >= 257 -> packed form; > 1 -> rbx-1 passed to copy_lp2; otherwise one word is copied and one skipped. +not_in_hnf_1: + movsxd rbx,dword ptr (-4)[rax] + cmp rbx,257 + jge copy_unboxed_closure_arguments + sub rbx,1 + jg copy_lp2_lp1 + +copy_node_arity1: + xor rbx,rbx + call copy_lp2 + + add rbp,8 + jmp copy_lp1 + +; Exactly 257 -> two words are skipped. Otherwise rax = byte 1 of rbx, rbx = (low byte) - rax; when rbx >= 1 that many words are copied, then rax words are skipped. +copy_unboxed_closure_arguments: + je copy_unboxed_closure_arguments1 + + xor rax,rax + mov al,bh + and rbx,255 + sub rbx,rax + + sub rbx,1 + jl copy_unboxed_closure_arguments_without_pointers + + push rax + call copy_lp2 + pop rax + +copy_unboxed_closure_arguments_without_pointers: + lea rbp,[rbp+rax*8] + jmp copy_lp1 + +copy_unboxed_closure_arguments1: + add rbp,16 + jmp copy_lp1 + +; Arity field 0 in the scan loop: rbx = third word of the node (8[rbp] after the earlier add); 0 -> copy_array_21_a. Otherwise rax = 16-bit field at (-2)[rbx] minus 256 and rbx = 16-bit field at 0[rbx]; rbx == 0 -> _b, rbx == rax -> _r_a, else _ab. (-16)[rbp] is the word used as element count below. +copy_array_21: + mov rbx,qword ptr 8[rbp] + add rbp,16 + test rbx,rbx + je copy_array_21_a + + movzx rax,word ptr (-2)[rbx] + movzx rbx,word ptr (-2+2)[rbx] + sub rax,256 + test rbx,rbx + je copy_array_21_b + + cmp rbx,rax + je copy_array_21_r_a + +; Mixed elements: per element rbx words are copied and (rax-rbx)*8 bytes skipped; the three loop values are kept on the stack. +copy_array_21_ab: + cmp qword ptr (-16)[rbp],0 + je copy_lp1 + + sub rax,rbx + shl rax,3 + sub rbx,1 + +
push rbx + push rax + mov rbx,qword ptr (-16)[rbp] + sub rbx,1 + push rbx + +; [rsp] = remaining elements - 1, 8[rsp] = bytes to skip per element, 16[rsp] = words to copy per element - 1. +copy_array_21_lp_ab: + mov rbx,qword ptr 16[rsp] + call copy_lp2 + + add rbp,qword ptr 8[rsp] + sub qword ptr [rsp],1 + jnc copy_array_21_lp_ab + + add rsp,24 + jmp copy_lp1 + +; rbx was 0: all rax*count words are skipped. +copy_array_21_b: + mov rbx,qword ptr (-16)[rbp] + imul rbx,rax + lea rbp,[rbp+rbx*8] + jmp copy_lp1 + +; rbx == rax: all rax*count words are copied. +copy_array_21_r_a: + mov rbx,qword ptr (-16)[rbp] + imul rbx,rax + sub rbx,1 + jc copy_lp1 + jmp copy_lp2_lp1 + +copy_array_21_a: + mov rbx,qword ptr (-16)[rbp] + sub rbx,1 + jc copy_lp1 + jmp copy_lp2_lp1 + +; +; Copy nodes to the other semi-space +; + +; copy_lp2: rbp = address of the next word to process, rbx = number of words minus 1. Each path below advances rbp by 8, does sub rbx,1 and loops with jae while no borrow occurs; the instructions between the sub and the jae (mov/lea) do not change the flags. +copy_lp2: + mov rdx,qword ptr [rbp] + +; selectors: +continue_after_selector_2: + mov rcx,qword ptr [rdx] + test cl,2 + je not_in_hnf_2 + +; rcx = descriptor with bit 1 set; rax = 16-bit field at (-2)[rcx]: 0 -> copy_arity_0_node2, >= 256 -> copy_record_2, 1 -> copy_hnf_node2_1, 2 -> inline below, larger -> copy_hnf_node2_3. +in_hnf_2: + movzx rax,word ptr (-2)[rcx] + test rax,rax + je copy_arity_0_node2 + + cmp rax,256 + jae copy_record_2 + + sub rax,2 + mov [rbp],rdi + + lea rbp,8[rbp ] + ja copy_hnf_node2_3 + + mov [rdi],rcx + jb copy_hnf_node2_1 + +; Three-word copy; the old node's first word receives the new address + 1. + inc rdi + mov rcx,8[rdx] + + mov [rdx],rdi + mov rax,16[rdx] + + sub rbx,1 + mov (8-1)[rdi],rcx + + mov (16-1)[rdi],rax + lea rdi,(24-1)[rdi] + + jae copy_lp2 + ret + +copy_hnf_node2_1: + inc rdi + mov rax,8[rdx] + + sub rbx,1 + mov [rdx],rdi + + mov (8-1)[rdi],rax + lea rdi,(16-1)[rdi] + + jae copy_lp2 + ret + +; Node with a separate block addressed by 16[rdx]. If the first word of that block has bit 0 set it is stored as the new node's third word (arguments_already_copied_2); otherwise the block is copied directly after the node and its old first word receives the new block address + 1. +copy_hnf_node2_3: + mov [rdi],rcx + inc rdi + + mov [rdx],rdi + mov rcx,8[rdx] + + mov (8-1)[rdi],rcx + mov rcx,16[rdx] + + add rdi,24-1 + mov rdx,[rcx] + + test dl,1 + jne arguments_already_copied_2 + + mov (-8)[rdi],rdi + add rcx,8 + + mov [rdi],rdx + inc rdi + + mov (-8)[rcx],rdi + add rdi,8-1 + +cp_hnf_arg_lp2: + mov rdx,[rcx] + add rcx,8 + + mov [rdi],rdx + add rdi,8 + + dec rax + jne cp_hnf_arg_lp2 + + sub rbx,1 + jae copy_lp2 + ret + +arguments_already_copied_2: + mov (-8)[rdi],rdx + + sub rbx,1 + jae copy_lp2 + ret + +; Arity field 0: descriptors below dINT+2 -> copy_real_file_or_string_2, above CHAR+2 -> copy_normal_hnf_0_2, the range in between is handled next. +copy_arity_0_node2: + cmp rcx,offset dINT+2 + jb copy_real_file_or_string_2 + + cmp rcx,offset CHAR+2 + ja copy_normal_hnf_0_2 + +copy_int_bool_or_char_2: + mov
rax,8[rdx] + je copy_char_2 + + cmp rcx,offset dINT+2 + jne no_small_int_or_char_2 + +; Value below 33 (unsigned): the reference is replaced by small_integers + 16*value and nothing is copied. +copy_int_2: + cmp rax,33 + jae no_small_int_or_char_2 + + shl rax,4 + add rbp,8 + + add rax,offset small_integers + sub rbx,1 + + mov (-8)[rbp],rax + jae copy_lp2 + ret + +; The reference is replaced by static_characters + 16*(low byte of the value). +copy_char_2: + and rax,255 + + shl rax,4 + add rbp,8 + + add rax,offset static_characters + sub rbx,1 + + mov (-8)[rbp],rax + jae copy_lp2 + ret + +; Two-word node (rcx, rax) stored at rsi-16, i.e. allocated downwards from rsi; the old node receives the new address + 1. +no_small_int_or_char_2: + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +copy_record_node2_1_b: + endif + mov (-16)[rsi],rcx + add rbp,8 + + mov (-8)[rsi],rax + sub rsi,15 + + mov [rdx],rsi + dec rsi + + mov (-8)[rbp],rsi + + sub rbx,1 + jae copy_lp2 + ret + +; Nothing is copied: the reference is replaced by descriptor-10 (NEW_DESCRIPTORS) or descriptor-14. +copy_normal_hnf_0_2: + ifdef NEW_DESCRIPTORS + sub rcx,2-(-8) + else + sub rcx,2-(-12) + endif + sub rbx,1 + + mov [rbp],rcx + lea rbp,8[rbp] + jae copy_lp2 + ret + +copy_real_file_or_string_2: + lea r9,__STRING__+2 + cmp rcx,r9 + jbe copy_string_or_array_2 + +; Three-word node copied to rsi-24 (allocated downwards from rsi); the old node receives the new address + 1. +copy_real_or_file_2: + mov (-24)[rsi],rcx + sub rsi,24-1 + + mov [rdx],rsi + dec rsi + + mov rax,8[rdx] + mov rcx,16[rdx] + + mov [rbp],rsi + add rbp,8 + + mov 8[rsi],rax + sub rbx,1 + + mov 16[rsi],rcx + + jae copy_lp2 + ret + +; First word of the old node has bit 0 set: it is the new address + 1, so the reference is replaced by that value minus 1. +already_copied_2: + dec rcx + sub rbx,1 + + mov [rbp],rcx + lea rbp,8[rbp] + + jae copy_lp2 + ret + +; rax >= 256: rax-258 above 0 -> copy_record_node2_3, below -> copy_record_node2_1, equal -> three-word record handled here. With the COPY_RECORDS... option, records whose 16-bit field at 0[rcx] is 0 are copied to the rsi end instead. +copy_record_2: + sub rax,258 + ja copy_record_node2_3 + + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + jb copy_record_node2_1 + + cmp word ptr (-2+2)[rcx],0 + je copy_real_or_file_2 + + mov qword ptr [rbp],rdi + mov qword ptr [rdi],rcx + + lea rcx,1[rdi] + mov rax,qword ptr 8[rdx] + + mov qword ptr [rdx],rcx + + mov qword ptr 8[rdi],rax + mov rax,qword ptr 16[rdx] + + add rbp,8 + mov qword ptr 16[rdi],rax + + add rdi,24 + sub rbx,1 + jae copy_lp2 + ret + +copy_record_node2_1: + mov rax,qword ptr 8[rdx] + + cmp word ptr (-2+2)[rcx],0 + je copy_record_node2_1_b + + mov qword ptr [rbp],rdi + mov qword ptr [rdi],rcx + + lea rcx,1[rdi] + mov qword ptr 8[rdi],rax + + mov qword ptr [rdx],rcx + add rbp,8 + + add rdi,16 + sub rbx,1 +
jae copy_lp2 + ret + else + mov qword ptr [rbp],rdi + mov qword ptr [rdi],rcx + + lea rcx,1[rdi] + mov rax,qword ptr 8[rdx] + + mov qword ptr [rdx],rcx + jb copy_record_node2_1 + + mov qword ptr 8[rdi],rax + mov rax,qword ptr 16[rdx] + + add rbp,8 + mov qword ptr 16[rdi],rax + + add rdi,24 + sub rbx,1 + jae copy_lp2 + ret + +copy_record_node2_1: + add rbp,8 + mov qword ptr 8[rdi],rax + + add rdi,16 + sub rbx,1 + jae copy_lp2 + ret + endif + +copy_record_node2_3: + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[rcx],1 + jbe copy_record_node2_3_ab_or_b + endif + + push rax + lea rax,1[rdi] + + mov qword ptr [rdx],rax + mov rax,qword ptr 16[rdx] + + mov qword ptr [rdi],rcx + mov rdx,qword ptr 8[rdx] + + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + mov qword ptr 8[rdi],rdx + mov qword ptr [rbp],rdi + add rbp,8 + + mov rcx,rax + test byte ptr [rax],1 + jne record_arguments_already_copied_2 + else + mov rcx,rax + sub rax,qword ptr heap_p1 + + shr rax,4 + mov qword ptr 8[rdi],rdx + + mov rdx,rax + and rax,31 + + shr rdx,3 + mov qword ptr [rbp],rdi + + and rdx,-4 + mov eax,dword ptr (bit_set_table)[rax*4] + + add rdx,qword ptr heap_copied_vector + add rbp,8 + + test eax,[rdx] + jne record_arguments_already_copied_2 + + or [rdx],eax + endif + lea rdx,24[rdi] + + pop rax + mov qword ptr 16[rdi],rdx + + add rdi,25 + mov rdx,qword ptr [rcx] + + mov qword ptr [rcx],rdi + add rcx,8 + + mov qword ptr (-1)[rdi],rdx + add rdi,7 + +cp_record_arg_lp2: + mov rdx,qword ptr [rcx] + add rcx,8 + + mov qword ptr [rdi],rdx + add rdi,8 + + sub rax,1 + jne cp_record_arg_lp2 + + sub rbx,1 + jae copy_lp2 + ret + +record_arguments_already_copied_2: + mov rdx,qword ptr [rcx] + pop rax + + mov qword ptr 16[rdi],rdx + add rdi,24 + + sub rbx,1 + jae copy_lp2 + ret + + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +copy_record_node2_3_ab_or_b: + jb copy_record_node2_3_b + + push rax + lea rax,1[rdi] + + mov qword ptr [rdx],rax + mov rax,qword ptr 16[rdx] + + mov qword 
ptr [rdi],rcx + mov rdx,qword ptr 8[rdx] + + mov rcx,rax + sub rax,qword ptr heap_p1 + + shr rax,4 + mov qword ptr 8[rdi],rdx + + mov rdx,rax + and rax,31 + + shr rdx,3 + mov qword ptr [rbp],rdi + + and rdx,-4 + mov eax,dword ptr (bit_set_table)[rax*4] + + add rdx,qword ptr heap_copied_vector + add rbp,8 + + test eax,[rdx] + jne record_arguments_already_copied_2 + + or [rdx],eax + pop rax + + sub rsi,8 + + shl rax,3 + sub rsi,rax + + push rsi + add rsi,1 + + mov qword ptr 16[rdi],rsi + add rdi,24 + + mov rdx,qword ptr [rcx] + jmp cp_record_arg_lp3_c + +copy_record_node2_3_b: + push rax + lea rax,(-24+1)[rsi] + + mov qword ptr [rdx],rax + mov rax,qword ptr 16[rdx] + + mov qword ptr (-24)[rsi],rcx + mov rdx,qword ptr 8[rdx] + + mov rcx,rax + sub rax,qword ptr heap_p1 + + shr rax,4 + mov qword ptr (-16)[rsi],rdx + + mov rdx,rax + and rax,31 + sub rsi,24 + + shr rdx,3 + mov qword ptr [rbp],rsi + + and rdx,-4 + mov eax,dword ptr (bit_set_table)[rax*4] + + add rdx,qword ptr heap_copied_vector + add rbp,8 + + test eax,[rdx] + jne record_arguments_already_copied_3_b + + or [rdx],eax + pop rax + + mov rdx,rsi + sub rsi,8 + + shl rax,3 + sub rsi,rax + + mov qword ptr 16[rdx],rsi + + mov rdx,qword ptr [rcx] + + push rsi + add rsi,1 + +cp_record_arg_lp3_c: + mov qword ptr [rcx],rsi + add rcx,8 + mov qword ptr (-1) [rsi],rdx + add rsi,7 + +cp_record_arg_lp3: + mov rdx,qword ptr [rcx] + add rcx,8 + + mov qword ptr [rsi],rdx + add rsi,8 + + sub rax,8 + jne cp_record_arg_lp3 + + pop rsi + + sub rbx,1 + jae copy_lp2 + ret + +record_arguments_already_copied_3_b: + mov rdx,qword ptr [rcx] + pop rax + + mov qword ptr 16 [rsi],rdx + + sub rbx,1 + jae copy_lp2 + ret + endif + +not_in_hnf_2: + test cl,1 + jne already_copied_2 + + movsxd rax,dword ptr (-4)[rcx] + test rax,rax + jle copy_arity_0_node2_ + +copy_node2_1_: + and rax,255 + sub rax,2 + jl copy_arity_1_node2 +copy_node2_3: + mov [rbp],rdi + add rbp,8 + mov [rdi],rcx + inc rdi + mov [rdx],rdi + mov rcx,8[rdx] + add rdx,16 + mov 
(8-1)[rdi],rcx
+	add	rdi,16-1
+
+cp_arg_lp2:	; copy argument words from [rdx] to [rdi]; loop ends when rax borrows
+	mov	rcx,[rdx]
+	add	rdx,8
+	mov	[rdi],rcx
+	add	rdi,8
+	sub	rax,1
+	jae	cp_arg_lp2
+
+	sub	rbx,1
+	jae	copy_lp2
+	ret
+
+copy_arity_1_node2:
+copy_arity_1_node2_:	; copy a node of descriptor rcx plus one argument word to [rdi]
+	mov	[rbp],rdi	; store the new address through rbp
+	inc	rdi
+
+	add	rbp,8
+	mov	[rdx],rdi	; old node gets new address+1; bit 0 is what already_copied_2 is selected on
+
+	mov	rax,8[rdx]
+	mov	(-1)[rdi],rcx
+
+	mov	(8-1)[rdi],rax
+	add	rdi,24-1	; rdi has advanced by 24 bytes in total
+
+	sub	rbx,1
+	jae	copy_lp2
+	ret
+
+copy_indirection_2:	; node at rdx has arity field -2: follow 8[rdx]
+	mov	rax,rdx	; rax keeps the first indirection node for update_indirection_list_2
+	mov	rdx,8[rdx]
+
+	mov	rcx,[rdx]
+	test	cl,2
+	jne	in_hnf_2
+
+	test	cl,1
+	jne	already_copied_2
+
+	cmp	dword ptr (-4)[rcx],-2
+	je	skip_indirections_2
+
+	movsxd	rax,dword ptr (-4)[rcx]	; fix: sign-extend as not_in_hnf_2 does; jle/jl below and cmp rax,-2 in copy_selector_2 need a signed 64-bit arity
+	test	rax,rax
+	jle	copy_arity_0_node2_
+	jmp	copy_node2_1_
+
+skip_indirections_2:	; keep following 8[rdx] while the arity field is -2
+	mov	rdx,8[rdx]
+
+	mov	rcx,[rdx]
+	test	cl,2
+	jne	update_indirection_list_2
+	test	cl,1
+	jne	update_indirection_list_2
+
+	cmp	dword ptr (-4)[rcx],-2
+	je	skip_indirections_2
+
+update_indirection_list_2:	; walk the chain from rax, storing the final target rdx in each 8[node]
+	lea	rcx,8[rax]
+	mov	rax,8[rax]
+	mov	[rcx],rdx
+	cmp	rdx,rax	; stop once the next node is the target itself
+	jne	update_indirection_list_2
+
+	jmp	continue_after_selector_2
+
+copy_selector_2:	; rax = negative arity: -2 indirection, below -2 record selector, otherwise selector
+	cmp	rax,-2
+	je	copy_indirection_2
+	jl	copy_record_selector_2
+
+	mov	rax,8[rdx]	; rax = the selector node's argument
+ ifdef NEW_DESCRIPTORS
+	mov	d2,[rax]
+	test	d2b,2	; bit 1 clear: leave the selector unevaluated and copy it as an arity 1 node
+	je	copy_arity_1_node2_
+
+	mov	d3d,dword ptr (-8)[rcx]
+
+	cmp	word ptr (-2)[d2],2
+	jbe	copy_selector_2_
+
+	mov	d2,16[rax]
+
+	test	byte ptr [d2],1	; bit 0 set in the word at 16[rax]: do not select
+	jne	copy_arity_1_node2_
+
+	movzx	d3,word ptr 4[d3]	; d3 = 16-bit offset read from the selector's table entry
+	mov	qword ptr [rdx],offset __indirection	; overwrite the selector node with an indirection
+
+	cmp	d3,16
+	jl	copy_selector_2_1
+	je	copy_selector_2_2
+
+	mov	rcx,qword ptr (-24)[d2+d3]
+	mov	qword ptr 8[rdx],rcx
+	mov	rdx,rcx
+	jmp	continue_after_selector_2
+
+copy_selector_2_1:
+	mov	rcx,qword ptr 8[rax]
+	mov	qword ptr 8[rdx],rcx
+	mov	rdx,rcx
+	jmp	continue_after_selector_2
+
+copy_selector_2_2:
+	mov	rcx,qword ptr [d2]
+	mov	qword ptr 8[rdx],rcx
+	mov	rdx,rcx
+	jmp	continue_after_selector_2
+
+copy_selector_2_:	; small node: the field is at offset d3 from rax
+	movzx	d3,word ptr 4[d3]
+	mov	qword ptr [rdx],offset __indirection
+
+	mov	rcx,qword ptr [rax+d3]
+	mov	qword ptr 8[rdx],rcx
+	mov	rdx,rcx
+	jmp 
continue_after_selector_2 + else + mov rax,[rax] + test al,2 + je copy_arity_1_node2_ + + cmp word ptr (-2)[rax],2 + jbe copy_selector_2_ + + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP +copy_selector_2__: + endif + mov rax,8[rdx] + mov rax,16[rax] + test byte ptr [rax],1 + jne copy_arity_1_node2_ + +copy_selector_2_: + mov eax,(-8)[rcx] + + mov rcx,8[rdx] + push rdx + push rbp + mov eax,4[rax] + call near ptr rax + pop rbp + pop rdx + + mov qword ptr [rdx],offset __indirection + mov 8[rdx],rcx + + mov rdx,rcx + jmp continue_after_selector_2 + endif + +copy_record_selector_2: + cmp rax,-3 + mov rax,qword ptr 8[rdx] + mov d2,qword ptr [rax] + je copy_strict_record_selector_2 + + test d2b,2 + je copy_arity_1_node2_ + + mov d3d,dword ptr (-8)[rcx] + + cmp word ptr (-2)[d2],258 + ifdef NEW_DESCRIPTORS + jbe copy_record_selector_2_ + else + jbe copy_selector_2_ + endif + + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[d2],2 + jae copy_selector_2__ + endif + + mov d4,qword ptr 16[rax] + + lea d2,(-24)[d4] + sub d4,qword ptr heap_p1 + + mov d5,d4 + and d4,31*16 + + shr d5,7 + + shr d4,2 + and d5,-4 + + add d5,qword ptr heap_copied_vector + + mov d4d,dword ptr (bit_set_table)[d4] + + and d4d,dword ptr [d5] + + ifdef NEW_DESCRIPTORS + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + je copy_record_selector_2_ + jmp copy_arity_1_node2_ +copy_selector_2__: + mov d4,qword ptr 16[rax] + lea d2,(-24)[d4] + test byte ptr [d4],1 + jne copy_arity_1_node2_ + else + jne copy_arity_1_node2_ + endif +copy_record_selector_2_: + movzx d3,word ptr 4[d3] + mov qword ptr [rdx],offset __indirection + + cmp d3,16 + jle copy_record_selector_3 + mov rax,d2 +copy_record_selector_3: + mov rcx,qword ptr [rax+d3] + mov qword ptr 8[rdx],rcx + mov rdx,rcx + jmp continue_after_selector_2 + else + jne copy_arity_1_node2_ + jmp copy_selector_2_ + endif + +copy_strict_record_selector_2: + test d2b,2 + je copy_arity_1_node2_ + + mov d3d,dword ptr (-8)[rcx] + + cmp word ptr 
(-2)[d2],258 + jbe copy_strict_record_selector_2_ + + if COPY_RECORDS_WITHOUT_POINTERS_TO_END_OF_HEAP + cmp word ptr (-2+2)[d2],2 + jb copy_strict_record_selector_2_b + + mov d4,qword ptr 16[rax] + lea d2,(-24)[d4] + test byte ptr [d4],1 + jne copy_arity_1_node2_ + + jmp copy_strict_record_selector_2_ + +copy_strict_record_selector_2_b: + endif + + mov d4,qword ptr 16[rax] + + lea d2,(-24)[d4] + sub d4,qword ptr heap_p1 + + mov d5,d4 + and d4,31*16 + + shr d5,7 + + shr d4,2 + and d5,-4 + + add d5,qword ptr heap_copied_vector + + mov d4d,dword ptr (bit_set_table)[d4] + + and d4d,[d5] + + jne copy_arity_1_node2_ + +copy_strict_record_selector_2_: + ifdef NEW_DESCRIPTORS + movzx rcx,word ptr 4[d3] + cmp rcx,16 + jle copy_strict_record_selector_3 + mov rcx,qword ptr [d2+rcx] + jmp copy_strict_record_selector_4 +copy_strict_record_selector_3: + mov rcx,qword ptr [rax+rcx] +copy_strict_record_selector_4: + mov qword ptr 8[rdx],rcx + + movzx rcx,word ptr 6[d3] + test rcx,rcx + je copy_strict_record_selector_6 + cmp rcx,16 + jle copy_strict_record_selector_5 + mov rax,d2 +copy_strict_record_selector_5: + mov rcx,qword ptr [rax+rcx] + mov qword ptr 16[rdx],rcx +copy_strict_record_selector_6: + + mov rcx,qword ptr (-8)[d3] + mov qword ptr [rdx],rcx + jmp in_hnf_2 + else + mov rcx,rdx + mov rdx,qword ptr 8[rdx] + + push rbp + mov eax,4[d3] + call near ptr rax + pop rbp + + mov rdx,rcx + mov rcx,qword ptr [rcx] + test cl,2 + jne in_hnf_2 + hlt + endif + +copy_arity_0_node2_: + jl copy_selector_2 + + mov (-24)[rsi],rcx + sub rsi,24 + mov [rbp],rsi + lea rax,1[rsi] + + add rbp,8 + mov [rdx],rax + + sub rbx,1 + jae copy_lp2 + ret + +copy_string_or_array_2: + mov rcx,rdx + jne copy_array_2 + mov rax,rcx + + sub rax,heap_p1 + cmp rax,semi_space_size + jae copy_string_constant + + mov rdx,8[rcx] + add rbp,8 + + add rdx,7 + push rbx + + mov rax,rdx + and rdx,-8 + + shr rax,3 + sub rsi,rdx + + mov rbx,[rcx] + add rcx,8 + + mov (-16)[rsi],rbx + sub rsi,16 + + mov (-8)[rbp],rsi + lea 
rdx,1[rsi]
+
+	mov	(-8)[rcx],rdx
+	lea	rdx,8[rsi]
+
+cp_s_arg_lp2:	; copy rax+1 words from [rcx] to [rdx] (loop continues while rax stays >= 0)
+	mov	rbx,[rcx]
+	add	rcx,8
+
+	mov	[rdx],rbx
+	add	rdx,8
+
+	sub	rax,1
+	jge	cp_s_arg_lp2
+
+	pop	rbx	; restore the counter pushed before the copy
+	sub	rbx,1
+	jae	copy_lp2
+	ret
+
+copy_string_constant:	; string outside the semi space range check: keep the pointer rdx unchanged
+	mov	qword ptr [rbp],rdx
+	add	rbp,8
+
+	sub	rbx,1
+	jae	copy_lp2
+	ret
+
+copy_array_2:	; rcx = array node; 8[rcx] = size, 16[rcx] = element descriptor or 0
+	push	rbx
+
+	mov	rax,qword ptr 16[rcx]
+	test	rax,rax
+	je	copy_array_a2
+
+	movzx	rbx,word ptr (-2)[rax]
+
+	test	rbx,rbx
+	je	copy_strict_basic_array_2
+
+	sub	rbx,256	; rbx = (descriptor word - 256) * size = number of element words
+	imul	rbx,qword ptr 8[rcx]
+	jmp	copy_array_a3
+
+copy_array_a2:
+	mov	rbx,qword ptr 8[rcx]
+copy_array_a3:	; allocate 24+rbx*8 bytes at rdi and copy there
+	mov	rdx,rdi
+	lea	rdi,24[rdi+rbx*8]
+
+	mov	qword ptr [rbp],rdx
+	mov	rax,qword ptr [rcx]
+
+	add	rbp,8
+	mov	qword ptr [rdx],rax
+
+	lea	rax,1[rdx]	; new address+1 is stored in the old node's first word
+	add	rdx,8
+
+	mov	qword ptr [rcx],rax
+	add	rcx,8
+
+	lea	rax,1[rbx]	; cp_s_arg_lp2 then copies rbx+2 words
+	jmp	cp_s_arg_lp2
+
+copy_strict_basic_array_2:
+	mov	rbx,qword ptr 8[rcx]
+	cmp	rax,offset BOOL+2
+	je	copy_bool_array_2
+
+copy_int_array_2:	; rbx = number of data words; allocate 24+rbx*8 bytes below rsi
+	shl	rbx,3
+	lea	rdx,(-24)[rsi]
+
+	sub	rdx,rbx
+	mov	rax,qword ptr [rcx]
+
+	shr	rbx,3
+	mov	qword ptr [rbp],rdx
+
+	add	rbp,8
+	mov	rsi,rdx
+
+	mov	qword ptr [rdx],rax
+	lea	rax,1[rdx]
+
+	add	rdx,8
+	mov	qword ptr [rcx],rax
+
+	add	rcx,8
+	lea	rax,1[rbx]
+	jmp	cp_s_arg_lp2
+
+copy_bool_array_2:	; convert the element count in rbx to words: (n+7)>>3
+	add	rbx,7	; fix: result must be in rbx, copy_int_array_2 sizes from rbx and overwrites rax
+	shr	rbx,3
+	jmp	copy_int_array_2
+
+end_copy1:
+	mov	heap_end_after_gc,rsi
+
+	mov	rcx,offset finalizer_list	; rcx = link field through which the used list is rebuilt
+	mov	rdx,offset free_finalizer_list	; rdx = link field through which the free list is built
+	mov	rbp,qword ptr finalizer_list
+
+determine_free_finalizers_after_copy:
+	mov	rax,qword ptr [rbp]
+	test	al,1	; bit 0 set: first word holds new address+1, the finalizer was copied
+	je	finalizer_not_used_after_copy
+
+	mov	rbp,qword ptr 8[rbp]	; next element, read from the old node
+	sub	rax,1
+	mov	qword ptr [rcx],rax	; link the copied node into the used list
+	lea	rcx,8[rax]
+	jmp	determine_free_finalizers_after_copy
+
+finalizer_not_used_after_copy:
+	lea	r9,__Nil-8	; __Nil-8 terminates the list
+	cmp	rbp,r9
+	je	end_finalizers_after_copy
+
+	mov	qword ptr [rdx],rbp	; append the uncopied node to the free list
+	lea	rdx,8[rbp]
+	mov	rbp,qword ptr 8[rbp]
+	jmp	determine_free_finalizers_after_copy
+
+end_finalizers_after_copy:	; terminate both lists with rbp (= __Nil-8 here)
+	mov	qword ptr [rcx],rbp
+	mov	qword ptr [rdx],rbp
diff --git a/amark.asm b/amark.asm new file mode 100644 index 0000000..aaff6ab --- /dev/null +++ b/amark.asm @@ -0,0 +1,1927 @@ + + mov rax,qword ptr heap_size_65 + xor rbx,rbx + + mov qword ptr n_marked_words,rbx + shl rax,6 + + mov qword ptr lazy_array_list,rbx + mov qword ptr heap_size_64_65,rax + + lea rsi,(-4000)[rsp] + + mov rax,qword ptr caf_list + + mov qword ptr end_stack,rsi + + mov r10,neg_heap_p3 + mov r11,heap_size_64_65 + mov r13,qword ptr end_stack + mov r14,0 + + test rax,rax + je _end_mark_cafs + +_mark_cafs_lp: + mov rbx,qword ptr [rax] + mov rbp,qword ptr (-8)[rax] + + push rbp + lea rbp,8[rax] + lea r12,8[rax+rbx*8] + + call _mark_stack_nodes + + pop rax + test rax,rax + jne _mark_cafs_lp + +_end_mark_cafs: + mov rsi,qword ptr stack_top + mov rbp,qword ptr stack_p + + mov r12,rsi + call _mark_stack_nodes + +continue_mark_after_pmark: + mov qword ptr n_marked_words,r14 + + mov rcx,qword ptr lazy_array_list + + test rcx,rcx + je end_restore_arrays + +restore_arrays: + mov rbx ,qword ptr [rcx] + lea r9,__ARRAY__+2 + mov qword ptr [rcx],r9 + + cmp rbx,1 + je restore_array_size_1 + + lea rdx,[rcx+rbx*8] + mov rax,qword ptr 16[rdx] + test rax,rax + je restore_lazy_array + + mov rbp,rax + push rdx + + xor rdx,rdx + mov rax,rbx + movzx rbx,word ptr (-2+2)[rbp] + + div rbx + mov rbx,rax + + pop rdx + mov rax,rbp + +restore_lazy_array: + mov rdi,qword ptr 16[rcx] + mov rbp,qword ptr 8[rcx] + mov qword ptr 8[rcx],rbx + mov rsi,qword ptr 8[rdx] + mov qword ptr 16[rcx],rax + mov qword ptr 8[rdx],rbp + mov qword ptr 16[rdx],rdi + + test rax,rax + je no_reorder_array + + movzx rdx,word ptr (-2)[rax] + sub rdx,256 + movzx rbp,word ptr (-2+2)[rax] + cmp rbp,rdx + je no_reorder_array + + add rcx,24 + imul rbx,rdx + mov rax,rdx + lea rdx,[rcx+rbx*8] + mov rbx,rbp + sub rax,rbp + + call reorder + +no_reorder_array: + mov rcx,rsi + test rcx,rcx + jne restore_arrays + + jmp end_restore_arrays + +restore_array_size_1: + mov rbp,qword ptr 8[rcx] + mov rdx,qword ptr 
16[rcx] + mov qword ptr 8[rcx],rbx + mov rax,qword ptr 24[rcx] + mov qword ptr 24[rcx],rbp + mov qword ptr 16[rcx],rax + + mov rcx,rdx + test rcx,rcx + jne restore_arrays + +end_restore_arrays: + mov rdi,qword ptr heap_vector + lea rcx,finalizer_list + lea rdx,free_finalizer_list + + mov rbp,qword ptr [rcx] +determine_free_finalizers_after_mark: + lea r9,__Nil-8 + cmp rbp,r9 + je end_finalizers_after_mark + + lea rax,[r10+rbp] + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov esi,dword ptr (bit_set_table2)[rax] + test esi,dword ptr [rdi+rbx*4] + je finalizer_not_used_after_mark + + lea rcx,8[rbp] + mov rbp,qword ptr 8[rbp] + jmp determine_free_finalizers_after_mark + +finalizer_not_used_after_mark: + mov qword ptr [rdx],rbp + lea rdx,8[rbp] + + mov rbp,qword ptr 8[rbp] + mov qword ptr [rcx],rbp + jmp determine_free_finalizers_after_mark + +end_finalizers_after_mark: + mov qword ptr [rdx],rbp + + call add_garbage_collect_time + + mov rax,qword ptr bit_vector_size + + mov rdi,qword ptr n_allocated_words + add rdi,qword ptr n_marked_words + shl rdi,3 + + mov rsi,rax + shl rsi,3 + + push rdx + push rax + + mov rax,rdi + mul qword ptr heap_size_multiple + shrd rax,rdx,8 + shr rdx,8 + + mov rbx,rax + test rdx,rdx + + pop rax + pop rdx + + je not_largest_heap + + mov rbx,qword ptr heap_size_65 + shl rbx,6 + +not_largest_heap: + cmp rbx,rsi + jbe no_larger_heap + + mov rsi,qword ptr heap_size_65 + shl rsi,6 + cmp rbx,rsi + jbe not_larger_than_heap + mov rbx,rsi +not_larger_than_heap: + mov rax,rbx + shr rax,3 + mov qword ptr bit_vector_size,rax +no_larger_heap: + + mov rbp,rax + + mov rdi,qword ptr heap_vector + + shr rbp,5 + + test al,31 + je no_extra_word + + mov dword ptr [rdi+rbp*4],0 + +no_extra_word: + sub rax,qword ptr n_marked_words + shl rax,3 + mov qword ptr n_last_heap_free_bytes,rax + + mov rax,qword ptr n_marked_words + shl rax,3 + add qword ptr total_gc_bytes,rax + + test qword ptr flags,2 + je _no_heap_use_message2 + + mov r12,rsp + and rsp,-16 + ifdef LINUX + 
mov r13,rsi + mov r14,rdi + + lea rdi,marked_gc_string_1 + else + sub rsp,32 + + lea rcx,marked_gc_string_1 + endif + call ew_print_string + + ifdef LINUX + mov rdi,qword ptr n_marked_words + shl rdi,3 + else + mov rcx,qword ptr n_marked_words + shl rcx,3 + endif + call ew_print_int + + ifdef LINUX + lea rdi,heap_use_after_gc_string_2 + else + lea rcx,heap_use_after_gc_string_2 + endif + call ew_print_string + + ifdef LINUX + mov rsi,r13 + mov rdi,r14 + endif + mov rsp,r12 + +_no_heap_use_message2: + call call_finalizers + + mov rsi,qword ptr n_allocated_words + xor rbx,rbx + + mov rcx,rdi + mov qword ptr n_free_words_after_mark,rbx + +_scan_bits: + cmp ebx,dword ptr [rcx] + je _zero_bits + mov dword ptr [rcx],ebx + add rcx,4 + sub rbp,1 + jne _scan_bits + + jmp _end_scan + +_zero_bits: + lea rdx,4[rcx] + add rcx,4 + sub rbp,1 + jne _skip_zero_bits_lp1 + jmp _end_bits + +_skip_zero_bits_lp: + test rax,rax + jne _end_zero_bits +_skip_zero_bits_lp1: + mov eax,dword ptr [rcx] + add rcx,4 + sub rbp,1 + jne _skip_zero_bits_lp + + test rax,rax + je _end_bits + mov rax,rcx + mov dword ptr (-4)[rcx],ebx + sub rax,rdx + jmp _end_bits2 + +_end_zero_bits: + mov rax,rcx + sub rax,rdx + shl rax,3 + add qword ptr n_free_words_after_mark,rax + mov dword ptr (-4)[rcx],ebx + + cmp rax,rsi + jb _scan_bits + +_found_free_memory: + mov qword ptr bit_counter,rbp + mov qword ptr bit_vector_p,rcx + + lea rbx,(-4)[rdx] + sub rbx,rdi + shl rbx,6 + mov rdi,qword ptr heap_p3 + add rdi,rbx + + mov r15,rax + lea rbx,[rdi+rax*8] + + sub r15,rsi + mov rsi,qword ptr stack_top + + mov qword ptr heap_end_after_gc,rbx + + jmp restore_registers_after_gc_and_return + +_end_bits: + mov rax,rcx + sub rax,rdx + add rax,4 +_end_bits2: + shl rax,3 + add qword ptr n_free_words_after_mark,rax + cmp rax,rsi + jae _found_free_memory + +_end_scan: + mov qword ptr bit_counter,rbp + jmp compact_gc + +; %rbp : pointer to stack element +; %rdi : heap_vector +; %rax ,%rbx ,%rcx ,%rdx ,%rsi : free + 
+_mark_stack_nodes:	; mark every node referenced by the words in [rbp, r12)
+	cmp	rbp,r12
+	je	_end_mark_nodes
+_mark_stack_nodes_:
+	mov	rcx,qword ptr [rbp]	; rcx = next root
+
+	add	rbp,8
+	lea	rdx,[r10+rcx]	; r10 is loaded from neg_heap_p3 at the start of amark.asm
+
+	cmp	rdx,r11	; r11 is loaded from heap_size_64_65; unsigned >= means skip this word
+	jnc	_mark_stack_nodes
+
+	mov	rbx,rdx
+	and	rdx,31*8	; rdx = byte offset into bit_set_table2
+	shr	rbx,8	; rbx = dword index into the bit vector at rdi
+	mov	esi,dword ptr (bit_set_table2)[rdx]
+
+	test	esi,dword ptr [rdi+rbx*4]	; bit already set: nothing to do for this root
+	jne	_mark_stack_nodes
+
+	push	rbp	; saved scan position, popped again in _mark_next_node
+	push	0	; zero entry: _mark_next_node stops popping nodes when it pops 0
+	jmp	_mark_arguments
+
+_mark_hnf_2:
+	cmp	rsi,20000000h	; mask above 20000000h: also set bit 0 of the next dword (presumably the 3 words cross a dword boundary)
+	jbe	fits_in_word_6
+	or	dword ptr 4[rdi+rbx*4],1
+fits_in_word_6:
+	add	r14,3	; r14 is the running count later stored in n_marked_words
+
+_mark_record_2_c:
+	mov	rbx,qword ptr 8[rcx]	; push the second argument, continue with the first
+	push	rbx
+
+	cmp	rsp,r13	; r13 is loaded from end_stack; below it, switch to __mark_using_reversal
+	jb	__mark_using_reversal
+
+_mark_node2:
+_shared_argument_part:
+	mov	rcx,qword ptr [rcx]
+
+_mark_node:	; rcx = candidate node pointer
+	lea	rdx,[r10+rcx]
+	cmp	rdx,r11
+	jnc	_mark_next_node
+
+	mov	rbx,rdx
+	and	rdx,31*8
+	shr	rbx,8
+	mov	esi,dword ptr (bit_set_table2)[rdx]
+
+	test	esi,dword ptr [rdi+rbx*4]
+	jne	_mark_next_node
+
+_mark_arguments:	; rcx = unmarked node, esi = its bit mask, rbx = its dword index
+	mov	rax,qword ptr [rcx]	; rax = first word of the node
+	test	rax,2	; bit 1 clear: handled by _mark_lazy_node
+	je	_mark_lazy_node
+
+	movzx	rbp,word ptr (-2)[rax]	; rbp = 16-bit word stored just before the address in rax
+
+	test	rbp,rbp
+	je	_mark_hnf_0
+
+	or	dword ptr [rdi+rbx*4],esi	; set the node's bit
+	add	rcx,8
+
+	cmp	rbp,256	; values of 256 and above are handled by _mark_record
+	jae	_mark_record
+
+	sub	rbp,2
+	je	_mark_hnf_2
+	jb	_mark_hnf_1
+
+_mark_hnf_3:	; more than two arguments: 8[rcx] points to a separate argument part
+	mov	rdx,qword ptr 8[rcx]
+
+	cmp	rsi,20000000h
+	jbe	fits_in_word_1
+	or	dword ptr 4[rdi+rbx*4],1
+fits_in_word_1:
+
+	add	r14,3
+	lea	rax,[r10+rdx]
+	mov	rbx,rax
+
+	and	rax,31*8
+	shr	rbx,8
+
+	mov	esi,dword ptr (bit_set_table2)[rax]
+
+	test	esi,dword ptr [rdi+rbx*4]	; argument part already marked: only follow the first argument
+	jne	_shared_argument_part
+
+_no_shared_argument_part:
+	or	dword ptr [rdi+rbx*4],esi
+	add	rbp,1
+
+	add	r14,rbp
+	lea	rax,[rax+rbp*8]
+	lea	rdx,(-8)[rdx+rbp*8]	; rdx = address of the last word of the argument part
+
+	cmp	rax,32*8
+	jbe	fits_in_word_2
+	or	dword ptr 4[rdi+rbx*4],1
+fits_in_word_2:
+
+	mov	rbx,qword ptr [rdx]
+	sub	rbp,2
+	push	rbx
+
+_push_hnf_args:	; push the words below rdx, last to first, until rbp goes negative
+	mov	rbx,qword ptr (-8)[rdx]
+	sub	rdx,8
+	push	rbx
+	sub	rbp,1
+	jge	_push_hnf_args
+
+	cmp	rsp,r13
+	jae	_mark_node2
+
+	jmp	__mark_using_reversal
+
+_mark_hnf_1:
+	cmp	rsi,40000000h
+	jbe	fits_in_word_4
+	or	dword ptr 4[rdi+rbx*4],1
+fits_in_word_4:
+	
add r14,2 + mov rcx,qword ptr [rcx] + jmp _mark_node + +_mark_lazy_node_1: + add rcx,8 + or dword ptr [rdi+rbx*4],esi + cmp rsi,20000000h + jbe fits_in_word_3 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_3: + add r14,3 + + cmp rbp,1 + je _mark_node2 + +_mark_selector_node_1: + add rbp,2 + mov rdx,qword ptr [rcx] + je _mark_indirection_node + + lea rsi,[r10+rdx] + mov rbx,rsi + + shr rbx,8 + and rsi,31*8 + + add rbp,1 + + mov esi,dword ptr (bit_set_table2)[rsi] + jle _mark_record_selector_node_1 + + test esi,dword ptr [rdi+rbx*4] + jne _mark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + je _mark_node3 + + cmp word ptr (-2)[rbp],2 + jbe _small_tuple_or_record + +_large_tuple_or_record: + mov rbp,qword ptr 16[rdx] + mov r9,rbp + + add rbp,r10 + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + mov ebp,dword ptr (bit_set_table2)[rbp] + test ebp,dword ptr [rdi+rbx*4] + jne _mark_node3 + + ifdef NEW_DESCRIPTORS + mov eax,(-8)[rax] + lea rbp,__indirection + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr 4[rax] + mov rbp,rcx + + cmp rax,16 + jl _mark_tuple_selector_node_1 + mov rdx,r9 + je _mark_tuple_selector_node_2 + mov rcx,qword ptr (-24)[r9+rax] + mov qword ptr [rbp],rcx + jmp _mark_node + +_mark_tuple_selector_node_2: + mov rcx,qword ptr [r9] + mov qword ptr [rbp],rcx + jmp _mark_node + endif + +_small_tuple_or_record: + ifdef NEW_DESCRIPTORS + mov eax,(-8)[rax] + lea rbp,__indirection + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr 4[rax] + mov rbp,rcx +_mark_tuple_selector_node_1: + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rbp],rcx + else + mov eax,(-8)[rax] + push rcx + mov rcx,rdx + mov eax,4[rax] + call near ptr rax + pop rdx + + lea r9,__indirection + mov qword ptr (-8)[rdx],r9 + mov qword ptr [rdx],rcx + endif + jmp _mark_node + +_mark_record_selector_node_1: + je _mark_strict_record_selector_node_1 + + test esi,dword ptr [rdi+rbx*4] + jne _mark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + je _mark_node3 + + cmp word ptr (-2)[rbp],258 + jbe 
_small_tuple_or_record + ifdef NEW_DESCRIPTORS + mov rbp,qword ptr 16[rdx] + mov r9,rbp + + add rbp,r10 + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + mov ebp,dword ptr (bit_set_table2)[rbp] + test ebp,dword ptr [rdi+rbx*4] + jne _mark_node3 + + mov eax,(-8)[rax] + lea rbp,__indirection + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr 4[rax] + mov rbp,rcx + + cmp rax,16 + jle _mark_record_selector_node_2 + mov rdx,r9 + sub rax,24 +_mark_record_selector_node_2: + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rbp],rcx + jmp _mark_node + else + jmp _large_tuple_or_record + endif + +_mark_strict_record_selector_node_1: + test esi,dword ptr [rdi+rbx*4] + jne _mark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + je _mark_node3 + + cmp word ptr (-2)[rbp],258 + jbe _select_from_small_record + + mov rbp,qword ptr 16[rdx] + mov r9,rbp + + add rbp,r10 + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + mov ebp,dword ptr (bit_set_table2)[rbp] + test ebp,dword ptr [rdi+rbx*4] + jne _mark_node3 + +_select_from_small_record: + mov eax,dword ptr (-8)[rax] + sub rcx,8 + + ifdef NEW_DESCRIPTORS + movzx ebx,word ptr 4[rax] + cmp rbx,16 + jle _mark_strict_record_selector_node_2 + mov rbx,qword ptr (-24)[r9+rbx] + jmp _mark_strict_record_selector_node_3 +_mark_strict_record_selector_node_2: + mov rbx,qword ptr [rdx+rbx] +_mark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr 6[rax] + test rbx,rbx + je _mark_strict_record_selector_node_5 + cmp rbx,16 + jle _mark_strict_record_selector_node_4 + mov rdx,r9 + sub rbx,24 +_mark_strict_record_selector_node_4: + mov rbx,qword ptr [rdx+rbx] + mov qword ptr 16[rcx],rbx +_mark_strict_record_selector_node_5: + + mov rax,qword ptr (-8)[rax] + mov qword ptr [rcx],rax + else + mov eax,4[rax] + call near ptr rax + endif + jmp _mark_next_node + +_mark_indirection_node: +_mark_node3: + mov rcx,rdx + jmp _mark_node + +_mark_next_node: + pop rcx + test rcx,rcx + jne _mark_node + + pop rbp + cmp rbp,r12 + jne _mark_stack_nodes_ + 
+_end_mark_nodes: + ret + +_mark_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + je _mark_real_or_file + + cmp rbp,1 + jle _mark_lazy_node_1 + + cmp rbp,256 + jge _mark_closure_with_unboxed_arguments + inc rbp + or dword ptr [rdi+rbx*4],esi + + add r14,rbp + lea rdx,[rdx+rbp*8] + lea rcx,[rcx+rbp*8] + + cmp rdx,32*8 + jbe fits_in_word_7 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_7: + sub rbp,3 +_push_lazy_args: + mov rbx,qword ptr (-8)[rcx] + sub rcx,8 + push rbx + sub rbp,1 + jge _push_lazy_args + + sub rcx,8 + + cmp rsp,r13 + jae _mark_node2 + + jmp __mark_using_reversal + +_mark_closure_with_unboxed_arguments: + mov rax,rbp + and rbp,255 + sub rbp,1 + je _mark_real_or_file + + shr rax,8 + add rbp,2 + + or dword ptr [rdi+rbx*4],esi + add r14,rbp + lea rdx,[rdx+rbp*8] + + sub rbp,rax + + cmp rdx,32*8 + jbe fits_in_word_7_ + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_7_: + sub rbp,2 + jl _mark_next_node + + lea rcx,16[rcx+rbp*8] + jne _push_lazy_args + +_mark_closure_with_one_boxed_argument: + mov rcx,qword ptr (-8)[rcx] + jmp _mark_node + +_mark_hnf_0: + lea r9,dINT+2 + cmp rax,r9 + jb _mark_real_file_or_string + + or dword ptr [rdi+rbx*4],esi + + lea r9,CHAR+2 + cmp rax,r9 + ja _mark_normal_hnf_0 + +_mark_bool: + add r14,2 + + cmp rsi,40000000h + jbe _mark_next_node + + or dword ptr 4[rdi+rbx*4],1 + jmp _mark_next_node + +_mark_normal_hnf_0: + inc r14 + jmp _mark_next_node + +_mark_real_file_or_string: + lea r9,__STRING__+2 + cmp rax,r9 + jbe _mark_string_or_array + +_mark_real_or_file: + or dword ptr [rdi+rbx*4],esi + add r14,3 + + cmp rsi,20000000h + jbe _mark_next_node + + or dword ptr 4[rdi+rbx*4],1 + jmp _mark_next_node + +_mark_record: + sub rbp,258 + je _mark_record_2 + jl _mark_record_1 + +_mark_record_3: + add r14,3 + + cmp rsi,20000000h + jbe fits_in_word_13 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_13: + mov rdx,qword ptr 8[rcx] + + movzx rbx,word ptr (-2+2)[rax] + lea rsi,[r10+rdx] + + mov rax,rsi + and rsi,31*8 + + shr rax,8 + sub 
rbx,1 + + mov edx,dword ptr (bit_set_table2)[rsi] + jb _mark_record_3_bb + + test edx,dword ptr [rdi+rax*4] + jne _mark_node2 + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + jbe _push_record_arguments + or dword ptr 4[rdi+rax*4],1 +_push_record_arguments: + mov rdx,qword ptr 8[rcx] + mov rbp,rbx + shl rbx,3 + add rdx,rbx + sub rbp,1 + jge _push_hnf_args + + jmp _mark_node2 + +_mark_record_3_bb: + test edx,dword ptr [rdi+rax*4] + jne _mark_next_node + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + jbe _mark_next_node + + or dword ptr 4[rdi+rax*4],1 + jmp _mark_next_node + +_mark_record_2: + cmp rsi,20000000h + jbe fits_in_word_12 + or dword ptr 4[rdi+rbx*4],1 +fits_in_word_12: + add r14,3 + + cmp word ptr (-2+2)[rax],1 + ja _mark_record_2_c + je _mark_node2 + jmp _mark_next_node + +_mark_record_1: + cmp word ptr (-2+2)[rax],0 + jne _mark_hnf_1 + + jmp _mark_bool + +_mark_string_or_array: + je _mark_string_ + +_mark_array: + mov rbp,qword ptr 16[rcx] + test rbp,rbp + je _mark_lazy_array + + movzx rax,word ptr (-2)[rbp] + + test rax,rax + je _mark_strict_basic_array + + movzx rbp,word ptr (-2+2)[rbp] + test rbp,rbp + je _mark_b_record_array + + cmp rsp,r13 + jb _mark_array_using_reversal + + sub rax,256 + cmp rax,rbp + je _mark_a_record_array + +_mark_ab_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae _end_set_ab_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_ab_array_bits + +_mark_ab_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb _mark_ab_array_lp + +_last_ab_array_bits: + or dword ptr [rdi+rbx*4],ebp + +_end_set_ab_array_bits: + mov rax,qword ptr 8[rcx] + mov rdx,qword ptr 16[rcx] + movzx rbx,word ptr (-2+2)[rdx] + movzx rdx,word ptr (-2)[rdx] + shl rbx,3 + lea rdx,(-2048)[rdx*8] + push 
rbx + push rdx + lea rbp,24[rcx] + push r12 + jmp _mark_ab_array_begin + +_mark_ab_array: + mov rbx,qword ptr 16[rsp] + push rax + push rbp + lea r12,[rbp+rbx] + + call _mark_stack_nodes + + mov rbx,qword ptr (8+16)[rsp] + pop rbp + pop rax + add rbp,rbx +_mark_ab_array_begin: + sub rax,1 + jnc _mark_ab_array + + pop r12 + add rsp,16 + jmp _mark_next_node + +_mark_a_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + push rax + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae _end_set_a_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_a_array_bits + +_mark_a_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb _mark_a_array_lp + +_last_a_array_bits: + or dword ptr [rdi+rbx*4],ebp + +_end_set_a_array_bits: + pop rax + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + call _mark_stack_nodes + + pop r12 + jmp _mark_next_node + +_mark_lazy_array: + cmp rsp,r13 + jb _mark_array_using_reversal + + or dword ptr [rdi+rbx*4],esi + mov rax,qword ptr 8[rcx] + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae _end_set_lazy_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_lazy_array_bits + +_mark_lazy_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb _mark_lazy_array_lp + +_last_lazy_array_bits: + or dword ptr [rdi+rbx*4],ebp + +_end_set_lazy_array_bits: + mov rax,qword ptr 8[rcx] + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + call _mark_stack_nodes + + pop r12 + jmp _mark_next_node + +_mark_array_using_reversal: + push 0 + mov rsi,1 + jmp __mark_node + +_mark_strict_basic_array: + mov rax,qword ptr 8[rcx] + cmp rbp,offset BOOL+2 + je _mark_strict_bool_array +_mark_strict_real_array: +_mark_strict_int_array: + add rax,3 + jmp _mark_basic_array_ +_mark_strict_bool_array: + add rax,24+7 + shr rax,3 + jmp _mark_basic_array_ + +_mark_b_record_array: + mov 
rbp,qword ptr 8[rcx] + sub rax,256 + imul rax,rbp + add rax,3 + jmp _mark_basic_array_ + +_mark_string_: + mov rax,qword ptr 8[rcx] + add rax,16+7 + shr rax,3 + +_mark_basic_array_: + or dword ptr [rdi+rbx*4],esi + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae _mark_next_node + + inc rbx + mov rbp,1 + cmp rbx,rax + jae _last_string_bits + +_mark_string_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb _mark_string_lp + +_last_string_bits: + or dword ptr [rdi+rbx*4],ebp + jmp _mark_next_node + +__end_mark_using_reversal: + pop rdx + test rdx,rdx + je _mark_next_node + mov qword ptr [rdx],rcx + jmp _mark_next_node + +__mark_using_reversal: + push rcx + mov rsi,1 + mov rcx,qword ptr [rcx] + jmp __mark_node + +__mark_arguments: + mov rax,qword ptr [rcx] + test al,2 + je __mark_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je __mark_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae __mark__record + + sub rbp,2 + je __mark_hnf_2 + jb __mark_hnf_1 + +__mark_hnf_3: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + + or dword ptr [rdi+rbx*4],edx + + cmp rdx,20000000h + + mov rax,qword ptr 8[rcx] + + jbe fits__in__word__1 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__1: + add rax,r10 + + mov rbx,rax + and rax,31*8 + + shr rbx,8 + + mov edx,dword ptr (bit_set_table2)[rax] + test edx,dword ptr [rdi+rbx*4] + jne __shared_argument_part + +__no_shared_argument_part: + or dword ptr [rdi+rbx*4],edx + mov rdx,qword ptr 8[rcx] + + add rbp,1 + mov qword ptr 8[rcx],rsi + + add r14,rbp + add rcx,8 + + shl rbp,3 + or qword ptr [rdx],1 + + add rax,rbp + add rdx,rbp + + cmp rax,32*8 + jbe fits__in__word__2 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__2: + + mov rbp ,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rcx + lea rsi,(-8)[rdx] + mov rcx,rbp + jmp __mark_node + +__mark_hnf_1: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,2 + or dword ptr [rdi+rbx*4],edx + cmp rdx,40000000h + jbe __shared_argument_part + 
or dword ptr 4[rdi+rbx*4],1 +__shared_argument_part: + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + lea rsi,2[rcx] + mov rcx,rbp + jmp __mark_node + +__mark_no_selector_2: + pop rbx +__mark_no_selector_1: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,20000000h + jbe __shared_argument_part + + or dword ptr 4[rdi+rbx*4],1 + jmp __shared_argument_part + +__mark_lazy_node_1: + je __mark_no_selector_1 + +__mark_selector_node_1: + add rbp,2 + je __mark_indirection_node + + add rbp,1 + + push rbx + mov rbp,qword ptr [rcx] + push rax + lea rax,[r10+rbp] + + jle __mark_record_selector_node_1 + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbx*4] + pop rax + jne __mark_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + je __mark_no_selector_2 + + cmp word ptr (-2)[rbx],2 + jbe __small_tuple_or_record + +__large_tuple_or_record: + mov r8,qword ptr 16[rbp] + mov r9,r8 + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + mov r8d,dword ptr (bit_set_table2)[r8] + test r8d,dword ptr [rdi+rbx*4] + jne __mark_no_selector_2 + + ifdef NEW_DESCRIPTORS + mov eax,dword ptr (-8)[rax] + lea rdx,__indirection + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr 4[rax] + mov r8,rcx + + cmp rax,16 + jl __mark_tuple_selector_node_1 + je __mark_tuple_selector_node_2 + mov rcx,qword ptr (-24)[r9+rax] + mov qword ptr [r8],rcx + jmp __mark_node + +__mark_tuple_selector_node_2: + mov rcx,qword ptr [r9] + mov qword ptr [r8],rcx + jmp __mark_node + endif + +__small_tuple_or_record: + ifdef NEW_DESCRIPTORS + mov eax,dword ptr (-8)[rax] + lea rdx,__indirection + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr 4[rax] + mov r8,rcx +__mark_tuple_selector_node_1: + mov rcx,qword ptr [rbp+rax] + mov qword ptr [r8],rcx + jmp __mark_node + else + mov eax,(-8)[rax] + pop rbx + + push rcx + mov rcx,qword ptr [rcx] + mov eax,4[rax] + call near ptr rax + pop rdx + + 
mov qword ptr (-8)[rdx],offset __indirection + mov qword ptr [rdx],rcx + endif + jmp __mark_node + +__mark_record_selector_node_1: + je __mark_strict_record_selector_node_1 + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbx*4] + pop rax + jne __mark_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + je __mark_no_selector_2 + + cmp word ptr (-2)[rbx],258 + ifdef NEW_DESCRIPTORS + jbe __small_record + + mov r8,qword ptr 16[rbp] + mov r9,r8 + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + mov r8d,dword ptr (bit_set_table2)[r8] + test r8d,dword ptr [rdi+rbx*4] + jne __mark_no_selector_2 + +__small_record: + mov eax,(-8)[rax] + lea rdx,__indirection + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr 4[rax] + mov r8,rcx + + cmp rax,16 + jle __mark_record_selector_node_2 + mov rbp,r9 + sub rax,24 +__mark_record_selector_node_2: + mov rcx,qword ptr [rbp+rax] + mov qword ptr [r8],rcx + jmp __mark_node + else + jbe __small_tuple_or_record + jmp __large_tuple_or_record + endif + +__mark_strict_record_selector_node_1: + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbx *4] + pop rax + jne __mark_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + je __mark_no_selector_2 + + cmp word ptr (-2)[rbx],258 + jle __select_from_small_record + + mov r8,qword ptr 16[rbp] + mov r9,r8 + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + mov r8d,dword ptr (bit_set_table2)[r8] + test r8d,dword ptr [rdi+rbx*4] + jne __mark_no_selector_2 + +__select_from_small_record: + ifdef NEW_DESCRIPTORS + mov eax,(-8)[rax] + sub rcx,8 + + movzx ebx,word ptr 4[rax] + cmp rbx,16 + jle __mark_strict_record_selector_node_2 + mov rbx,qword ptr (-24)[r9+rbx] + jmp __mark_strict_record_selector_node_3 +__mark_strict_record_selector_node_2: + mov rbx,qword ptr [rbp+rbx] +__mark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr 
6[rax] + test rbx,rbx + je __mark_strict_record_selector_node_5 + cmp rbx,16 + jle __mark_strict_record_selector_node_4 + mov rbp,r9 + sub rbx,24 +__mark_strict_record_selector_node_4: + mov rbx,qword ptr [rbp+rbx] + mov qword ptr 16[rcx],rbx +__mark_strict_record_selector_node_5: + pop rbx + + mov rax,qword ptr (-8)[rax] + mov qword ptr [rcx],rax + else + mov eax,(-8)[rax] + pop rbx + mov rdx,qword ptr [rcx] + sub rcx,8 + mov eax,4[rax] + call near ptr rax + endif + jmp __mark_node + +__mark_indirection_node: + mov rcx,qword ptr [rcx] + jmp __mark_node + +__mark_hnf_2: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,20000000h + jbe fits__in__word__6 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__6: + +__mark_record_2_c: + mov rax,qword ptr [rcx] + mov rbp,qword ptr 8[rcx] + or rax,2 + mov qword ptr 8[rcx],rsi + mov qword ptr [rcx],rax + lea rsi,8[rcx] + mov rcx,rbp + +__mark_node: + lea rdx,[r10+rcx] + cmp rdx,r11 + jae __mark_next_node + + mov rbx,rdx + and rdx,31*8 + shr rbx,8 + mov ebp,dword ptr (bit_set_table2)[rdx] + test ebp,dword ptr [rdi+rbx*4] + je __mark_arguments + +__mark_next_node: + test rsi,3 + jne __mark_parent + + mov rbp,qword ptr (-8)[rsi] + mov rdx,qword ptr [rsi] + mov qword ptr [rsi],rcx + mov qword ptr (-8)[rsi],rdx + sub rsi,8 + + mov rcx,rbp + and rbp,3 + and rcx,-4 + or rsi,rbp + jmp __mark_node + +__mark_parent: + mov rbx,rsi + and rsi,-4 + je __end_mark_using_reversal + + and rbx,3 + mov rbp,qword ptr [rsi] + mov qword ptr [rsi],rcx + + sub rbx,1 + je __argument_part_parent + + lea rcx,(-8)[rsi] + mov rsi,rbp + jmp __mark_next_node + +__argument_part_parent: + and rbp,-4 + mov rdx,rsi + mov rcx,qword ptr (-8)[rbp] + mov rbx,qword ptr [rbp] + mov qword ptr (-8)[rbp],rbx + mov qword ptr [rbp],rdx + lea rsi,(2-8)[rbp] + jmp __mark_node + +__mark_lazy_node: + movsxd rbp,dword ptr(-4)[rax] + test rbp,rbp + je __mark_real_or_file + + add rcx,8 + cmp rbp,1 + jle __mark_lazy_node_1 + cmp rbp,256 + 
jge __mark_closure_with_unboxed_arguments + + add rbp,1 + mov rax,rdx + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,rbp + + lea rax,[rax+rbp*8] + sub rbp,2 + + or dword ptr [rdi+rbx*4],edx + + cmp rax,32*8 + jbe fits__in__word__7 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__7: +__mark_closure_with_unboxed_arguments__2: + lea rdx,[rcx+rbp*8] + mov rax,qword ptr [rcx] + or rax,2 + mov qword ptr [rcx],rax + mov rcx,qword ptr [rdx] + mov qword ptr [rdx],rsi + mov rsi,rdx + jmp __mark_node + +__mark_closure_with_unboxed_arguments: + mov rax,rbp + and rbp,255 + + sub rbp,1 + je __mark_closure_1_with_unboxed_argument + add rbp,2 + + shr rax,8 + add r14,rbp + + push rcx + lea rcx,[rdx+rbp*8] + + mov edx,dword ptr (bit_set_table2)[rdx] + sub rbp,rax + + or dword ptr [rdi+rbx*4],edx + cmp rcx,32*8 + jbe fits__in_word_7_ + or dword ptr 4[rdi+rbx*4],1 +fits__in_word_7_: + pop rcx + sub rbp,2 + jg __mark_closure_with_unboxed_arguments__2 + je __shared_argument_part + sub rcx,8 + jmp __mark_next_node + +__mark_closure_1_with_unboxed_argument: + sub rcx,8 + jmp __mark_real_or_file + +__mark_hnf_0: + cmp rax,offset dINT+2 + jne __no_int_3 + + mov rbp,qword ptr 8[rcx] + cmp rbp,33 + jb ____small_int + +__mark_bool_or_small_string: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,2 + or dword ptr [rdi+rbx*4],edx + cmp rdx,40000000h + jbe __mark_next_node + or dword ptr 4[rdi+rbx*4],1 + jmp __mark_next_node + +____small_int: + shl rbp,4 + lea rcx,(small_integers)[rbp] + jmp __mark_next_node + +__no_int_3: + jb __mark_real_file_or_string + + cmp rax,offset CHAR+2 + jne __no_char_3 + + movzx rbp,byte ptr 8[rcx] + shl rbp,4 + lea rcx,(static_characters)[rbp] + jmp __mark_next_node + +__no_char_3: + jb __mark_bool_or_small_string + + ifdef NEW_DESCRIPTORS + lea rcx,((-8)-2)[rax] + else + lea rcx,((-12)-2)[rax] + endif + jmp __mark_next_node + +__mark_real_file_or_string: + lea r9,__STRING__+2 + cmp rax,r9 + jbe __mark_string_or_array + +__mark_real_or_file: + mov edx,dword ptr 
(bit_set_table2)[rdx] + add r14,3 + + or dword ptr [rdi+rbx*4],edx + + cmp rdx,20000000h + jbe __mark_next_node + + or dword ptr 4[rdi+rbx*4],1 + jmp __mark_next_node + +__mark__record: + sub rbp,258 + je __mark_record_2 + jl __mark_record_1 + +__mark_record_3: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,20000000h + jbe fits__in__word__13 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word__13: + movzx rbx,word ptr (-2+2)[rax] + + mov rdx,qword ptr 8[rcx] + add rdx,r10 + mov rax,rdx + and rdx,31*8 + shr rax,8 + + push rsi + + mov esi,dword ptr (bit_set_table2)[rdx] + test esi,dword ptr [rdi+rax*4] + jne __shared_record_argument_part + + add rbp,1 + or dword ptr [rdi+rax *4],esi + + lea rdx,[rdx+rbp*8] + add r14,rbp + + pop rsi + + cmp rdx,32*8 + jbe fits__in__word__14 + or dword ptr 4[rdi+rax*4],1 +fits__in__word__14: + sub rbx,1 + mov rdx,qword ptr 8[rcx] + jl __mark_record_3_bb + je __shared_argument_part + + mov qword ptr 8[rcx],rsi + add rcx,8 + + sub rbx,1 + je __mark_record_3_aab + + lea rsi,[rdx+rbx*8] + mov rax,qword ptr [rdx] + or rax,1 + mov rbp,qword ptr [rsi] + mov qword ptr [rdx],rax + mov qword ptr [rsi],rcx + mov rcx,rbp + jmp __mark_node + +__mark_record_3_bb: + sub rcx,8 + jmp __mark_next_node + +__mark_record_3_aab: + mov rbp,qword ptr [rdx] + mov qword ptr [rdx],rcx + lea rsi,1[rdx] + mov rcx,rbp + jmp __mark_node + +__shared_record_argument_part: + mov rdx,qword ptr 8[rcx] + + pop rsi + + test rbx,rbx + jne __shared_argument_part + sub rcx,8 + jmp __mark_next_node + +__mark_record_2: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,20000000h + jbe fits__in__word_12 + or dword ptr 4[rdi+rbx*4],1 +fits__in__word_12: + cmp word ptr (-2+2)[rax],1 + ja __mark_record_2_c + je __shared_argument_part + sub rcx,8 + jmp __mark_next_node + +__mark_record_1: + cmp word ptr (-2+2)[rax],0 + jne __mark_hnf_1 + sub rcx,8 + jmp __mark_bool_or_small_string + 
+__mark_string_or_array: + je __mark_string_ + +__mark_array: + mov rbp,qword ptr 16[rcx] + test rbp,rbp + je __mark_lazy_array + + movzx rax,word ptr (-2)[rbp] + test rax,rax + je __mark_strict_basic_array + + movzx rbp,word ptr (-2+2)[rbp] + test rbp,rbp + je __mark_b_record_array + + sub rax,256 + cmp rax,rbp + je __mark_a_record_array + +__mark__ab__record__array: + push rdx + push rbx + mov rbx,rbp + + mov rbp,qword ptr 8[rcx] + add rcx,16 + push rcx + + shl rbp,3 + mov rdx,rax + imul rdx,rbp + + sub rax,rbx + add rcx,8 + add rdx,rcx + + call reorder + + pop rcx + + xchg rax,rbx + mov rbp,qword ptr (-8)[rcx] + imul rax,rbp + imul rbx,rbp + add r14,rbx + add rbx,rax + + shl rbx,3 + lea rbp,[r10+rcx] + add rbp,rbx + + pop rbx + pop rdx + + mov edx,dword ptr (bit_set_table2)[rdx] + or dword ptr [rdi+rbx*4],edx + + lea rdx,[rcx+rax*8] + jmp __mark_r_array + +__mark_a_record_array: + imul rax,qword ptr 8[rcx] + add rcx,16 + jmp __mark_lr_array + +__mark_lazy_array: + mov rax,qword ptr 8[rcx] + add rcx,16 + +__mark_lr_array: + mov edx,dword ptr (bit_set_table2)[rdx] + mov rbp,r10 + or dword ptr [rdi+rbx*4],edx + lea rdx,[rcx+rax*8] + add rbp,rdx +__mark_r_array: + shr rbp,8 + + cmp rbx,rbp + jae __skip_mark_lazy_array_bits + + inc rbx + +__mark_lazy_array_bits: + or dword ptr [rdi+rbx*4],1 + inc rbx + cmp rbx,rbp + jbe __mark_lazy_array_bits + +__skip_mark_lazy_array_bits: + add r14,3 + add r14,rax + + cmp rax,1 + jbe __mark_array_length_0_1 + + mov rbp,qword ptr [rdx] + mov rbx,qword ptr [rcx] + mov qword ptr [rdx],rbx + mov qword ptr [rcx],rbp + + mov rbp,qword ptr (-8)[rdx] + sub rdx,8 + mov rbx,qword ptr lazy_array_list + add rbp,2 + mov qword ptr [rdx],rbx + mov qword ptr (-8)[rcx],rbp + mov qword ptr (-16)[rcx],rax + sub rcx,16 + mov qword ptr lazy_array_list,rcx + + mov rcx,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rsi + lea rsi,(-8)[rdx] + jmp __mark_node + +__mark_array_length_0_1: + lea rcx,(-16)[rcx] + jb __mark_next_node + + mov rbx,qword ptr 24[rcx] 
+ mov rbp,qword ptr 16[rcx] + mov rdx,qword ptr lazy_array_list + mov qword ptr 24[rcx],rbp + mov qword ptr 16[rcx],rdx + mov qword ptr [rcx],rax + mov qword ptr lazy_array_list,rcx + mov qword ptr 8[rcx],rbx + add rcx,8 + + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + lea rsi,2[rcx] + mov rcx,rbp + jmp __mark_node + +__mark_b_record_array: + mov rbp,qword ptr 8[rcx] + sub rax,256 + imul rax,rbp + add rax,3 + jmp __mark_basic_array + +__mark_strict_basic_array: + mov rax,qword ptr 8[rcx] + cmp rbp,offset BOOL+2 + je __mark__strict__bool__array +__mark__strict__real__array: +__mark__strict__int__array: + add rax,3 + jmp __mark_basic_array +__mark__strict__bool__array: + add rax,24+7 + shr rax,3 + jmp __mark_basic_array + +__mark_string_: + mov rax,qword ptr 8[rcx] + add rax,16+7 + shr rax,3 + +__mark_basic_array: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,rax + + or dword ptr [rdi+rbx*4],edx + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae __mark_next_node + + inc rbx + mov rbp,1 + + cmp rbx,rax + jae __last__string__bits + +__mark_string_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb __mark_string_lp + +__last__string__bits: + or dword ptr [rdi+rbx*4],ebp + jmp __mark_next_node diff --git a/amark_prefetch.asm b/amark_prefetch.asm new file mode 100644 index 0000000..b0e38c4 --- /dev/null +++ b/amark_prefetch.asm @@ -0,0 +1,1748 @@ + +_TEXT ends + _DATA segment +n_queue_items: + dq 0 +queue_first: + dq 0 +queue: + dq 0,0,0,0,0,0,0,0 + dq 0,0,0,0,0,0,0,0 +_DATA ends + _TEXT segment + +pmark: + mov rax,qword ptr heap_size_65 + xor rbx,rbx + + mov qword ptr n_marked_words,rbx + shl rax,6 + + mov qword ptr heap_size_64_65,rax + mov qword ptr lazy_array_list,rbx + + lea rsi,(-4000)[rsp] + + mov rax,qword ptr caf_list + + mov qword ptr end_stack,rsi + + mov r15,0 + mov r8,0 + + mov r10,neg_heap_p3 + mov r11,heap_size_64_65 + mov r13,qword ptr end_stack + mov r14,0 + + test rax,rax + je end_pmark_cafs + 
+pmark_cafs_lp: + mov rbx,qword ptr [rax] + mov rbp,qword ptr (-8)[rax] + + push rbp + lea rbp,8[rax] + lea r12,8[rax+rbx*8] + + call pmark_stack_nodes + + pop rax + test rax,rax + jne pmark_cafs_lp + +end_pmark_cafs: + mov rsi,qword ptr stack_top + mov rbp,qword ptr stack_p + + mov r12,rsi + call pmark_stack_nodes + jmp continue_mark_after_pmark + +; %rbp : pointer to stack element +; %rdi : heap_vector +; %rax ,%rbx ,%rcx ,%rdx ,%rsi : free + +pmark_stack_nodes: + cmp rbp,r12 + je end_pmark_nodes +pmark_stack_nodes_: + mov rcx,qword ptr [rbp] + + add rbp,8 + lea rdx,[r10+rcx] + + cmp rdx,r11 + jnc pmark_stack_nodes + + mov rbx,rdx + and rdx,31*8 + + shr rbx,8 + mov esi,dword ptr (bit_set_table2)[rdx] + + test esi,dword ptr [rdi+rbx*4] + jne pmark_stack_nodes + + push rbp + + push 0 + + jmp pmark_node_ + +pmark_hnf_2: + cmp rsi,20000000h + jbe pmark_fits_in_word_6 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_6: + add r14,3 + +pmark_record_2_c: + mov rbx,qword ptr 8[rcx] + push rbx + + cmp rsp,r13 + jb pmarkr_using_reversal + +pmark_node2: +pmark_shared_argument_part: + mov rcx,qword ptr [rcx] + +pmark_node: + lea rdx,[r10+rcx] + cmp rdx,r11 + jnc pmark_next_node + + mov rbx,rdx + and rdx,31*8 + + shr rbx,8 + mov esi,dword ptr (bit_set_table2)[rdx] + + test esi,dword ptr [rdi+rbx*4] + jne pmark_next_node + +pmark_node_: + + prefetch [rcx] + + mov qword ptr (queue)[r8],rcx + lea rdx,[r8+r15*8] + add r8,8 + + and r8,15*8 + and rdx,15*8 + + cmp r15,-4 + je pmark_last_item_in_queue + +pmark_add_items: + mov rcx,qword ptr [rsp] + test rcx,rcx + jne pmark_add_stacked_item + +pmark_add_items2: + mov rbp,qword ptr 8[rsp] + cmp rbp,r12 + je pmark_last_item_in_queue + + mov rcx,qword ptr [rbp] + add rbp,8 + mov qword ptr 8[rsp],rbp + + lea rbp,[r10+rcx] + cmp rbp,r11 + jnc pmark_add_items2 + mov rax,rbp + and rbp,31*8 + shr rax,8 + mov esi,dword ptr (bit_set_table2)[rbp] + test esi,dword ptr [rdi+rax*4] + jne pmark_add_items2 + + prefetch [rcx] + + mov qword ptr 
(queue)[r8],rcx + add r8,8 + and r8,15*8 + + sub r15,1 + + cmp r15,-4 + jne pmark_add_items2 + jmp pmark_last_item_in_queue + +pmark_add_stacked_item: + add rsp ,8 + + lea rbp,[r10+rcx] + cmp rbp,r11 + jnc pmark_add_items + mov rax,rbp + and rbp,31*8 + shr rax,8 + mov esi,dword ptr (bit_set_table2)[rbp] + test esi,dword ptr [rdi+rax*4] + jne pmark_add_items + + prefetch [rcx] + + mov qword ptr (queue)[r8],rcx + add r8,8 + and r8,15*8 + + sub r15,1 + + cmp r15,-4 + jne pmark_add_items + +pmark_last_item_in_queue: + mov rcx,qword ptr (queue)[rdx] + + lea rdx,[r10+rcx] + + mov rbx,rdx + and rdx,31*8 + shr rbx,8 + mov esi,dword ptr (bit_set_table2)[rdx] + + test esi,dword ptr [rdi+rbx*4] + jne pmark_next_node + +pmark_arguments: + mov rax,qword ptr [rcx] + test rax,2 + je pmark_lazy_node + + movzx rbp,word ptr (-2)[rax] + + test rbp,rbp + je pmark_hnf_0 + + or dword ptr [rdi+rbx*4],esi + add rcx,8 + + cmp rbp,256 + jae pmark_record + + sub rbp,2 + je pmark_hnf_2 + jb pmark_hnf_1 + +pmark_hnf_3: + mov rdx,qword ptr 8[rcx] + + cmp rsi,20000000h + jbe pmark_fits_in_word_1 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_1: + + add r14,3 + lea rax,[r10+rdx] + mov rbx,rax + + and rax,31*8 + shr rbx,8 + + mov esi,dword ptr (bit_set_table2)[rax] + + test esi,dword ptr [rdi+rbx*4] + jne pmark_shared_argument_part + +pmark_no_shared_argument_part: + or dword ptr [rdi+rbx*4],esi + add rbp,1 + + add r14,rbp + lea rax,[rax+rbp*8] + lea rdx,(-8)[rdx+rbp*8] + + cmp rax,32*8 + jbe pmark_fits_in_word_2 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_2: + + mov rbx,qword ptr [rdx] + sub rbp,2 + push rbx + +pmark_push_hnf_args: + mov rbx,qword ptr (-8)[rdx] + sub rdx,8 + push rbx + sub rbp,1 + jge pmark_push_hnf_args + + cmp rsp,r13 + jae pmark_node2 + + jmp pmarkr_using_reversal + +pmark_hnf_1: + cmp rsi,40000000h + jbe pmark_fits_in_word_4 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_4: + add r14,2 + mov rcx,qword ptr [rcx] + jmp pmark_node + +pmark_lazy_node_1: + add rcx,8 + 
or dword ptr [rdi+rbx*4],esi + cmp rsi,20000000h + jbe pmark_fits_in_word_3 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_3: + add r14,3 + + cmp rbp,1 + je pmark_node2 + +pmark_selector_node_1: + add rbp,2 + mov rdx,qword ptr [rcx] + je pmark_indirection_node + + lea rsi,[r10+rdx] + mov rbx,rsi + + shr rbx,8 + and rsi,31*8 + + add rbp,1 + + mov esi,dword ptr (bit_set_table2)[rsi] + jle pmark_record_selector_node_1 + + test esi,dword ptr [rdi+rbx*4] + jne pmark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + je pmark_node3 + + cmp word ptr (-2)[rbp],2 + jbe pmark_small_tuple_or_record + +pmark_large_tuple_or_record: + mov rbp,qword ptr 16[rdx] + mov r9,rbp + + add rbp,r10 + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + mov ebp,dword ptr (bit_set_table2)[rbp] + test ebp,dword ptr [rdi+rbx*4] + jne pmark_node3 + + ifdef NEW_DESCRIPTORS + mov eax,(-8)[rax] + lea rbp,__indirection + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr 4[rax] + mov rbp,rcx + + cmp rax,16 + jl pmark_tuple_selector_node_1 + mov rdx,r9 + je pmark_tuple_selector_node_2 + mov rcx,qword ptr (-24)[r9+rax] + mov qword ptr [rbp],rcx + jmp pmark_node + +pmark_tuple_selector_node_2: + mov rcx,qword ptr [r9] + mov qword ptr [rbp],rcx + jmp pmark_node + endif + +pmark_small_tuple_or_record: + ifdef NEW_DESCRIPTORS + mov eax,(-8)[rax] + lea rbp,__indirection + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr 4[rax] + mov rbp,rcx +pmark_tuple_selector_node_1: + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rbp],rcx + else + mov eax,(-8)[rax] + push rcx + mov rcx,rdx + mov eax,4[rax] + call near ptr rax + pop rdx + + lea r9,__indirection + mov qword ptr (-8)[rdx],r9 + mov qword ptr [rdx],rcx + endif + jmp pmark_node + +pmark_record_selector_node_1: + je pmark_strict_record_selector_node_1 + + test esi,dword ptr [rdi+rbx*4] + jne pmark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + je pmark_node3 + + cmp word ptr (-2)[rbp],258 + jbe pmark_small_tuple_or_record + ifdef NEW_DESCRIPTORS + mov rbp,qword ptr 
16[rdx] + mov r9,rbp + + add rbp,r10 + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + mov ebp,dword ptr (bit_set_table2)[rbp] + test ebp,dword ptr [rdi+rbx*4] + jne pmark_node3 + + mov eax,(-8)[rax] + lea rbp,__indirection + mov qword ptr (-8)[rcx],rbp + movzx eax,word ptr 4[rax] + mov rbp,rcx + + cmp rax,16 + jle pmark_record_selector_node_2 + mov rdx,r9 + sub rax,24 +pmark_record_selector_node_2: + mov rcx,qword ptr [rdx+rax] + mov qword ptr [rbp],rcx + jmp pmark_node + else + jmp pmark_large_tuple_or_record + endif + +pmark_strict_record_selector_node_1: + test esi,dword ptr [rdi+rbx*4] ; record node already marked: do not select + jne pmark_node3 + + mov rbp,qword ptr [rdx] + test rbp,2 + je pmark_node3 + + cmp word ptr (-2)[rbp],258 + jbe pmark_select_from_small_record + + mov r9,qword ptr 16[rdx] ; fix: keep the argument-part pointer in r9, it is read below for offsets above 16 + lea rbp,[r9+r10] ; same sum the former add rbp,r10 produced; flags are reset by the and below + mov rbx,rbp + and rbp,31*8 + shr rbx,8 + mov ebp,dword ptr (bit_set_table2)[rbp] + test ebp,dword ptr [rdi+rbx*4] ; argument part already marked: do not select + jne pmark_node3 + +pmark_select_from_small_record: + mov eax,(-8)[rax] + sub rcx,8 + + ifdef NEW_DESCRIPTORS + movzx ebx,word ptr 4[rax] + cmp rbx,16 + jle pmark_strict_record_selector_node_2 + mov rbx,qword ptr (-24)[r9+rbx] ; offset above 16: field lives in the argument part (r9) + jmp pmark_strict_record_selector_node_3 +pmark_strict_record_selector_node_2: + mov rbx,qword ptr [rdx+rbx] +pmark_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr 6[rax] + test rbx,rbx + je pmark_strict_record_selector_node_5 + cmp rbx,16 + jle pmark_strict_record_selector_node_4 + mov rdx,r9 + sub rbx,24 +pmark_strict_record_selector_node_4: + mov rbx,qword ptr [rdx+rbx] + mov qword ptr 16[rcx],rbx +pmark_strict_record_selector_node_5: + + mov rax,qword ptr (-8)[rax] + mov qword ptr [rcx],rax + else + mov eax,4[rax] + call near ptr rax + endif + jmp pmark_next_node + +pmark_indirection_node: +pmark_node3: + mov rcx,rdx + jmp pmark_node + +pmark_next_node: + pop rcx + test rcx,rcx + jne pmark_node + + pop rbp + cmp rbp,r12 + jne pmark_stack_nodes_ + +end_pmark_nodes: + test r15,r15 + je end_pmark_nodes_ + + push rbp + + push 0 + 
+ lea rdx,[r8+r15*8] + add r15,1 + + and rdx,15*8 + + jmp pmark_last_item_in_queue + +end_pmark_nodes_: + ret + +pmark_lazy_node: + movsxd rbp,dword ptr (-4)[rax] + test rbp,rbp + je pmark_real_or_file + + cmp rbp,1 + jle pmark_lazy_node_1 + + cmp rbp,256 + jge pmark_closure_with_unboxed_arguments + inc rbp + or dword ptr [rdi+rbx*4],esi + + add r14,rbp + lea rdx,[rdx+rbp*8] + lea rcx,[rcx+rbp*8] + + cmp rdx,32*8 + jbe pmark_fits_in_word_7 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_7: + sub rbp,3 +pmark_push_lazy_args: + mov rbx,qword ptr (-8)[rcx] + sub rcx,8 + push rbx + sub rbp,1 + jge pmark_push_lazy_args + + sub rcx,8 + + cmp rsp,r13 + jae pmark_node2 + + jmp pmarkr_using_reversal + +pmark_closure_with_unboxed_arguments: + mov rax,rbp + and rbp,255 + sub rbp,1 + je pmark_real_or_file + + shr rax,8 + add rbp,2 + + or dword ptr [rdi+rbx*4],esi + add r14,rbp + lea rdx,[rdx+rbp*8] + + sub rbp,rax + + cmp rdx,32*8 + jbe pmark_fits_in_word_7_ + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_7_: + sub rbp,2 + jl pmark_next_node + + lea rcx,16[rcx+rbp*8] + jne pmark_push_lazy_args + +pmark_closure_with_one_boxed_argument: + mov rcx,qword ptr (-8)[rcx] + jmp pmark_node + +pmark_hnf_0: + lea r9,dINT+2 + cmp rax,r9 + jb pmark_real_file_or_string + + or dword ptr [rdi+rbx*4],esi + + lea r9,CHAR+2 + cmp rax,r9 + ja pmark_normal_hnf_0 + +pmark_bool: + add r14,2 + + cmp rsi,40000000h + jbe pmark_next_node + + or dword ptr 4[rdi+rbx*4],1 + jmp pmark_next_node + +pmark_normal_hnf_0: + inc r14 + jmp pmark_next_node + +pmark_real_file_or_string: + lea r9,__STRING__+2 + cmp rax,r9 + jbe pmark_string_or_array + +pmark_real_or_file: + or dword ptr [rdi+rbx*4],esi + add r14,3 + + cmp rsi,20000000h + jbe pmark_next_node + + or dword ptr 4[rdi+rbx*4],1 + jmp pmark_next_node + +pmark_record: + sub rbp,258 + je pmark_record_2 + jl pmark_record_1 + +pmark_record_3: + add r14,3 + + cmp rsi,20000000h + jbe pmark_fits_in_word_13 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_13: 
+ mov rdx,qword ptr 8[rcx] + + movzx rbx,word ptr (-2+2)[rax] + lea rsi,[r10+rdx] + + mov rax,rsi + and rsi,31*8 + + shr rax,8 + sub rbx,1 + + mov edx,dword ptr (bit_set_table2)[rsi] + jb pmark_record_3_bb + + test edx,dword ptr [rdi+rax*4] + jne pmark_node2 + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + jbe pmark_push_record_arguments + or dword ptr 4[rdi+rax*4],1 +pmark_push_record_arguments: + mov rdx,qword ptr 8[rcx] + mov rbp,rbx + shl rbx,3 + add rdx,rbx + sub rbp,1 + jge pmark_push_hnf_args + + jmp pmark_node2 + +pmark_record_3_bb: + test edx,dword ptr [rdi+rax*4] + jne pmark_next_node + + add rbp,1 + or dword ptr [rdi+rax*4],edx + add r14,rbp + lea rsi,[rsi+rbp*8] + + cmp rsi,32*8 + jbe pmark_next_node + + or dword ptr 4[rdi+rax*4],1 + jmp pmark_next_node + +pmark_record_2: + cmp rsi,20000000h + jbe pmark_fits_in_word_12 + or dword ptr 4[rdi+rbx*4],1 +pmark_fits_in_word_12: + add r14,3 + + cmp word ptr (-2+2)[rax],1 + ja pmark_record_2_c + je pmark_node2 + jmp pmark_next_node + +pmark_record_1: + cmp word ptr (-2+2)[rax],0 + jne pmark_hnf_1 + + jmp pmark_bool + +pmark_string_or_array: + je pmark_string_ + +pmark_array: + mov rbp,qword ptr 16[rcx] + test rbp,rbp + je pmark_lazy_array + + movzx rax,word ptr (-2)[rbp] + + test rax,rax + je pmark_strict_basic_array + + movzx rbp,word ptr (-2+2)[rbp] + test rbp,rbp + je pmark_b_record_array + + cmp rsp,r13 + jb pmark_array_using_reversal + + sub rax,256 + cmp rax,rbp + je pmark_a_record_array + +pmark_ab_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae pmark_end_set_ab_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_ab_array_bits + +pmark_ab_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb pmark_ab_array_lp + +pmark_last_ab_array_bits: + or dword ptr [rdi+rbx*4],ebp + 
+pmark_end_set_ab_array_bits: + mov rax,qword ptr 8[rcx] + mov rdx,qword ptr 16[rcx] + movzx rbx,word ptr (-2+2)[rdx] + movzx rdx,word ptr (-2)[rdx] + shl rbx,3 + lea rdx,(-2048)[rdx*8] + push rbx + push rdx + lea rbp,24[rcx] + push r12 + jmp pmark_ab_array_begin + +pmark_ab_array: + mov rbx,qword ptr 16[rsp] + push rax + push rbp + lea r12,[rbp+rbx] + + call pmark_stack_nodes + + mov rbx,qword ptr (8+16)[rsp] + pop rbp + pop rax + add rbp,rbx +pmark_ab_array_begin: + sub rax,1 + jnc pmark_ab_array + + pop r12 + add rsp,16 + jmp pmark_next_node + +pmark_a_record_array: + or dword ptr [rdi+rbx*4],esi + mov rbp,qword ptr 8[rcx] + + imul rax,rbp + push rax + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae pmark_end_set_a_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_a_array_bits + +pmark_a_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb pmark_a_array_lp + +pmark_last_a_array_bits: + or dword ptr [rdi+rbx*4],ebp + +pmark_end_set_a_array_bits: + pop rax + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + call pmark_stack_nodes + + pop r12 + jmp pmark_next_node + +pmark_lazy_array: + cmp rsp,r13 + jb pmark_array_using_reversal + + or dword ptr [rdi+rbx*4],esi + mov rax,qword ptr 8[rcx] + + add rax,3 + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae pmark_end_set_lazy_array_bits + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_lazy_array_bits + +pmark_lazy_array_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb pmark_lazy_array_lp + +pmark_last_lazy_array_bits: + or dword ptr [rdi+rbx*4],ebp + +pmark_end_set_lazy_array_bits: + mov rax,qword ptr 8[rcx] + lea rbp,24[rcx] + + push r12 + lea r12,24[rcx+rax*8] + + call pmark_stack_nodes + + pop r12 + jmp pmark_next_node + +pmark_array_using_reversal: + push 0 + mov rsi,1 + jmp pmarkr_node + +pmark_strict_basic_array: + mov rax,qword ptr 8[rcx] + cmp rbp,offset 
BOOL+2 + je pmark_strict_bool_array +pmark_strict_real_array: +pmark_strict_int_array: + add rax,3 + jmp pmark_basic_array_ +pmark_strict_bool_array: + add rax,24+7 + shr rax,3 + jmp pmark_basic_array_ + +pmark_b_record_array: + mov rbp,qword ptr 8[rcx] + sub rax,256 + imul rax,rbp + add rax,3 + jmp pmark_basic_array_ + +pmark_string_: + mov rax,qword ptr 8[rcx] + add rax,16+7 + shr rax,3 + +pmark_basic_array_: + or dword ptr [rdi+rbx*4],esi + + add r14,rax + lea rax,(-8)[rcx+rax*8] + + add rax,r10 + shr rax,8 + + cmp rbx,rax + jae pmark_next_node + + inc rbx + mov rbp,1 + cmp rbx,rax + jae pmark_last_string_bits + +pmark_string_lp: + or dword ptr [rdi+rbx*4],ebp + inc rbx + cmp rbx,rax + jb pmark_string_lp + +pmark_last_string_bits: + or dword ptr [rdi+rbx*4],ebp + jmp pmark_next_node + +end_pmarkr_using_reversal: + pop rdx + test rdx,rdx + je pmark_next_node + mov qword ptr [rdx],rcx + jmp pmark_next_node + + +pmarkr_using_reversal: + push rcx + mov rsi,1 + mov rcx,qword ptr [rcx] + jmp pmarkr_node + +pmarkr_arguments: + mov rax,qword ptr [rcx] + test al,2 + je pmarkr_lazy_node + + movzx rbp,word ptr (-2)[rax] + test rbp,rbp + je pmarkr_hnf_0 + + add rcx,8 + + cmp rbp,256 + jae pmarkr_record + + sub rbp,2 + je pmarkr_hnf_2 + jb pmarkr_hnf_1 + +pmarkr_hnf_3: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + + or dword ptr [rdi+rbx*4],edx + + cmp rdx,20000000h + + mov rax,qword ptr 8[rcx] + + jbe pmarkr_fits_in_word_1 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_1: + add rax,r10 + + mov rbx,rax + and rax,31*8 + + shr rbx,8 + + mov edx,dword ptr (bit_set_table2)[rax] + test edx,dword ptr [rdi+rbx*4] + jne pmarkr_shared_argument_part + +pmarkr_no_shared_argument_part: + or dword ptr [rdi+rbx*4],edx + mov rdx,qword ptr 8[rcx] + + add rbp,1 + mov qword ptr 8[rcx],rsi + + add r14,rbp + add rcx,8 + + shl rbp,3 + or qword ptr [rdx],1 + + add rax,rbp + add rdx,rbp + + cmp rax,32*8 + jbe pmarkr_fits_in_word_2 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_2: 
+ + mov rbp ,qword ptr (-8)[rdx] + mov qword ptr (-8)[rdx],rcx + lea rsi,(-8)[rdx] + mov rcx,rbp + jmp pmarkr_node + +pmarkr_hnf_1: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,2 + or dword ptr [rdi+rbx*4],edx + cmp rdx,40000000h + jbe pmarkr_shared_argument_part + or dword ptr 4[rdi+rbx*4],1 +pmarkr_shared_argument_part: + mov rbp,qword ptr [rcx] + mov qword ptr [rcx],rsi + lea rsi,2[rcx] + mov rcx,rbp + jmp pmarkr_node + +pmarkr_no_selector_2: + pop rbx +pmarkr_no_selector_1: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,20000000h + jbe pmarkr_shared_argument_part + + or dword ptr 4[rdi+rbx*4],1 + jmp pmarkr_shared_argument_part + +pmarkr_lazy_node_1: + je pmarkr_no_selector_1 + +pmarkr_selector_node_1: + add rbp,2 + je pmarkr_indirection_node + + add rbp,1 + + push rbx + mov rbp,qword ptr [rcx] + push rax + lea rax,[r10+rbp] + + jle pmarkr_record_selector_node_1 + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbx*4] + pop rax + jne pmarkr_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + je pmarkr_no_selector_2 + + cmp word ptr (-2)[rbx],2 + jbe pmarkr_small_tuple_or_record + +pmarkr_large_tuple_or_record: + mov r8,qword ptr 16[rbp] + mov r9,r8 + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + mov r8d,dword ptr (bit_set_table2)[r8] + test r8d,dword ptr [rdi+rbx*4] + jne pmarkr_no_selector_2 + + ifdef NEW_DESCRIPTORS + mov eax,dword ptr (-8)[rax] + lea rdx,__indirection + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr 4[rax] + mov r8,rcx + + cmp rax,16 + jl pmarkr_tuple_selector_node_1 + mov rdx,r9 + je pmarkr_tuple_selector_node_2 + mov rcx,qword ptr (-24)[r9+rax] + mov qword ptr [r8],rcx + jmp pmarkr_node + +pmarkr_tuple_selector_node_2: + mov rcx,qword ptr [r9] + mov qword ptr [r8],rcx + jmp pmarkr_node + endif + +pmarkr_small_tuple_or_record: + ifdef NEW_DESCRIPTORS + mov eax,(-8)[rax] + lea rdx,__indirection + pop 
rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr 4[rax] + mov r8,rcx +pmarkr_tuple_selector_node_1: + mov rcx,qword ptr [rbp+rax] + mov qword ptr [r8],rcx + jmp pmarkr_node + else + mov eax,(-8)[rax] + pop rbx + + push rcx + mov rcx,qword ptr [rcx] + mov eax,4[rax] + call near ptr rax + pop rdx + + mov qword ptr (-8)[rdx],offset __indirection + mov qword ptr [rdx],rcx + endif + jmp pmarkr_node + +pmarkr_record_selector_node_1: + je pmarkr_strict_record_selector_node_1 + + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbx*4] ; record node already marked: do not select + pop rax + jne pmarkr_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + je pmarkr_no_selector_2 + + cmp word ptr (-2)[rbx],258 + ifdef NEW_DESCRIPTORS + jbe pmarkr_small_record + + mov r8,qword ptr 16[rbp] + mov r9,r8 ; r9 = argument-part pointer of the record in rbp + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + mov r8d,dword ptr (bit_set_table2)[r8] + test r8d,dword ptr [rdi+rbx*4] ; argument part already marked: do not select + jne pmarkr_no_selector_2 + +pmarkr_small_record: + mov eax,dword ptr (-8)[rax] + lea rdx,__indirection + pop rbx + + mov qword ptr (-8)[rcx],rdx + movzx eax,word ptr 4[rax] + mov r8,rcx ; NOTE(review): r8 also indexes queue in pmark_node_ - confirm it may be clobbered here + + cmp rax,16 + jle pmarkr_record_selector_node_2 + mov rbp,r9 ; fix: was mov rdx,r9, but the load below indexes rbp, so offsets above 16 read the wrong block + sub rax,24 +pmarkr_record_selector_node_2: + mov rcx,qword ptr [rbp+rax] + mov qword ptr [r8],rcx + jmp pmarkr_node + else + jbe pmarkr_small_tuple_or_record + jmp pmarkr_large_tuple_or_record + endif + +pmarkr_strict_record_selector_node_1: + mov rbx,rax + and rax,31*8 + shr rbx,8 + mov eax,dword ptr (bit_set_table2)[rax] + test eax,dword ptr [rdi+rbx*4] + pop rax + jne pmarkr_no_selector_2 + + mov rbx,qword ptr [rbp] + test bl,2 + je pmarkr_no_selector_2 + + cmp word ptr (-2)[rbx],258 + jle pmarkr_select_from_small_record + + mov r8,qword ptr 16[rbp] + mov r9,r8 + + add r8,r10 + mov rbx,r8 + and r8,31*8 + shr rbx,8 + mov r8d,dword ptr (bit_set_table2)[r8] + test r8d,dword ptr [rdi+rbx*4] + jne pmarkr_no_selector_2 + +pmarkr_select_from_small_record: + ifdef NEW_DESCRIPTORS + mov 
eax,(-8)[rax] + sub rcx,8 + + movzx ebx,word ptr 4[rax] + cmp rbx,16 + jle pmarkr_strict_record_selector_node_2 + mov rbx,qword ptr (-24)[r9+rbx] ; offset above 16: field lives in the argument part (r9) + jmp pmarkr_strict_record_selector_node_3 +pmarkr_strict_record_selector_node_2: + mov rbx,qword ptr [rbp+rbx] ; fix: was [rdx+rbx]; rbp holds the record node, rdx is only the bit_set_table2 offset here +pmarkr_strict_record_selector_node_3: + mov qword ptr 8[rcx],rbx + + movzx ebx,word ptr 6[rax] + test rbx,rbx ; zero second offset: nothing more to copy + je pmarkr_strict_record_selector_node_5 + cmp rbx,16 + jle pmarkr_strict_record_selector_node_4 + mov rbp,r9 + sub rbx,24 +pmarkr_strict_record_selector_node_4: + mov rbx,qword ptr [rbp+rbx] + mov qword ptr 16[rcx],rbx +pmarkr_strict_record_selector_node_5: + pop rbx + + mov rax,qword ptr (-8)[rax] + mov qword ptr [rcx],rax + else + mov eax,(-8)[rax] + pop rbx + mov rdx,qword ptr [rcx] + sub rcx,8 + mov eax,4[rax] + call near ptr rax + endif + jmp pmarkr_node + +pmarkr_indirection_node: + mov rcx,qword ptr [rcx] + jmp pmarkr_node + +pmarkr_hnf_2: + mov edx,dword ptr (bit_set_table2)[rdx] + add r14,3 + or dword ptr [rdi+rbx*4],edx + cmp rdx,20000000h + jbe pmarkr_fits_in_word_6 + or dword ptr 4[rdi+rbx*4],1 +pmarkr_fits_in_word_6: + +pmarkr_record_2_c: + mov rax,qword ptr [rcx] + mov rbp,qword ptr 8[rcx] + or rax,2 + mov qword ptr 8[rcx],rsi + mov qword ptr [rcx],rax + lea rsi,8[rcx] + mov rcx,rbp + +pmarkr_node: + lea rdx,[r10+rcx] + + cmp rdx,r11 ; unsigned range check of rcx plus r10 against r11 + jae pmarkr_next_node + + mov rbx,rdx + and rdx,31*8 ; rdx = byte offset into bit_set_table2 + shr rbx,8 ; rbx = dword index into the bit vector at rdi + mov ebp,dword ptr (bit_set_table2)[rdx] + test ebp,dword ptr [rdi+rbx*4] + je pmarkr_arguments + +pmarkr_next_node: + test rsi,3 + jne pmarkr_parent + + mov rbp,qword ptr (-8)[rsi] + mov rdx,qword ptr [rsi] + mov qword ptr [rsi],rcx + mov qword ptr (-8)[rsi],rdx + sub rsi,8 + + mov rcx,rbp + and rbp,3 + and rcx,-4 + or rsi,rbp + jmp pmarkr_node + +pmarkr_parent: + mov rbx,rsi + and rsi,-4 + je end_pmarkr_using_reversal + + and rbx,3 + mov rbp,qword ptr [rsi] + mov qword ptr [rsi],rcx + + sub rbx,1 + je pmarkr_argument_part_parent + + lea rcx,(-8)[rsi] + mov rsi,rbp + jmp pmarkr_next_node + 
+pmarkr_argument_part_parent:	; reached from pmarkr_parent when the tag of rsi was 1; rbp = saved link, rsi = untagged slot address
+	and rbp,-4	; drop the tag bits of the saved link
+	mov rdx,rsi
+	mov rcx,qword ptr (-8)[rbp]	; next node to visit = word before the link target
+	mov rbx,qword ptr [rbp]
+	mov qword ptr (-8)[rbp],rbx	; move the word at [rbp] one slot down
+	mov qword ptr [rbp],rdx	; and store the slot address (old rsi) in its place
+	lea rsi,(2-8)[rbp]	; new parent pointer = rbp-8 with tag 2
+	jmp pmarkr_node
+	; rax = first word of the node; the signed 32-bit value at -4[rax] selects the case below
+pmarkr_lazy_node:
+	movsxd rbp,dword ptr (-4)[rax]	; NOTE(review): presumably the closure arity; confirm
+	test rbp,rbp
+	je pmarkr_real_or_file	; 0: handled as a 3-word node without pointers
+	; rcx now addresses the first argument slot
+	add rcx,8
+	cmp rbp,1
+	jle pmarkr_lazy_node_1	; <= 1 (signed): handled outside this chunk
+	cmp rbp,256
+	jge pmarkr_closure_with_unboxed_arguments
+	; 2..255: rbp+1 words are counted in r14
+	add rbp,1
+	mov rax,rdx	; keep the bit-table offset for the overflow check below
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	add r14,rbp
+	; rax = bit-table offset + 8*size; rbp = index of the last argument
+	lea rax,[rax+rbp*8]
+	sub rbp,2
+	; set the bit of this node
+	or dword ptr [rdi+rbx*4],edx
+	; if the node extends past this bitmap word, also set bit 0 of the next word
+	cmp rax,32*8
+	jbe pmarkr_fits_in_word_7
+	or dword ptr 4[rdi+rbx*4],1
+pmarkr_fits_in_word_7:
+pmarkr_closure_with_unboxed_arguments_2:	; rcx = first argument slot, rbp = index of the last slot to traverse
+	lea rdx,[rcx+rbp*8]	; rdx = address of the last argument slot
+	mov rax,qword ptr [rcx]
+	or rax,2	; tag the first argument with 2
+	mov qword ptr [rcx],rax
+	mov rcx,qword ptr [rdx]	; descend into the last argument
+	mov qword ptr [rdx],rsi	; its slot keeps the parent pointer meanwhile
+	mov rsi,rdx
+	jmp pmarkr_node
+	; value >= 256: low byte and the bits above it are two separate counts
+pmarkr_closure_with_unboxed_arguments:
+	mov rax,rbp
+	and rbp,255	; rbp = low byte
+	; low byte == 1 is a special case
+	sub rbp,1
+	je pmarkr_closure_1_with_unboxed_argument
+	add rbp,2	; rbp = low byte + 1 words
+	; rax = upper count
+	shr rax,8
+	add r14,rbp
+	; rcx is borrowed for the overflow check and restored by the pop below
+	push rcx
+	lea rcx,[rdx+rbp*8]
+	; set the bit of this node; rbp = size minus upper count
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	sub rbp,rax
+	; NOTE(review): the upper count is presumably the number of unboxed arguments, which are not traversed; confirm
+	or dword ptr [rdi+rbx*4],edx
+	cmp rcx,32*8
+	jbe pmarkr_fits_in_word_7_
+	or dword ptr 4[rdi+rbx*4],1
+pmarkr_fits_in_word_7_:
+	pop rcx
+	sub rbp,2	; the flags of this sub drive the two jumps below
+	jg pmarkr_closure_with_unboxed_arguments_2	; more than one slot left to traverse
+	je pmarkr_shared_argument_part	; exactly one (label defined outside this chunk)
+	sub rcx,8	; none: rcx back to the node start
+	jmp pmarkr_next_node
+	; undo the add rcx,8 done in pmarkr_lazy_node and treat the node as a 3-word node
+pmarkr_closure_1_with_unboxed_argument:
+	sub rcx,8
+	jmp pmarkr_real_or_file
+	; dispatch on the descriptor address in rax by comparing it with dINT+2, CHAR+2 and __STRING__+2
+pmarkr_hnf_0:
+	cmp rax,offset dINT+2
+	jne pmarkr_no_int_3
+	; dINT: values below 33 (unsigned) use the entries of small_integers instead
+	mov rbp,qword ptr 8[rcx]
+	cmp rbp,33
+	; the flags of the cmp above are still valid here
+	jb pmarkr_small_int
+	; 2-word node: set its bit, count 2 words, set bit 0 of the next word when the mask is above 40000000h
+pmarkr_bool_or_small_string:
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	add r14,2
+	or dword ptr [rdi+rbx*4],edx
+	cmp rdx,40000000h
+	jbe pmarkr_next_node
+	or dword ptr 4[rdi+rbx*4],1
+	jmp pmarkr_next_node
+	; rcx = small_integers + 16*value; the heap node itself is not marked
+pmarkr_small_int:
+	shl rbp,4
+	lea rcx,(small_integers)[rbp]
+	jmp pmarkr_next_node
+	; flags still hold the result of cmp rax,offset dINT+2
+pmarkr_no_int_3:
+	jb pmarkr_real_file_or_string	; descriptor address below dINT+2
+	; CHAR: use the entry of static_characters selected by the byte at 8[rcx]
+	cmp rax,offset CHAR+2
+	jne pmarkr_no_char_3
+	; rcx = static_characters + 16*character; the heap node itself is not marked
+	movzx rbp,byte ptr 8[rcx]
+	shl rbp,4
+	lea rcx,(static_characters)[rbp]
+	jmp pmarkr_next_node
+	; flags still hold the result of cmp rax,offset CHAR+2
+pmarkr_no_char_3:
+	jb pmarkr_bool_or_small_string	; between dINT+2 and CHAR+2: 2-word node
+	; above CHAR+2: replace rcx by an address computed from the descriptor; the heap node is not marked
+ ifdef NEW_DESCRIPTORS
+	lea rcx,((-8)-2)[rax]
+ else
+	lea rcx,((-12)-2)[rax]
+ endif
+	jmp pmarkr_next_node
+	; descriptor address below dINT+2
+pmarkr_real_file_or_string:
+	lea r9,__STRING__+2
+	cmp rax,r9
+	jbe pmarkr_string_or_array	; <= __STRING__+2 (the je at pmarkr_string_or_array reuses these flags)
+	; 3-word node without pointers: set its bit, count 3 words
+pmarkr_real_or_file:
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	add r14,3
+	; set the bit of this node
+	or dword ptr [rdi+rbx*4],edx
+	; masks above 20000000h mean the node crosses into the next bitmap word
+	cmp rdx,20000000h
+	jbe pmarkr_next_node
+	; set bit 0 of the next bitmap word
+	or dword ptr 4[rdi+rbx*4],1
+	jmp pmarkr_next_node
+	; record: rbp is compared with 258 - NOTE(review): presumably 256 + number of fields, set above this chunk; confirm
+pmarkr_record:
+	sub rbp,258
+	je pmarkr_record_2	; exactly 258
+	jl pmarkr_record_1	; below 258
+	; above 258: 3-word node whose third word (8[rcx]) points to a separate argument block; rbp = size - 258
+pmarkr_record_3:
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	add r14,3
+	or dword ptr [rdi+rbx*4],edx
+	cmp rdx,20000000h
+	jbe pmarkr_fits_in_word_13
+	or dword ptr 4[rdi+rbx*4],1
+pmarkr_fits_in_word_13:
+	movzx rbx,word ptr (-2+2)[rax]	; rbx = 16-bit field at the descriptor address - NOTE(review): presumably the number of pointer fields; confirm
+	; locate the bit of the argument block: rax = bitmap word index, rdx = bit-table offset
+	mov rdx,qword ptr 8[rcx]
+	lea rdx,[r10+rdx]
+	mov rax,rdx
+	and rdx,31*8
+	shr rax,8
+	; rsi is borrowed as scratch for the bit mask
+	push rsi
+	; argument block already marked?
+	mov esi,dword ptr (bit_set_table2)[rdx]
+	test esi,dword ptr [rdi+rax*4]
+	jne pmarkr_shared_record_argument_part
+	; mark the argument block: rbp+1 words
+	add rbp,1
+	or dword ptr [rdi+rax*4],esi
+	; rdx = bit-table offset + 8*size, for the overflow check below
+	lea rdx,[rdx+rbp*8]
+	add r14,rbp
+	; restore the parent pointer
+	pop rsi
+	; block extends into the next bitmap word: set bit 0 there
+	cmp rdx,32*8
+	jbe pmarkr_fits_in_word_14
+	or dword ptr 4[rdi+rax*4],1
+pmarkr_fits_in_word_14:
+	sub rbx,1	; flags of this sub are used by jl/je below (mov does not change them)
+	mov rdx,qword ptr 8[rcx]	; rdx = pointer to the argument block
+	jl pmarkr_record_3_bb	; field was 0
+	je pmarkr_shared_argument_part	; field was 1
+	; field >= 2: park the parent in 8[rcx]; rcx now addresses that slot
+	mov qword ptr 8[rcx],rsi
+	add rcx,8
+	; field == 2: only the first word of the argument block is traversed
+	sub rbx,1
+	je pmarkr_record_3_aab
+	; otherwise start at slot rbx of the argument block and tag its first word with 1
+	lea rsi,[rdx+rbx*8]
+	mov rax,qword ptr [rdx]
+	or rax,1
+	mov rbp,qword ptr [rsi]
+	mov qword ptr [rdx],rax
+	mov qword ptr [rsi],rcx	; the slot keeps the address of the node's pointer slot meanwhile
+	mov rcx,rbp
+	jmp pmarkr_node
+	; nothing to traverse: rcx back to the node start
+pmarkr_record_3_bb:
+	sub rcx,8
+	jmp pmarkr_next_node
+	; traverse the single word at [rdx]; parent = rdx tagged with 1
+pmarkr_record_3_aab:
+	mov rbp,qword ptr [rdx]
+	mov qword ptr [rdx],rcx
+	lea rsi,1[rdx]
+	mov rcx,rbp
+	jmp pmarkr_node
+	; argument block was already marked
+pmarkr_shared_record_argument_part:
+	mov rdx,qword ptr 8[rcx]
+	; restore the parent pointer pushed in pmarkr_record_3
+	pop rsi
+	; field != 0: continue at pmarkr_shared_argument_part, else finish this node
+	test rbx,rbx
+	jne pmarkr_shared_argument_part
+	sub rcx,8
+	jmp pmarkr_next_node
+	; size field exactly 258: 3-word node
+pmarkr_record_2:
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	add r14,3
+	or dword ptr [rdi+rbx*4],edx
+	cmp rdx,20000000h
+	jbe pmarkr_fits_in_word_12
+	or dword ptr 4[rdi+rbx*4],1
+pmarkr_fits_in_word_12:
+	cmp word ptr (-2+2)[rax],1	; 16-bit field at the descriptor address
+	ja pmarkr_record_2_c	; > 1: traverse both words
+	je pmarkr_shared_argument_part	; == 1
+	sub rcx,8	; == 0: nothing to traverse
+	jmp pmarkr_next_node
+	; size field below 258
+pmarkr_record_1:
+	cmp word ptr (-2+2)[rax],0
+	jne pmarkr_hnf_1	; field != 0: handled at pmarkr_hnf_1 (outside this chunk)
+	sub rcx,8
+	jmp pmarkr_bool_or_small_string	; field == 0: mark as a 2-word node
+	; flags still hold cmp rax,r9 from pmarkr_real_file_or_string
+pmarkr_string_or_array:
+	je pmarkr_string_	; descriptor == __STRING__+2
+	; array: 8[rcx] = element count, 16[rcx] = element descriptor (0 is handled as a lazy array)
+pmarkr_array:
+	mov rbp,qword ptr 16[rcx]
+	test rbp,rbp
+	je pmarkr_lazy_array
+	; rax = 16-bit field before the element descriptor; 0 selects the basic-array case
+	movzx rax,word ptr (-2)[rbp]
+	test rax,rax
+	je pmarkr_strict_basic_array
+	; rbp = 16-bit field at the element descriptor; 0 means elements are marked without traversal
+	movzx rbp,word ptr (-2+2)[rbp]
+	test rbp,rbp
+	je pmarkr_b_record_array
+	; rax = element size (field - 256); equal to rbp means every word of an element is traversed
+	sub rax,256
+	cmp rax,rbp
+	je pmarkr_a_record_array
+	; mixed elements: call reorder first - NOTE(review): reorder is defined elsewhere; its register contract is not visible here
+pmarkr_ab_record_array:
+	push rdx
+	push rbx
+	mov rbx,rbp
+	; rbp = element count; rcx = address of the descriptor word, saved for after the call
+	mov rbp,qword ptr 8[rcx]
+	add rcx,16
+	push rcx
+	; rbp = 8*count, rdx = element size * 8*count
+	shl rbp,3
+	mov rdx,rax
+	imul rdx,rbp
+	; rax = size - rbx, rcx = first element, rdx = end of the elements
+	sub rax,rbx
+	add rcx,8
+	add rdx,rcx
+	; NOTE(review): the code below assumes rax and rbx are unchanged by reorder; confirm
+	call reorder
+	; rcx = address of the descriptor word again
+	pop rcx
+	; after the xchg: rax = old rbx, rbx = old rax; both are multiplied by the element count
+	xchg rax,rbx
+	mov rbp,qword ptr (-8)[rcx]
+	imul rax,rbp
+	imul rbx,rbp
+	add r14,rbx	; only the rbx part is counted here; rax is added at pmarkr_skip_mark_lazy_array_bits
+	add rbx,rax
+	; rbp = heap offset of rcx + 8*(total words)
+	shl rbx,3
+	lea rbp,[r10+rcx]
+	add rbp,rbx
+	; restore bitmap word index and bit-table offset of the array node
+	pop rbx
+	pop rdx
+	; set the bit of the array node
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	or dword ptr [rdi+rbx*4],edx
+	; rdx = address of slot rax counted from rcx
+	lea rdx,[rcx+rax*8]
+	jmp pmarkr_r_array
+	; rax = element size * element count; rcx = address of the descriptor word
+pmarkr_a_record_array:
+	imul rax,qword ptr 8[rcx]
+	add rcx,16
+	jmp pmarkr_lr_array
+	; rax = element count; rcx = address of the descriptor word
+pmarkr_lazy_array:
+	mov rax,qword ptr 8[rcx]
+	add rcx,16
+	; set the bit of the array node; rdx = address of slot rax from rcx, rbp = its heap offset
+pmarkr_lr_array:
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	mov rbp,r10
+	or dword ptr [rdi+rbx*4],edx
+	lea rdx,[rcx+rax*8]
+	add rbp,rdx
+pmarkr_r_array:
+	shr rbp,8	; rbp = bitmap word index of the end offset
+	; array ends within the bitmap word of its start: no further bits to set
+	cmp rbx,rbp
+	jae pmarkr_skip_mark_lazy_array_bits
+	; start with the word after the one holding the node's own bit
+	inc rbx
+	; set bit 0 in every bitmap word from rbx through rbp inclusive
+pmarkr_lazy_array_bits:
+	or dword ptr [rdi+rbx*4],1
+	inc rbx
+	cmp rbx,rbp
+	jbe pmarkr_lazy_array_bits
+	; count 3 + rax words
+pmarkr_skip_mark_lazy_array_bits:
+	add r14,3
+	add r14,rax
+	; 0 or 1 traversed words are handled separately
+	cmp rax,1
+	jbe pmarkr_array_length_0_1
+	; swap the word at [rdx] with the word at [rcx]
+	mov rbp,qword ptr [rdx]
+	mov rbx,qword ptr [rcx]
+	mov qword ptr [rdx],rbx
+	mov qword ptr [rcx],rbp
+	; link the array into lazy_array_list; rax is stored at the array start, the word from -8[rdx] moves to -8[rcx] with 2 added
+	mov rbp,qword ptr (-8)[rdx]
+	sub rdx,8
+	mov rbx,qword ptr lazy_array_list
+	add rbp,2
+	mov qword ptr [rdx],rbx	; old list head
+	mov qword ptr (-8)[rcx],rbp
+	mov qword ptr (-16)[rcx],rax
+	sub rcx,16
+	mov qword ptr lazy_array_list,rcx	; this array becomes the new list head
+	; descend into the word at -8[rdx]; that slot keeps the parent pointer meanwhile
+	mov rcx,qword ptr (-8)[rdx]
+	mov qword ptr (-8)[rdx],rsi
+	lea rsi,(-8)[rdx]
+	jmp pmarkr_node
+	; rax is 0 or 1; lea keeps the flags of cmp rax,1
+pmarkr_array_length_0_1:
+	lea rcx,(-16)[rcx]	; rcx back to the array start
+	jb pmarkr_next_node	; rax == 0: nothing to traverse
+	; rax == 1: rearrange words 0..3 of the array and link it into lazy_array_list
+	mov rbx,qword ptr 24[rcx]
+	mov rbp,qword ptr 16[rcx]
+	mov rdx,qword ptr lazy_array_list
+	mov qword ptr 24[rcx],rbp
+	mov qword ptr 16[rcx],rdx	; old list head
+	mov qword ptr [rcx],rax
+	mov qword ptr lazy_array_list,rcx
+	mov qword ptr 8[rcx],rbx
+	add rcx,8
+	; descend into the word now at [rcx]; parent = rcx tagged with 2
+	mov rbp,qword ptr [rcx]
+	mov qword ptr [rcx],rsi
+	lea rsi,2[rcx]
+	mov rcx,rbp
+	jmp pmarkr_node
+	; no word of an element is traversed: size = (field - 256) * count + 3 words
+pmarkr_b_record_array:
+	mov rbp,qword ptr 8[rcx]
+	sub rax,256
+	imul rax,rbp
+	add rax,3
+	jmp pmarkr_basic_array
+	; rbp still holds the element descriptor loaded at pmarkr_array
+pmarkr_strict_basic_array:
+	mov rax,qword ptr 8[rcx]	; rax = element count
+	cmp rbp,offset BOOL+2
+	je pmarkr_strict_bool_array
+pmarkr_strict_real_array:
+pmarkr_strict_int_array:
+	add rax,3	; one word per element plus 3 words
+	jmp pmarkr_basic_array
+pmarkr_strict_bool_array:
+	add rax,24+7	; one byte per element plus 24 bytes, rounded up to whole words
+	shr rax,3
+	jmp pmarkr_basic_array
+	; string: 8[rcx] = length in bytes
+pmarkr_string_:
+	mov rax,qword ptr 8[rcx]
+	add rax,16+7	; length plus 16 bytes, rounded up to whole words
+	shr rax,3
+	; rax = size in words of a node that is marked but not traversed
+pmarkr_basic_array:
+	mov edx,dword ptr (bit_set_table2)[rdx]
+	add r14,rax
+	; set the bit of the node; rax = address of its last word
+	or dword ptr [rdi+rbx*4],edx
+	lea rax,(-8)[rcx+rax*8]
+	; rax = bitmap word index of the last word
+	add rax,r10
+	shr rax,8
+	; node ends within the bitmap word of its start: done
+	cmp rbx,rax
+	jae pmarkr_next_node
+	; set bit 0 in each following bitmap word up to and including rax
+	inc rbx
+	mov rbp,1
+	; only one word left: skip the loop
+	cmp rbx,rax
+	jae pmarkr_last_string_bits
+	; all words before the last one
+pmarkr_string_lp:
+	or dword ptr [rdi+rbx*4],ebp
+	inc rbx
+	cmp rbx,rax
+	jb pmarkr_string_lp
+	; last bitmap word
+pmarkr_last_string_bits:
+	or dword ptr [rdi+rbx*4],ebp
+	jmp pmarkr_next_node
-- 
cgit v1.2.3