From 9f212d7bd23f24ca4898f0bf33cb865c68cfa78c Mon Sep 17 00:00:00 2001 From: John van Groningen Date: Fri, 2 Jun 2006 14:25:27 +0000 Subject: add time profiler and stack tracer for 64 bit windows and linux --- aprofile.asm | 851 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ atrace.asm | 442 +++++++++++++++++++++++++++++++ 2 files changed, 1293 insertions(+) create mode 100644 aprofile.asm create mode 100644 atrace.asm diff --git a/aprofile.asm b/aprofile.asm new file mode 100644 index 0000000..9d8d7e9 --- /dev/null +++ b/aprofile.asm @@ -0,0 +1,851 @@ + +_TEXT segment para 'CODE' +_TEXT ends +_DATA segment para 'DATA' +_DATA ends + + _TEXT segment + + public init_profiler + public profile_r + public profile_l + public profile_l2 + public profile_n + public profile_n2 + public profile_s + public profile_s2 + public profile_t + public write_profile_information + public write_profile_stack + public stack_trace_depth + + ifndef LINUX + extrn allocate_memory:near + endif + extrn __STRING__:near + extrn openF:near + extrn closeF:near + extrn writeFC:near + extrn writeFI:near + extrn ab_stack_size:near + extrn ew_print_string:near + extrn ew_print_char:near + extrn ew_print_text:near + extrn create_profile_file_name:near +; extrn print_error:near +; extrn profile_stack_pointer:near + +next = 0 +time = 8 +n_profiler_calls = 16 +n_strict_calls = 24 +n_lazy_calls = 32 +n_curried_calls = 40 +n_words_allocated = 48 +name_ = 56 +FunctionProfile = 64 + +profile_t: + push rax + push rdx + rdtsc + + push rcx + mov rcx,qword ptr profile_stack_pointer + + sub edx,dword ptr global_time_hi + push rbx + mov ebx,dword ptr global_time_lo + mov eax,eax + + shl rdx,32 + sub rax,rbx + mov rbx,qword ptr (-8)[rcx] + + add rax,rdx + + sub rcx,8 + mov qword ptr global_last_tail_call,rbx + + mov qword ptr profile_stack_pointer,rcx + + inc qword ptr n_profiler_calls[rbx] + add qword ptr time[rbx],rax + + mov rax,qword ptr global_n_words_free + mov qword ptr global_n_words_free,r15 + sub rax,r15 + add qword ptr n_words_allocated[rbx],rax + + pop rbx + pop rcx + + rdtsc + mov dword ptr global_time_hi,edx + pop rdx + mov dword ptr global_time_lo,eax + pop rax + ret + +profile_r: + push rax + push rdx + rdtsc + + push rcx + mov rcx,qword ptr profile_stack_pointer + + sub edx,dword ptr global_time_hi + push rbx + mov ebx,dword ptr global_time_lo + mov eax,eax + + shl rdx,32 + sub rax,rbx + mov rbx,qword ptr (-8)[rcx] + + add rax,rdx + + sub rcx,8 + mov qword ptr global_last_tail_call,0 + + mov qword ptr profile_stack_pointer,rcx + + inc qword ptr n_profiler_calls[rbx] + add qword ptr time[rbx],rax + + mov rax,qword ptr global_n_words_free + mov qword ptr global_n_words_free,r15 + sub rax,r15 + add qword ptr n_words_allocated[rbx],rax + + pop rbx + pop rcx + + rdtsc + mov dword ptr global_time_hi,edx + pop rdx + mov dword ptr global_time_lo,eax + pop rax + ret + +profile_l: + push rax + push rdx + rdtsc + + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_l +allocate_function_profile_record_lr: + push rcx + + mov rbp,qword ptr global_last_tail_call + mov rcx,qword ptr profile_stack_pointer + + test rbp,rbp + jne use_tail_calling_function_l + + mov rbp,qword ptr (-8)[rcx] +use_tail_calling_function_lr: + + mov qword ptr [rcx],rbx + add rcx,8 + + inc qword ptr n_curried_calls[rbx] + jmp profile_n_ + +allocate_function_profile_record_l: + call allocate_function_profile_record + jmp allocate_function_profile_record_lr + +use_tail_calling_function_l: + mov qword ptr global_last_tail_call,0 + jmp use_tail_calling_function_lr + +profile_l2: + push rax + push rdx + rdtsc + + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_l2 +allocate_function_profile_record_l2r: + push rcx + + mov rbp,qword ptr global_last_tail_call + mov rcx,qword ptr profile_stack_pointer + + test rbp,rbp + jne use_tail_calling_function_l2 + + mov rbp,qword ptr (-8)[rcx] +use_tail_calling_function_l2r: + + mov qword ptr [rcx],rbx + mov qword ptr 8[rcx],rbx + add rcx,16 + + inc qword ptr n_curried_calls[rbx] + jmp profile_n_ + +allocate_function_profile_record_l2: + call allocate_function_profile_record + jmp allocate_function_profile_record_l2r + +use_tail_calling_function_l2: + mov qword ptr global_last_tail_call,0 + jmp use_tail_calling_function_l2r + +profile_n: + push rax + push rdx + rdtsc + + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_n +allocate_function_profile_record_nr: + push rcx + + mov rbp,qword ptr global_last_tail_call + mov rcx,qword ptr profile_stack_pointer + + test rbp,rbp + jne use_tail_calling_function_n + + mov rbp,qword ptr (-8)[rcx] +use_tail_calling_function_nr: + + mov qword ptr [rcx],rbx + add rcx,8 + + inc qword ptr n_lazy_calls[rbx] + jmp profile_n_ + +allocate_function_profile_record_n: + call allocate_function_profile_record + jmp allocate_function_profile_record_nr + +use_tail_calling_function_n: + mov qword ptr global_last_tail_call,0 + jmp use_tail_calling_function_nr + +profile_n2: + push rax + push rdx + rdtsc + + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_n2 +allocate_function_profile_record_n2r: + push rcx + + mov rbp,qword ptr global_last_tail_call + mov rcx,qword ptr profile_stack_pointer + + test rbp,rbp + jne use_tail_calling_function_n2 + + mov rbp,qword ptr (-8)[rcx] +use_tail_calling_function_n2r: + + mov qword ptr [rcx],rbx + mov qword ptr 8[rcx],rbx + add rcx,16 + + inc qword ptr n_lazy_calls[rbx] + jmp profile_n_ + +allocate_function_profile_record_n2: + call allocate_function_profile_record + jmp allocate_function_profile_record_n2r + +use_tail_calling_function_n2: + mov qword ptr global_last_tail_call,0 + jmp use_tail_calling_function_n2r + +profile_s2: + push rax + push rdx + rdtsc + + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_s2 +allocate_function_profile_record_s2r: + push rcx + + mov rbp,qword ptr global_last_tail_call + mov rcx,qword ptr profile_stack_pointer + + test rbp,rbp + jne use_tail_calling_function_s2 + + mov rbp,qword ptr (-8)[rcx] +use_tail_calling_function_s2r: + + mov qword ptr [rcx],rbx + mov qword ptr 8[rcx],rbx + add rcx,16 + jmp profile_s_ + +allocate_function_profile_record_s2: + call allocate_function_profile_record + jmp allocate_function_profile_record_s2r + +use_tail_calling_function_s2: + mov qword ptr global_last_tail_call,0 + jmp use_tail_calling_function_s2r + +profile_s: + push rax + push rdx + rdtsc + + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_s +allocate_function_profile_record_sr: + push rcx + + mov rbp,qword ptr global_last_tail_call + mov rcx,qword ptr profile_stack_pointer + + test rbp,rbp + jne use_tail_calling_function_s + + mov rbp,qword ptr (-8)[rcx] +use_tail_calling_function_sr: + + mov qword ptr [rcx],rbx + add rcx,8 + +profile_s_: + inc qword ptr n_strict_calls[rbx] + +profile_n_: + mov qword ptr profile_stack_pointer,rcx + + sub edx,dword ptr global_time_hi + mov ebx,dword ptr global_time_lo + mov eax,eax + + shl rdx,32 + sub rax,rbx + + add rax,rdx + + inc qword ptr n_profiler_calls[rbp] + add rax,qword ptr time[rbp] + + mov rax,qword ptr global_n_words_free + mov qword ptr global_n_words_free,r15 + sub rax,r15 + add qword ptr n_words_allocated[rbp],rax + + pop rcx + pop rbx + + rdtsc + mov dword ptr global_time_hi,edx + pop rdx + mov dword ptr global_time_lo,eax + pop rax + ret + +allocate_function_profile_record_s: + call allocate_function_profile_record + jmp allocate_function_profile_record_sr + +use_tail_calling_function_s: + mov qword ptr global_last_tail_call,0 + jmp use_tail_calling_function_sr + + +; argument: rbp: function name adress-4 +; result: rbx: function profile record adress + +allocate_function_profile_record: + push rax + mov rax,qword ptr global_n_free_records_in_block + mov rbx,qword ptr global_last_allocated_block + + test rax,rax + jne no_alloc + + push rcx + push rdx + push rbp + + ifdef LINUX + sub rsp,104 + mov qword ptr [rsp],rsi + mov qword ptr 8[rsp],rdi + mov qword ptr 16[rsp],r8 + mov qword ptr 24[rsp],r10 + mov qword ptr 32[rsp],r11 + movsd qword ptr 40[rsp],xmm0 + movsd qword ptr 48[rsp],xmm1 + movsd qword ptr 56[rsp],xmm2 + movsd qword ptr 64[rsp],xmm3 + movsd qword ptr 72[rsp],xmm4 + movsd qword ptr 80[rsp],xmm5 + movsd qword ptr 88[rsp],xmm6 + movsd qword ptr 96[rsp],xmm7 + else + sub rsp,72 + mov qword ptr [rsp],r8 + mov qword ptr 8[rsp],r10 + mov qword ptr 16[rsp],r11 + movsd qword ptr 24[rsp],xmm0 + movsd qword ptr 32[rsp],xmm1 + movsd qword ptr 40[rsp],xmm2 + movsd qword ptr 48[rsp],xmm3 + movsd qword ptr 56[rsp],xmm4 + movsd qword ptr 64[rsp],xmm5 + endif + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + ifdef LINUX + mov rdi,8192 + ; 128*FunctionProfile + call malloc + else + mov rcx,128*FunctionProfile + call allocate_memory + endif + mov rsp,rbp + + ifdef LINUX + mov rsi,qword ptr [rsp] + mov rdi,qword ptr 8[rsp] + mov r8,qword ptr 16[rsp] + mov r10,qword ptr 24[rsp] + mov r11,qword ptr 32[rsp] + movlpd xmm0,qword ptr 40[rsp] + movlpd xmm1,qword ptr 48[rsp] + movlpd xmm2,qword ptr 56[rsp] + movlpd xmm3,qword ptr 64[rsp] + movlpd xmm4,qword ptr 72[rsp] + movlpd xmm5,qword ptr 80[rsp] + movlpd xmm6,qword ptr 88[rsp] + movlpd xmm7,qword ptr 96[rsp] + add rsp,104 + else + mov r8,qword ptr [rsp] + mov r10,qword ptr 8[rsp] + mov r11,qword ptr 16[rsp] + movlpd xmm0,qword ptr 24[rsp] + movlpd xmm1,qword ptr 32[rsp] + movlpd xmm2,qword ptr 40[rsp] + movlpd xmm3,qword ptr 48[rsp] + movlpd xmm4,qword ptr 56[rsp] + movlpd xmm5,qword ptr 64[rsp] + add rsp,72 + endif + + test rax,rax + + pop rbp + pop rdx + pop rcx + + je no_memory + + mov rbx,rax + mov rax,128 + mov qword ptr global_last_allocated_block,rbx + +no_alloc: + dec rax + mov qword ptr global_n_free_records_in_block,rax + lea rax,FunctionProfile[rbx] + mov qword ptr global_last_allocated_block,rax + + xor rax,rax + mov qword ptr time[rbx],rax + mov qword ptr n_profiler_calls[rbx],rax + mov qword ptr n_strict_calls[rbx],rax + mov qword ptr n_lazy_calls[rbx],rax + mov qword ptr n_curried_calls[rbx],rax + mov qword ptr n_words_allocated[rbx],rax + + mov rax,qword ptr global_profile_records + mov qword ptr name_[rbx],rbp + + mov qword ptr next[rbx],rax + mov qword ptr global_profile_records,rbx + + mov qword ptr [rbp],rbx + pop rax + ret + +no_memory: + lea rbp,not_enough_memory_for_profiler + pop rax + jmp print_error + +write_profile_information: + mov rbp,rsp + sub rsp,40 + and rsp,-16 + ifdef LINUX + mov r13,rsi + mov r14,rdi + lea rdi,profile_file_name + else + lea rcx,profile_file_name + endif + call create_profile_file_name + ifdef LINUX + mov rsi,r13 + mov rdi,r14 + endif + mov rsp,rbp + + mov rax,1 + lea rcx,profile_file_name + call openF + + test r10,r10 + je cannot_open + + mov rbp,qword ptr global_profile_records + +write_profile_lp: + test rbp,rbp + je end_list + + mov rdx,qword ptr name_[rbp] + + push rbp + + push rdx + + mov edx,dword ptr (-4)[rdx] + mov eax,dword ptr [rdx] + add rdx,4 + +write_module_name_lp: + sub rax,1 + jc end_module_name + + push rax + push rdx + + movzx r10,byte ptr [rdx] + call writeFC + + pop rdx + pop rax + + add rdx,1 + jmp write_module_name_lp + +end_module_name: + mov r10,' ' + call writeFC + + pop rdx + + add rdx,7 + +write_function_name_lp: + movzx r10,byte ptr 1[rdx] + add rdx,1 + + test r10,r10 + je end_function_name + + push rdx + + call writeFC + + pop rdx + + jmp write_function_name_lp + +end_function_name: + mov r10,' ' + call writeFC + + mov rbp,qword ptr [rsp] + mov r10,qword ptr n_strict_calls[rbp] + call writeFI_space + + mov rbp,qword ptr [rsp] + mov r10,qword ptr n_lazy_calls[rbp] + call writeFI_space + + mov rbp,qword ptr [rsp] + mov r10,qword ptr n_curried_calls[rbp] + call writeFI_space + + mov rbp,qword ptr [rsp] + mov r10,qword ptr n_profiler_calls[rbp] + call writeFI_space + + mov rbp,qword ptr [rsp] + mov r10,qword ptr n_words_allocated[rbp] + call writeFI_space + + mov rbp,qword ptr [rsp] + mov r10,qword ptr time[rbp] + call writeFI + + mov r10,10 + call writeFC + + pop rbp + mov rbp,qword ptr next[rbp] + jmp write_profile_lp + +writeFI_space: + call writeFI + + mov r10,' ' + jmp writeFC + +end_list: + call closeF + +cannot_open: + ret + +write_profile_stack: + mov rax,qword ptr profile_stack_pointer + + test rax,rax + je stack_not_initialised + + push rax + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + ifdef LINUX + mov r13,rsi + mov r14,rdi + lea rdi,stack_trace_string + else + lea rcx,stack_trace_string + endif + call ew_print_string + ifdef LINUX + mov rsi,r13 + mov rdi,r14 + endif + mov rsp,rbp + + pop rax + +; mov rbp,12 + mov rbp,qword ptr stack_trace_depth +write_functions_on_stack: + mov rbx,qword ptr (-8)[rax] + sub rax,8 + + test rbx,rbx + je end_profile_stack + + push rax + mov rcx,qword ptr name_[rbx] + + push rbp + + mov edx,dword ptr (-4)[rcx] + add rcx,8 + + mov r12d,dword ptr [rdx] + lea r13,4[rdx] + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + ifdef LINUX + mov r11,rsi + mov r14,rdi + mov rdi,rcx + endif + call ew_print_string + + ifdef LINUX + lea rdi,module_string + else + lea rcx,module_string + endif + call ew_print_string + + ifdef LINUX + mov rsi,r12 + mov rdi,r13 + else + mov rdx,r12 + mov rcx,r13 + endif + call ew_print_text + + ifdef LINUX + mov rdi,']' + else + mov rcx,']' + endif + call ew_print_char + + ifdef LINUX + mov rdi,10 + else + mov rcx,10 + endif + call ew_print_char + +ifdef LINUX + mov rsi,r11 + mov rdi,r14 + endif + mov rsp,rbp + + pop rbp + pop rax + + sub rbp,1 + jne write_functions_on_stack + +end_profile_stack: +stack_not_initialised: + ret + +init_profiler: + mov rbp,rsp + sub rsp,40 + and rsp,-16 + ifdef LINUX + mov r13,rsi + mov r14,rdi + mov rdi,qword ptr ab_stack_size + call malloc + mov rsi,r13 + mov rdi,r14 + else + mov rcx,qword ptr ab_stack_size + call allocate_memory + endif + mov rsp,rbp + + test rax,rax + je init_profiler_error + + push rax + + lea rbp,start_string + call allocate_function_profile_record + + pop rdx + + mov qword ptr 8[rdx],rbx + mov qword ptr [rdx],0 + add rdx,16 + mov qword ptr profile_stack_pointer,rdx + mov qword ptr global_last_tail_call,0 + + mov qword ptr global_n_words_free,r15 + + rdtsc + mov dword ptr global_time_hi,edx + mov dword ptr global_time_lo,eax + ret + +init_profiler_error: + mov qword ptr profile_stack_pointer,0 + lea rbp,not_enough_memory_for_profile_stack + jmp print_error + +_TEXT ends + + _DATA segment + + align (1 shl 3) + +global_n_free_records_in_block: + dq 0 +; 0 n free records in block +global_last_allocated_block: + dq 0 +; 8 latest allocated block +global_profile_records: + dq 0 +; 16 profile record list +global_time_hi: + dd 0 +; 24 clock +global_time_lo: + dd 0 +global_last_tail_call: + dq 0 +; last tail calling function +global_n_words_free: + dq 0 + +profile_file_name: + dq __STRING__+2 + dq 0 + dq 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + dq 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + dq 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + dq 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + dq 0 +stack_trace_depth: + dq 12 + align (1 shl 3) + +; m_system also defined in istartup.s +; m_system: +; dq 6 +; db "System" +; db 0 +; db 0 + + dd m_system +start_string: + dq 0 + db "start" + db 0 + align (1 shl 3) +not_enough_memory_for_profile_stack: + db "not enough memory for profile stack" + db 10 + db 0 +not_enough_memory_for_profiler: + db "not enough memory for profiler" + db 10 + db 0 +stack_trace_string: + db "Stack trace:" + db 10 + db 0 +module_string: + db " [module: " + db 0 + align (1 shl 3) + +_DATA ends + +; end + diff --git a/atrace.asm b/atrace.asm new file mode 100644 index 0000000..b56ffe0 --- /dev/null +++ b/atrace.asm @@ -0,0 +1,442 @@ + +_TEXT segment para 'CODE' +_TEXT ends +_DATA segment para 'DATA' +_DATA ends + + _TEXT segment + + public init_profiler + public profile_r + public profile_l + public profile_l2 + public profile_n + public profile_n2 + public profile_s + public profile_s2 + public profile_t + public write_profile_stack + public stack_trace_depth + + ifndef LINUX + extrn allocate_memory:near + endif + extrn __STRING__:near + extrn ab_stack_size:near + extrn ew_print_string:near + extrn ew_print_char:near + extrn ew_print_text:near +; extrn print_error:near +; extrn profile_stack_pointer:near + +next = 0 +name_ = 8 +FunctionProfile = 16 + +profile_t: + sub qword ptr profile_stack_pointer,8 + ret + +profile_r: + sub qword ptr profile_stack_pointer,8 + ret + +profile_l: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_l +allocate_function_profile_record_lr: + mov rbp,qword ptr profile_stack_pointer + + mov qword ptr [rbp],rbx + add rbp,8 + mov qword ptr profile_stack_pointer,rbp + + pop rbx + ret + +allocate_function_profile_record_l: + call allocate_function_profile_record + jmp allocate_function_profile_record_lr + +profile_l2: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_l2 +allocate_function_profile_record_l2r: + mov rbp,qword ptr profile_stack_pointer + + mov qword ptr [rbp],rbx + mov qword ptr 8[rbp],rbx + add rbp,16 + mov qword ptr profile_stack_pointer,rbp + + pop rbx + ret + +allocate_function_profile_record_l2: + call allocate_function_profile_record + jmp allocate_function_profile_record_l2r + +profile_n: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_n +allocate_function_profile_record_nr: + mov rbp,qword ptr profile_stack_pointer + + mov qword ptr [rbp],rbx + add rbp,8 + mov qword ptr profile_stack_pointer,rbp + + pop rbx + ret + +allocate_function_profile_record_n: + call allocate_function_profile_record + jmp allocate_function_profile_record_nr + +profile_n2: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_n2 +allocate_function_profile_record_n2r: + mov rbp,qword ptr profile_stack_pointer + + mov qword ptr [rbp],rbx + mov qword ptr 8[rbp],rbx + add rbp,16 + mov qword ptr profile_stack_pointer,rbp + + pop rbx + ret + +allocate_function_profile_record_n2: + call allocate_function_profile_record + jmp allocate_function_profile_record_n2r + +profile_s2: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_s2 +allocate_function_profile_record_s2r: + mov rbp,qword ptr profile_stack_pointer + + mov qword ptr [rbp],rbx + mov qword ptr 8[rbp],rbx + add rbp,16 + mov qword ptr profile_stack_pointer,rbp + + pop rbx + ret + +allocate_function_profile_record_s2: + call allocate_function_profile_record + jmp allocate_function_profile_record_s2r + +profile_s: + push rbx + mov rbx,qword ptr [rbp] + + test rbx,rbx + je allocate_function_profile_record_s +allocate_function_profile_record_sr: + mov rbp,qword ptr profile_stack_pointer + + mov qword ptr [rbp],rbx + add rbp,8 + mov qword ptr profile_stack_pointer,rbp + + pop rbx + ret + +allocate_function_profile_record_s: + call allocate_function_profile_record + jmp allocate_function_profile_record_sr + +; argument: rbp: function name adress-4 +; result: rbx: function profile record adress + +allocate_function_profile_record: + push rax + mov rax,qword ptr global_n_free_records_in_block + mov rbx,qword ptr global_last_allocated_block + + test rax,rax + jne no_alloc + + push rcx + push rdx + push rbp + + ifdef LINUX + sub rsp,104 + mov qword ptr [rsp],rsi + mov qword ptr 8[rsp],rdi + mov qword ptr 16[rsp],r8 + mov qword ptr 24[rsp],r10 + mov qword ptr 32[rsp],r11 + movsd qword ptr 40[rsp],xmm0 + movsd qword ptr 48[rsp],xmm1 + movsd qword ptr 56[rsp],xmm2 + movsd qword ptr 64[rsp],xmm3 + movsd qword ptr 72[rsp],xmm4 + movsd qword ptr 80[rsp],xmm5 + movsd qword ptr 88[rsp],xmm6 + movsd qword ptr 96[rsp],xmm7 + else + sub rsp,72 + mov qword ptr [rsp],r8 + mov qword ptr 8[rsp],r10 + mov qword ptr 16[rsp],r11 + movsd qword ptr 24[rsp],xmm0 + movsd qword ptr 32[rsp],xmm1 + movsd qword ptr 40[rsp],xmm2 + movsd qword ptr 48[rsp],xmm3 + movsd qword ptr 56[rsp],xmm4 + movsd qword ptr 64[rsp],xmm5 + endif + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + ifdef LINUX + mov rdi,8192 + ; 512*FunctionProfile + call malloc + else + mov rcx,512*FunctionProfile + call allocate_memory + endif + mov rsp,rbp + + ifdef LINUX + mov rsi,qword ptr [rsp] + mov rdi,qword ptr 8[rsp] + mov r8,qword ptr 16[rsp] + mov r10,qword ptr 24[rsp] + mov r11,qword ptr 32[rsp] + movlpd xmm0,qword ptr 40[rsp] + movlpd xmm1,qword ptr 48[rsp] + movlpd xmm2,qword ptr 56[rsp] + movlpd xmm3,qword ptr 64[rsp] + movlpd xmm4,qword ptr 72[rsp] + movlpd xmm5,qword ptr 80[rsp] + movlpd xmm6,qword ptr 88[rsp] + movlpd xmm7,qword ptr 96[rsp] + add rsp,104 + else + mov r8,qword ptr [rsp] + mov r10,qword ptr 8[rsp] + mov r11,qword ptr 16[rsp] + movlpd xmm0,qword ptr 24[rsp] + movlpd xmm1,qword ptr 32[rsp] + movlpd xmm2,qword ptr 40[rsp] + movlpd xmm3,qword ptr 48[rsp] + movlpd xmm4,qword ptr 56[rsp] + movlpd xmm5,qword ptr 64[rsp] + add rsp,72 + endif + + test rax,rax + + pop rbp + pop rdx + pop rcx + + je no_memory + + mov rbx,rax + mov rax,512 + mov qword ptr global_last_allocated_block,rbx + +no_alloc: + dec rax + mov qword ptr global_n_free_records_in_block,rax + lea rax,FunctionProfile[rbx] + mov qword ptr global_last_allocated_block,rax + + mov rax,qword ptr global_profile_records + mov qword ptr name_[rbx],rbp + + mov qword ptr next[rbx],rax + mov qword ptr global_profile_records,rbx + + mov qword ptr [rbp],rbx + pop rax + ret + +no_memory: + lea rbp,not_enough_memory_for_profiler + pop rax + jmp print_error + +write_profile_stack: + mov rax,qword ptr profile_stack_pointer + + test rax,rax + je stack_not_initialised + + push rax + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + lea rcx,stack_trace_string + call ew_print_string + mov rsp,rbp + + pop rax + +; mov rbp,12 + mov rbp,qword ptr stack_trace_depth +write_functions_on_stack: + mov rbx,qword ptr (-8)[rax] + sub rax,8 + + test rbx,rbx + je end_profile_stack + + push rax + mov rcx,qword ptr name_[rbx] + + push rbp + + mov edx,dword ptr (-4)[rcx] + add rcx,8 + + mov r12d,dword ptr [rdx] + lea r13,4[rdx] + + mov rbp,rsp + sub rsp,40 + and rsp,-16 + + call ew_print_string + + lea rcx,module_string + call ew_print_string + + mov rdx,r12 + mov rcx,r13 + call ew_print_text + + mov rcx,']' + call ew_print_char + + mov rcx,10 + call ew_print_char + + mov rsp,rbp + + pop rbp + pop rax + + sub rbp,1 + jne write_functions_on_stack + +end_profile_stack: +stack_not_initialised: + ret + +init_profiler: + mov rbp,rsp + sub rsp,40 + and rsp,-16 + ifdef LINUX + mov r13,rsi + mov r14,rdi + mov rdi,qword ptr ab_stack_size + call malloc + mov rsi,r13 + mov rdi,r14 + else + mov rcx,qword ptr ab_stack_size + call allocate_memory + endif + mov rsp,rbp + + test rax,rax + je init_profiler_error + + push rax + + lea rbp,start_string + call allocate_function_profile_record + + pop rdx + + mov qword ptr 8[rdx],rbx + mov qword ptr [rdx],0 + add rdx,16 + mov qword ptr profile_stack_pointer,rdx + ret + +init_profiler_error: + mov qword ptr profile_stack_pointer,0 + lea rbp,not_enough_memory_for_profile_stack + jmp print_error + +_TEXT ends + + _DATA segment + + align (1 shl 3) + +global_n_free_records_in_block: + dq 0 +; 0 n free records in block +global_last_allocated_block: + dq 0 +; 8 latest allocated block +global_profile_records: + dq 0 +; 16 profile record list + +stack_trace_depth: + dq 12 + align (1 shl 3) + +; m_system also defined in istartup.s +; m_system: +; dq 6 +; db "System" +; db 0 +; db 0 + + dd m_system +start_string: + dq 0 + db "start" + db 0 + align (1 shl 3) +not_enough_memory_for_profile_stack: + db "not enough memory for profile stack" + db 10 + db 0 +not_enough_memory_for_profiler: + db "not enough memory for profiler" + db 10 + db 0 +stack_trace_string: + db "Stack trace:" + db 10 + db 0 +module_string: + db " [module: " + db 0 + align (1 shl 3) + +_DATA ends + +; end -- cgit v1.2.3