From e3910aabcb617a6e1fb0e7057e2f65a9a3be2617 Mon Sep 17 00:00:00 2001 From: John van Groningen Date: Mon, 13 Dec 2010 11:47:06 +0000 Subject: optimize for cpu with a 128 bit sse unit (instead of 64): use movsd instead of movlpd for loads and use movapd instead of movsd for register to register moves, add -sse64 option to optimize for 64 bit sse, --- cg.c | 8 +++++++ cgaas.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------ cgawas.c | 26 +++++++++++---------- 3 files changed, 87 insertions(+), 27 deletions(-) diff --git a/cg.c b/cg.c index 9d06db9..e70abb4 100644 --- a/cg.c +++ b/cg.c @@ -617,6 +617,10 @@ extern int compiler_id; extern int intel_asm; #endif +#ifdef G_AI64 +extern int sse_128; +#endif + #ifdef MAIN_CLM # if !(defined (__MWERKS__) && defined (__cplusplus)) # ifdef CG_PPC_XO @@ -704,6 +708,10 @@ int main (int argc,char **argv) #ifdef I486 else if (!strcmp (s,"intelasm")) intel_asm=1; +#endif +#ifdef G_AI64 + else if (!strcmp (s,"sse64")) + sse_128=0; #endif else if (!strcmp (s,"mc68000")){ mc68000_flag=1; diff --git a/cgaas.c b/cgaas.c index 8678cf9..8285faf 100644 --- a/cgaas.c +++ b/cgaas.c @@ -57,6 +57,8 @@ #define U4(s,v1,v2,v3,v4) s->v1;s->v2;s->v3;s->v4 #define U5(s,v1,v2,v3,v4,v5) s->v1;s->v2;s->v3;s->v4;s->v5 +int sse_128=1; + #ifdef FUNCTION_LEVEL_LINKING # define TEXT_LABEL_ID (-2) # define DATA_LABEL_ID (-3) @@ -3612,22 +3614,40 @@ static void as_fmove_instruction (struct instruction *instruction) case P_F_REGISTER: switch (instruction->instruction_parameters[0].parameter_type){ case P_F_REGISTER: + if (sse_128) + as_f_r (0x66,0x28,instruction->instruction_parameters[0].parameter_data.reg.r, + /* movapd */ instruction->instruction_parameters[1].parameter_data.reg.r); + else as_f_r (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.reg.r, - instruction->instruction_parameters[1].parameter_data.reg.r); + /* movsd*/ instruction->instruction_parameters[1].parameter_data.reg.r); return; case P_INDIRECT: + if 
(sse_128) + as_f_id (0xf2,0x10,instruction->instruction_parameters[0].parameter_offset, + /* movsd */ instruction->instruction_parameters[0].parameter_data.reg.r, + instruction->instruction_parameters[1].parameter_data.reg.r); + else as_f_id (0x66,0x12,instruction->instruction_parameters[0].parameter_offset, - instruction->instruction_parameters[0].parameter_data.reg.r, + /* movlpd */ instruction->instruction_parameters[0].parameter_data.reg.r, instruction->instruction_parameters[1].parameter_data.reg.r); return; case P_INDEXED: + if (sse_128) + as_f_x (0xf2,0x10,instruction->instruction_parameters[0].parameter_offset, + /* movsd */ instruction->instruction_parameters[0].parameter_data.ir, + instruction->instruction_parameters[1].parameter_data.reg.r); + else as_f_x (0x66,0x12,instruction->instruction_parameters[0].parameter_offset, - instruction->instruction_parameters[0].parameter_data.ir, + /* movlpd */ instruction->instruction_parameters[0].parameter_data.ir, instruction->instruction_parameters[1].parameter_data.reg.r); return; case P_F_IMMEDIATE: + if (sse_128) + as_f_i (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.r, + /* movsd */ instruction->instruction_parameters[1].parameter_data.reg.r); + else as_f_i (0x66,0x12,instruction->instruction_parameters[0].parameter_data.r, - instruction->instruction_parameters[1].parameter_data.reg.r); + /* movlpd */ instruction->instruction_parameters[1].parameter_data.reg.r); return; } break; @@ -3687,7 +3707,7 @@ static void as_fmoves_instruction (struct instruction *instruction) break; case P_INDEXED: if (instruction->instruction_parameters[0].parameter_type==P_F_REGISTER){ - /* movsd */ + /* movss */ as_f_x (0xf3,0x11,instruction->instruction_parameters[1].parameter_offset, instruction->instruction_parameters[1].parameter_data.ir, instruction->instruction_parameters[0].parameter_data.reg.r); @@ -3734,19 +3754,34 @@ static void as_float_neg_instruction (struct instruction *instruction) switch 
(instruction->instruction_parameters[0].parameter_type){ case P_F_REGISTER: - if (instruction->instruction_parameters[0].parameter_data.reg.r!=d_freg) - as_f_r (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); + if (instruction->instruction_parameters[0].parameter_data.reg.r!=d_freg){ + if (sse_128) + as_f_r (0x66,0x28,instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); /* movapd */ + else + as_f_r (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); /* movsd */ + } break; case P_INDIRECT: + if (sse_128) + as_f_id (0xf2,0x10,instruction->instruction_parameters[0].parameter_offset, + /* movsd */ instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); + else as_f_id (0x66,0x12,instruction->instruction_parameters[0].parameter_offset, - instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); + /* movlpd */ instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); break; case P_INDEXED: + if (sse_128) + as_f_x (0xf2,0x10,instruction->instruction_parameters[0].parameter_offset, + /* movsd */ instruction->instruction_parameters[0].parameter_data.ir,d_freg); + else as_f_x (0x66,0x12,instruction->instruction_parameters[0].parameter_offset, - instruction->instruction_parameters[0].parameter_data.ir,d_freg); + /* movlpd */ instruction->instruction_parameters[0].parameter_data.ir,d_freg); break; case P_F_IMMEDIATE: - as_f_i (0x66,0x12,instruction->instruction_parameters[0].parameter_data.r,d_freg); + if (sse_128) + as_f_i (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.r,d_freg); /* movsd */ + else + as_f_i (0x66,0x12,instruction->instruction_parameters[0].parameter_data.r,d_freg); /* movlpd */ break; default: internal_error_in_function ("as_float_neg_instruction"); @@ -3792,19 +3827,34 @@ static void as_float_abs_instruction (struct instruction *instruction) switch (instruction->instruction_parameters[0].parameter_type){ case P_F_REGISTER: - if 
(instruction->instruction_parameters[0].parameter_data.reg.r!=d_freg) - as_f_r (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); + if (instruction->instruction_parameters[0].parameter_data.reg.r!=d_freg){ + if (sse_128) + as_f_r (0x66,0x28,instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); /* movapd */ + else + as_f_r (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); /* movsd */ + } break; case P_INDIRECT: + if (sse_128) + as_f_id (0xf2,0x10,instruction->instruction_parameters[0].parameter_offset, + /* movsd */ instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); + else as_f_id (0x66,0x12,instruction->instruction_parameters[0].parameter_offset, - instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); + /* movlpd */ instruction->instruction_parameters[0].parameter_data.reg.r,d_freg); break; case P_INDEXED: + if (sse_128) + as_f_x (0xf2,0x10,instruction->instruction_parameters[0].parameter_offset, + /* movsd */ instruction->instruction_parameters[0].parameter_data.ir,d_freg); + else as_f_x (0x66,0x12,instruction->instruction_parameters[0].parameter_offset, - instruction->instruction_parameters[0].parameter_data.ir,d_freg); + /* movlpd */ instruction->instruction_parameters[0].parameter_data.ir,d_freg); break; case P_F_IMMEDIATE: - as_f_i (0x66,0x12,instruction->instruction_parameters[0].parameter_data.r,d_freg); + if (sse_128) + as_f_i (0xf2,0x10,instruction->instruction_parameters[0].parameter_data.r,d_freg); /* movsd */ + else + as_f_i (0x66,0x12,instruction->instruction_parameters[0].parameter_data.r,d_freg); /* movlpd */ break; default: internal_error_in_function ("as_float_abs_instruction"); diff --git a/cgawas.c b/cgawas.c index 99a27c4..b02c334 100644 --- a/cgawas.c +++ b/cgawas.c @@ -29,6 +29,8 @@ int intel_asm=1; +extern int sse_128; + #define for_l(v,l,n) for(v=(l);v!=NULL;v=v->n) #define IO_BUF_SIZE 8192 @@ -2638,7 +2640,7 @@ static void 
w_as_float_neg_instruction (struct instruction *instruction) w_as_float_constant (label_number,instruction->instruction_parameters[0].parameter_data.r); - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? "movsd" : "movlpd"); w_as_fp_register (d_freg); w_as_comma(); fprintf (assembly_file,"qword ptr i_%d",label_number); @@ -2646,7 +2648,7 @@ static void w_as_float_neg_instruction (struct instruction *instruction) break; } case P_INDIRECT: - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? "movsd" : "movlpd"); w_as_fp_register (d_freg); w_as_comma(); fprintf (assembly_file,"qword ptr "); @@ -2654,7 +2656,7 @@ static void w_as_float_neg_instruction (struct instruction *instruction) w_as_newline(); break; case P_INDEXED: - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? "movsd" : "movlpd"); w_as_fp_register (d_freg); w_as_comma(); fprintf (assembly_file,"qword ptr "); @@ -2663,7 +2665,7 @@ static void w_as_float_neg_instruction (struct instruction *instruction) break; case P_F_REGISTER: if (instruction->instruction_parameters[0].parameter_data.reg.r!=d_freg){ - w_as_opcode ("movsd"); + w_as_opcode (sse_128 ? "movapd" : "movsd"); w_as_fp_register (d_freg); w_as_comma(); w_as_fp_register (instruction->instruction_parameters[0].parameter_data.reg.r); @@ -2705,7 +2707,7 @@ static void w_as_float_abs_instruction (struct instruction *instruction) w_as_float_constant (label_number,instruction->instruction_parameters[0].parameter_data.r); - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? "movsd" : "movlpd"); w_as_fp_register (d_freg); w_as_comma(); fprintf (assembly_file,"qword ptr i_%d",label_number); @@ -2713,7 +2715,7 @@ static void w_as_float_abs_instruction (struct instruction *instruction) break; } case P_INDIRECT: - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? 
"movsd" : "movlpd"); w_as_fp_register (d_freg); w_as_comma(); fprintf (assembly_file,"qword ptr "); @@ -2721,7 +2723,7 @@ static void w_as_float_abs_instruction (struct instruction *instruction) w_as_newline(); break; case P_INDEXED: - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? "movsd" : "movlpd"); w_as_fp_register (d_freg); w_as_comma(); fprintf (assembly_file,"qword ptr "); @@ -2730,7 +2732,7 @@ static void w_as_float_abs_instruction (struct instruction *instruction) break; case P_F_REGISTER: if (instruction->instruction_parameters[0].parameter_data.reg.r!=d_freg){ - w_as_opcode ("movsd"); + w_as_opcode (sse_128 ? "movapd" : "movsd"); w_as_fp_register (d_freg); w_as_comma(); w_as_fp_register (instruction->instruction_parameters[0].parameter_data.reg.r); @@ -2763,13 +2765,13 @@ static void w_as_fmove_instruction (struct instruction *instruction) case P_F_REGISTER: switch (instruction->instruction_parameters[0].parameter_type){ case P_F_REGISTER: - w_as_opcode ("movsd"); + w_as_opcode (sse_128 ? "movapd" : "movsd"); w_as_fp_register (instruction->instruction_parameters[1].parameter_data.reg.r); w_as_comma(); w_as_fp_register (instruction->instruction_parameters[0].parameter_data.reg.r); break; case P_INDIRECT: - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? "movsd" : "movlpd"); w_as_fp_register (instruction->instruction_parameters[1].parameter_data.reg.r); w_as_comma(); if (intel_asm) @@ -2778,7 +2780,7 @@ static void w_as_fmove_instruction (struct instruction *instruction) instruction->instruction_parameters[0].parameter_data.reg.r); break; case P_INDEXED: - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? 
"movsd" : "movlpd"); w_as_fp_register (instruction->instruction_parameters[1].parameter_data.reg.r); w_as_comma(); if (intel_asm) @@ -2792,7 +2794,7 @@ static void w_as_fmove_instruction (struct instruction *instruction) w_as_float_constant (label_number,instruction->instruction_parameters[0].parameter_data.r); - w_as_opcode ("movlpd"); + w_as_opcode (sse_128 ? "movsd" : "movlpd"); w_as_fp_register (instruction->instruction_parameters[1].parameter_data.reg.r); w_as_comma(); if (intel_asm) -- cgit v1.2.3