X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.c;h=c9781e147a52d79ad748d9dc88956be69c6c2822;hp=fbebd4c48cba4b2ea28576cffbd08f2c63c83259;hb=33541f98a3ec41f15aa971e3fe350c546b1f1117;hpb=1c38a1776cf10bdc144967886b01c50b5bff2ae7 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index fbebd4c48cb..c9781e147a5 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -120,7 +120,7 @@ check_avx256_stores (rtx dest, const_rtx set, void *data) /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper in basic block BB. Delete it if upper 128bit AVX registers are unused. If it isn't deleted, move it to just before a jump insn. - + STATE is state of the upper 128bits of AVX registers at entry. */ static void @@ -2168,7 +2168,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for the auto-vectorizer. */ - m_BDVER + m_BDVER }; /* Feature tests against the various architecture variations. */ @@ -2663,6 +2663,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, { "-mmmx", OPTION_MASK_ISA_MMX }, { "-mabm", OPTION_MASK_ISA_ABM }, { "-mbmi", OPTION_MASK_ISA_BMI }, + { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, { "-mtbm", OPTION_MASK_ISA_TBM }, { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, @@ -2917,7 +2918,8 @@ ix86_option_override_internal (bool main_args_p) PTA_RDRND = 1 << 25, PTA_F16C = 1 << 26, PTA_BMI = 1 << 27, - PTA_TBM = 1 << 28 + PTA_TBM = 1 << 28, + PTA_LZCNT = 1 << 29 /* if this reaches 32, need to widen struct pta flags below */ }; @@ -2967,6 +2969,11 @@ ix86_option_override_internal (bool main_args_p) PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL}, + {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX + | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE + | PTA_RDRND | PTA_F16C}, {"atom", PROCESSOR_ATOM, CPU_ATOM, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE}, @@ -3278,6 +3285,9 @@ ix86_option_override_internal (bool main_args_p) if (processor_alias_table[i].flags & PTA_BMI && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) ix86_isa_flags |= OPTION_MASK_ISA_BMI; + if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM) + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT)) + ix86_isa_flags |= OPTION_MASK_ISA_LZCNT; if (processor_alias_table[i].flags & PTA_TBM && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) ix86_isa_flags |= OPTION_MASK_ISA_TBM; @@ -3525,6 +3535,10 @@ ix86_option_override_internal (bool main_args_p) if (TARGET_SSE4_2 || TARGET_ABM) ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit; + /* Turn on lzcnt instruction for -mabm. */ + if (TARGET_ABM) + ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit; + /* Validate -mpreferred-stack-boundary= value or default it to PREFERRED_STACK_BOUNDARY_DEFAULT. */ ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; @@ -3772,7 +3786,7 @@ ix86_option_override_internal (bool main_args_p) target_flags |= MASK_PREFER_AVX128; } } - else + else { /* Disable vzeroupper pass if TARGET_AVX is disabled. */ target_flags &= ~MASK_VZEROUPPER; @@ -4030,6 +4044,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("3dnow", OPT_m3dnow), IX86_ATTR_ISA ("abm", OPT_mabm), IX86_ATTR_ISA ("bmi", OPT_mbmi), + IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), IX86_ATTR_ISA ("tbm", OPT_mtbm), IX86_ATTR_ISA ("aes", OPT_maes), IX86_ATTR_ISA ("avx", OPT_mavx), @@ -4692,8 +4707,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) optimize any indirect call, or a direct call to a global function, as the PLT requires %ebx be live. (Darwin does not have a PLT.) */ if (!TARGET_MACHO - && !TARGET_64BIT - && flag_pic + && !TARGET_64BIT + && flag_pic && (!decl || !targetm.binds_local_p (decl))) return false; @@ -7078,6 +7093,11 @@ function_value_64 (enum machine_mode orig_mode, enum machine_mode mode, return gen_rtx_REG (mode, AX_REG); } } + else if (POINTER_TYPE_P (valtype)) + { + /* Pointers are always returned in Pmode. */ + mode = Pmode; + } ret = construct_container (mode, orig_mode, valtype, 1, X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, @@ -7147,6 +7167,22 @@ ix86_function_value (const_tree valtype, const_tree fntype_or_decl, return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); } +/* Pointer function arguments and return values are promoted to Pmode. */ + +static enum machine_mode +ix86_promote_function_mode (const_tree type, enum machine_mode mode, + int *punsignedp, const_tree fntype, + int for_return) +{ + if (type != NULL_TREE && POINTER_TYPE_P (type)) + { + *punsignedp = POINTERS_EXTEND_UNSIGNED; + return Pmode; + } + return default_promote_function_mode (type, mode, punsignedp, fntype, + for_return); +} + rtx ix86_libcall_value (enum machine_mode mode) { @@ -7474,6 +7510,11 @@ setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) alias_set_type set = get_varargs_alias_set (); int i; + /* Reset to zero, as there might be a sysv vaarg used + before. */ + ix86_varargs_gpr_size = 0; + ix86_varargs_fpr_size = 0; + for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) { rtx reg, mem; @@ -8297,10 +8338,6 @@ ix86_code_end (void) rtx xops[2]; int regno; -#ifdef TARGET_SOLARIS - solaris_code_end (); -#endif - for (regno = AX_REG; regno <= SP_REG; regno++) { char name[32]; @@ -8705,16 +8742,12 @@ ix86_compute_frame_layout (struct ix86_frame *frame) cfun->machine->use_fast_prologue_epilogue = !expensive_function_p (count); } - if (TARGET_PROLOGUE_USING_MOVE - && cfun->machine->use_fast_prologue_epilogue) - frame->save_regs_using_mov = true; - else - frame->save_regs_using_mov = false; - /* If static stack checking is enabled and done with probes, the registers - need to be saved before allocating the frame. */ - if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) - frame->save_regs_using_mov = false; + frame->save_regs_using_mov + = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue + /* If static stack checking is enabled and done with probes, + the registers need to be saved before allocating the frame. */ + && flag_stack_check != STATIC_BUILTIN_STACK_CHECK); /* Skip return address. */ offset = UNITS_PER_WORD; @@ -8863,7 +8896,7 @@ choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) return len; } - + /* Return an RTX that points to CFA_OFFSET within the stack frame. The valid base registers are taken from CFUN->MACHINE->FS. */ @@ -10209,9 +10242,11 @@ ix86_expand_prologue (void) emit_insn (gen_cld ()); /* SEH requires that the prologue end within 256 bytes of the start of - the function. Prevent instruction schedules that would extend that. */ + the function. Prevent instruction schedules that would extend that. + Further, prevent alloca modifications to the stack pointer from being + combined with prologue modifications. */ if (TARGET_SEH) - emit_insn (gen_blockage ()); + emit_insn (gen_prologue_use (stack_pointer_rtx)); } /* Emit code to restore REG using a POP insn. */ @@ -10326,7 +10361,7 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, { rtx reg = gen_rtx_REG (Pmode, regno); rtx insn, mem; - + mem = choose_baseaddr (cfa_offset); mem = gen_frame_mem (Pmode, mem); insn = emit_move_insn (reg, mem); @@ -10674,7 +10709,7 @@ ix86_expand_epilogue (int style) if (TARGET_VZEROUPPER && !TREE_THIS_VOLATILE (cfun->decl) && !cfun->machine->caller_return_avx256_p) - emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256))); + emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256))); if (crtl->args.pops_args && crtl->args.size) { @@ -11069,6 +11104,30 @@ ix86_live_on_entry (bitmap regs) } } +/* Determine if op is suitable SUBREG RTX for address. */ + +static bool +ix86_address_subreg_operand (rtx op) +{ + enum machine_mode mode; + + if (!REG_P (op)) + return false; + + mode = GET_MODE (op); + + if (GET_MODE_CLASS (mode) != MODE_INT) + return false; + + /* Don't allow SUBREGs that span more than a word. It can lead to spill + failures when the register is one word out of a two word structure. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + + /* Allow only SUBREGs of non-eliminable hard registers. */ + return register_no_elim_operand (op, mode); +} + /* Extract the parts of an RTL expression that is a valid memory address for an instruction. Return 0 if the structure of the address is grossly off. Return -1 if the address contains ASHIFT, so it is not @@ -11085,12 +11144,19 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) int retval = 1; enum ix86_address_seg seg = SEG_DEFAULT; + /* Allow zero-extended SImode addresses, + they will be emitted with addr32 prefix. */ + if (TARGET_64BIT + && GET_CODE (addr) == ZERO_EXTEND + && GET_MODE (addr) == DImode + && GET_MODE (XEXP (addr, 0)) == SImode) + addr = XEXP (addr, 0); + if (REG_P (addr)) base = addr; else if (GET_CODE (addr) == SUBREG) { - /* Allow only subregs of DImode hard regs. */ - if (register_no_elim_operand (SUBREG_REG (addr), DImode)) + if (ix86_address_subreg_operand (SUBREG_REG (addr))) base = addr; else return 0; @@ -11148,8 +11214,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) break; case SUBREG: - /* Allow only subregs of DImode hard regs in PLUS chains. */ - if (!register_no_elim_operand (SUBREG_REG (op), DImode)) + if (!ix86_address_subreg_operand (SUBREG_REG (op))) return 0; /* FALLTHRU */ @@ -11201,9 +11266,10 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) { if (REG_P (index)) ; - /* Allow only subregs of DImode hard regs. */ else if (GET_CODE (index) == SUBREG - && !register_no_elim_operand (SUBREG_REG (index), DImode)) + && ix86_address_subreg_operand (SUBREG_REG (index))) + ; + else return 0; } @@ -11648,10 +11714,7 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, if (REG_P (base)) reg = base; else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base))) - { - reg = SUBREG_REG (base); - gcc_assert (register_no_elim_operand (reg, DImode)); - } + reg = SUBREG_REG (base); else /* Base is not a register. */ return false; @@ -11673,10 +11736,7 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, if (REG_P (index)) reg = index; else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index))) - { - reg = SUBREG_REG (index); - gcc_assert (register_no_elim_operand (reg, DImode)); - } + reg = SUBREG_REG (index); else /* Index is not a register. */ return false; @@ -12095,17 +12155,15 @@ legitimize_pic_address (rtx orig, rtx reg) static rtx get_thread_pointer (bool to_reg) { - rtx tp, reg, insn; + rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); - tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); - if (!to_reg) - return tp; + if (GET_MODE (tp) != Pmode) + tp = convert_to_mode (Pmode, tp, 1); - reg = gen_reg_rtx (Pmode); - insn = gen_rtx_SET (VOIDmode, reg, tp); - insn = emit_insn (insn); + if (to_reg) + tp = copy_addr_to_reg (tp); - return reg; + return tp; } /* Construct the SYMBOL_REF for the tls_get_addr function. */ @@ -12621,7 +12679,11 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, rtx temp = gen_reg_rtx (Pmode); rtx val = force_operand (XEXP (x, 1), temp); if (val != temp) - emit_move_insn (temp, val); + { + if (GET_MODE (val) != Pmode) + val = convert_to_mode (Pmode, val, 1); + emit_move_insn (temp, val); + } XEXP (x, 1) = temp; return x; @@ -12632,7 +12694,11 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, rtx temp = gen_reg_rtx (Pmode); rtx val = force_operand (XEXP (x, 0), temp); if (val != temp) - emit_move_insn (temp, val); + { + if (GET_MODE (val) != Pmode) + val = convert_to_mode (Pmode, val, 1); + emit_move_insn (temp, val); + } XEXP (x, 0) = temp; return x; @@ -12933,9 +12999,10 @@ ix86_delegitimize_address (rtx x) || !MEM_P (orig_x)) return ix86_delegitimize_tls_address (orig_x); x = XVECEXP (XEXP (x, 0), 0, 0); - if (GET_MODE (orig_x) != Pmode) + if (GET_MODE (orig_x) != GET_MODE (x)) { - x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0); + x = simplify_gen_subreg (GET_MODE (orig_x), x, + GET_MODE (x), 0); if (x == NULL_RTX) return orig_x; } @@ -14037,6 +14104,20 @@ ix86_print_operand_address (FILE *file, rtx addr) gcc_assert (ok); + if (parts.base && GET_CODE (parts.base) == SUBREG) + { + rtx tmp = SUBREG_REG (parts.base); + parts.base = simplify_subreg (GET_MODE (parts.base), + tmp, GET_MODE (tmp), 0); + } + + if (parts.index && GET_CODE (parts.index) == SUBREG) + { + rtx tmp = SUBREG_REG (parts.index); + parts.index = simplify_subreg (GET_MODE (parts.index), + tmp, GET_MODE (tmp), 0); + } + base = parts.base; index = parts.index; disp = parts.disp; @@ -14088,6 +14169,13 @@ ix86_print_operand_address (FILE *file, rtx addr) } else { + int code = 0; + + /* Print SImode registers for zero-extended addresses to force + addr32 prefix. Otherwise print DImode registers to avoid it. */ + if (TARGET_64BIT) + code = (GET_CODE (addr) == ZERO_EXTEND) ? 'l' : 'q'; + if (ASSEMBLER_DIALECT == ASM_ATT) { if (disp) @@ -14102,11 +14190,11 @@ ix86_print_operand_address (FILE *file, rtx addr) putc ('(', file); if (base) - print_reg (base, 0, file); + print_reg (base, code, file); if (index) { putc (',', file); - print_reg (index, 0, file); + print_reg (index, code, file); if (scale != 1) fprintf (file, ",%d", scale); } @@ -14141,7 +14229,7 @@ ix86_print_operand_address (FILE *file, rtx addr) putc ('[', file); if (base) { - print_reg (base, 0, file); + print_reg (base, code, file); if (offset) { if (INTVAL (offset) >= 0) @@ -14157,7 +14245,7 @@ ix86_print_operand_address (FILE *file, rtx addr) if (index) { putc ('+', file); - print_reg (index, 0, file); + print_reg (index, code, file); if (scale != 1) fprintf (file, "*%d", scale); } @@ -14865,7 +14953,7 @@ ix86_output_addr_vec_elt (FILE *file, int value) const char *directive = ASM_LONG; #ifdef ASM_QUAD - if (TARGET_64BIT) + if (TARGET_LP64) directive = ASM_QUAD; #else gcc_assert (!TARGET_64BIT); @@ -14962,6 +15050,8 @@ ix86_expand_move (enum machine_mode mode, rtx operands[]) op1 = force_operand (op1, op0); if (op1 == op0) return; + if (GET_MODE (op1) != mode) + op1 = convert_to_mode (mode, op1, 1); } else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op1)) @@ -14989,11 +15079,13 @@ ix86_expand_move (enum machine_mode mode, rtx operands[]) op0, 1, OPTAB_DIRECT); if (tmp == op0) return; + if (GET_MODE (tmp) != mode) + op1 = convert_to_mode (mode, tmp, 1); } } - if ((flag_pic || MACHOPIC_INDIRECT) - && mode == Pmode && symbolic_operand (op1, Pmode)) + if ((flag_pic || MACHOPIC_INDIRECT) + && symbolic_operand (op1, mode)) { if (TARGET_MACHO && !TARGET_64BIT) { @@ -15034,13 +15126,15 @@ ix86_expand_move (enum machine_mode mode, rtx operands[]) else { if (MEM_P (op0)) - op1 = force_reg (Pmode, op1); - else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode)) + op1 = force_reg (mode, op1); + else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode))) { rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; op1 = legitimize_pic_address (op1, reg); if (op0 == op1) return; + if (GET_MODE (op1) != mode) + op1 = convert_to_mode (mode, op1, 1); } } } @@ -15818,7 +15912,7 @@ ix86_split_idivmod (enum machine_mode mode, rtx operands[], insn = emit_move_insn (operands[1], tmp1); else { - /* Need a new scratch register since the old one has result + /* Need a new scratch register since the old one has result of 8bit divide. */ scratch = gen_reg_rtx (mode); emit_move_insn (scratch, tmp1); @@ -21441,7 +21535,17 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2, rtx pop, bool sibcall) { + /* We need to represent that SI and DI registers are clobbered + by SYSV calls. */ + static int clobbered_registers[] = { + XMM6_REG, XMM7_REG, XMM8_REG, + XMM9_REG, XMM10_REG, XMM11_REG, + XMM12_REG, XMM13_REG, XMM14_REG, + XMM15_REG, SI_REG, DI_REG + }; + rtx vec[ARRAY_SIZE (clobbered_registers) + 3]; rtx use = NULL, call; + unsigned int vec_len; if (pop == const0_rtx) pop = NULL; @@ -21479,56 +21583,46 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode) : !call_insn_operand (XEXP (fnaddr, 0), Pmode)) { - fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); - fnaddr = gen_rtx_MEM (QImode, fnaddr); + fnaddr = XEXP (fnaddr, 0); + if (GET_MODE (fnaddr) != Pmode) + fnaddr = convert_to_mode (Pmode, fnaddr, 1); + fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr)); } + vec_len = 0; call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); if (retval) call = gen_rtx_SET (VOIDmode, retval, call); + vec[vec_len++] = call; + if (pop) { pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); - call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); + vec[vec_len++] = pop; } + if (TARGET_64BIT_MS_ABI && (!callarg2 || INTVAL (callarg2) != -2)) { - /* We need to represent that SI and DI registers are clobbered - by SYSV calls. */ - static int clobbered_registers[] = { - XMM6_REG, XMM7_REG, XMM8_REG, - XMM9_REG, XMM10_REG, XMM11_REG, - XMM12_REG, XMM13_REG, XMM14_REG, - XMM15_REG, SI_REG, DI_REG - }; - unsigned int i; - rtx vec[ARRAY_SIZE (clobbered_registers) + 2]; - rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), - UNSPEC_MS_TO_SYSV_CALL); + unsigned i; - vec[0] = call; - vec[1] = unspec; - for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++) - vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i]) - ? TImode : DImode, - gen_rtx_REG - (SSE_REGNO_P (clobbered_registers[i]) - ? TImode : DImode, - clobbered_registers[i])); + vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), + UNSPEC_MS_TO_SYSV_CALL); - call = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec_v (ARRAY_SIZE (clobbered_registers) - + 2, vec)); + for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++) + vec[vec_len++] + = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i]) + ? TImode : DImode, + gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i]) + ? TImode : DImode, + clobbered_registers[i])); } /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */ if (TARGET_VZEROUPPER) { - rtx unspec; int avx256; - if (cfun->machine->callee_pass_avx256_p) { if (cfun->machine->callee_return_avx256_p) @@ -21544,15 +21638,13 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, if (reload_completed) emit_insn (gen_avx_vzeroupper (GEN_INT (avx256))); else - { - unspec = gen_rtx_UNSPEC (VOIDmode, - gen_rtvec (1, GEN_INT (avx256)), - UNSPEC_CALL_NEEDS_VZEROUPPER); - call = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, call, unspec)); - } + vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, + gen_rtvec (1, GEN_INT (avx256)), + UNSPEC_CALL_NEEDS_VZEROUPPER); } + if (vec_len > 1) + call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); call = emit_call_insn (call); if (use) CALL_INSN_FUNCTION_USAGE (call) = use; @@ -21563,9 +21655,20 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, void ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper) { - rtx call = XVECEXP (PATTERN (insn), 0, 0); + rtx pat = PATTERN (insn); + rtvec vec = XVEC (pat, 0); + int len = GET_NUM_ELEM (vec) - 1; + + /* Strip off the last entry of the parallel. */ + gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC); + gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER); + if (len == 1) + pat = RTVEC_ELT (vec, 0); + else + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0))); + emit_insn (gen_avx_vzeroupper (vzeroupper)); - emit_call_insn (call); + emit_call_insn (pat); } /* Output the assembly for a call instruction. */ @@ -21683,7 +21786,8 @@ assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) } /* Calculate the length of the memory address in the instruction - encoding. Does not include the one-byte modrm, opcode, or prefix. */ + encoding. Includes addr32 prefix, does not include the one-byte modrm, + opcode, or other prefixes. */ int memory_address_length (rtx addr) @@ -21710,7 +21814,9 @@ memory_address_length (rtx addr) base = parts.base; index = parts.index; disp = parts.disp; - len = 0; + + /* Add length of addr32 prefix. */ + len = (GET_CODE (addr) == ZERO_EXTEND); /* Rule of thumb: - esp as the base always wants an index, @@ -22855,7 +22961,7 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) case AX_REG: opcode = 0xb8; break; case CX_REG: - opcode = 0xb9; break; + opcode = 0xb9; break; default: gcc_unreachable (); } @@ -24898,7 +25004,7 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, + { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, /* BMI */ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, @@ -25458,7 +25564,7 @@ ix86_init_builtins (void) ix86_init_mmx_sse_builtins (); - if (TARGET_64BIT) + if (TARGET_LP64) ix86_init_builtins_va_builtins_abi (); #ifdef SUBTARGET_INIT_BUILTINS @@ -26704,7 +26810,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, op = expand_normal (arg); gcc_assert (target == 0); if (memory) - target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op)); + { + if (GET_MODE (op) != Pmode) + op = convert_to_mode (Pmode, op, 1); + target = gen_rtx_MEM (tmode, force_reg (Pmode, op)); + } else target = force_reg (tmode, op); arg_adjust = 1; @@ -26747,7 +26857,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, if (i == memory) { /* This must be the memory operand. */ - op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op)); + if (GET_MODE (op) != Pmode) + op = convert_to_mode (Pmode, op, 1); + op = gen_rtx_MEM (mode, force_reg (Pmode, op)); gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); } @@ -26973,8 +27085,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, mode1 = insn_data[icode].operand[1].mode; mode2 = insn_data[icode].operand[2].mode; - op0 = force_reg (Pmode, op0); - op0 = gen_rtx_MEM (mode1, op0); + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0)); if (!insn_data[icode].operand[0].predicate (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); @@ -27005,7 +27118,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, op0 = expand_normal (arg0); icode = CODE_FOR_sse2_clflush; if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = copy_to_mode_reg (Pmode, op0); + { + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + op0 = force_reg (Pmode, op0); + } emit_insn (gen_sse2_clflush (op0)); return 0; @@ -27018,7 +27135,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, op1 = expand_normal (arg1); op2 = expand_normal (arg2); if (!REG_P (op0)) - op0 = copy_to_mode_reg (Pmode, op0); + { + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + op0 = force_reg (Pmode, op0); + } if (!REG_P (op1)) op1 = copy_to_mode_reg (SImode, op1); if (!REG_P (op2)) @@ -27098,7 +27219,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, op0 = expand_normal (arg0); icode = CODE_FOR_lwp_llwpcb; if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = copy_to_mode_reg (Pmode, op0); + { + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + op0 = force_reg (Pmode, op0); + } emit_insn (gen_lwp_llwpcb (op0)); return 0; @@ -27157,7 +27282,10 @@ rdrand_step: arg0 = CALL_EXPR_ARG (exp, 0); op1 = expand_normal (arg0); if (!address_operand (op1, VOIDmode)) - op1 = copy_addr_to_reg (op1); + { + op1 = convert_memory_address (Pmode, op1); + op1 = copy_addr_to_reg (op1); + } emit_move_insn (gen_rtx_MEM (mode0, op1), op0); op1 = gen_reg_rtx (SImode); @@ -28119,9 +28247,25 @@ ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) static reg_class_t ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, - enum machine_mode mode, - secondary_reload_info *sri ATTRIBUTE_UNUSED) + enum machine_mode mode, secondary_reload_info *sri) { + /* Double-word spills from general registers to non-offsettable memory + references (zero-extended addresses) require special handling. */ + if (TARGET_64BIT + && MEM_P (x) + && GET_MODE_SIZE (mode) > UNITS_PER_WORD + && rclass == GENERAL_REGS + && !offsettable_memref_p (x)) + { + sri->icode = (in_p + ? CODE_FOR_reload_noff_load + : CODE_FOR_reload_noff_store); + /* Add the cost of move to a temporary. */ + sri->extra_cost = 1; + + return NO_REGS; + } + /* QImode spills from non-QI registers require intermediate register on 32bit targets. */ if (!TARGET_64BIT @@ -28147,7 +28291,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, /* This condition handles corner case where an expression involving pointers gets vectorized. We're trying to use the address of a - stack slot as a vector initializer. + stack slot as a vector initializer. (set (reg:V2DI 74 [ vect_cst_.2 ]) (vec_duplicate:V2DI (reg/f:DI 20 frame))) @@ -29256,13 +29400,6 @@ ix86_handle_abi_attribute (tree *node, tree name, *no_add_attrs = true; return NULL_TREE; } - if (!TARGET_64BIT) - { - warning (OPT_Wattributes, "%qE attribute only available for 64-bit", - name); - *no_add_attrs = true; - return NULL_TREE; - } /* Can combine regparm with all attributes but fastcall. */ if (is_attribute_p ("ms_abi", name)) @@ -29871,7 +30008,7 @@ ix86_pad_returns (void) /* Count the minimum number of instructions in BB. Return 4 if the number of instructions >= 4. */ -static int +static int ix86_count_insn_bb (basic_block bb) { rtx insn; @@ -29900,10 +30037,10 @@ ix86_count_insn_bb (basic_block bb) } -/* Count the minimum number of instructions in code path in BB. +/* Count the minimum number of instructions in code path in BB. Return 4 if the number of instructions >= 4. */ -static int +static int ix86_count_insn (basic_block bb) { edge e; @@ -34813,7 +34950,7 @@ ix86_autovectorize_vector_sizes (void) #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA -#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra #undef TARGET_SCHED_INIT_GLOBAL #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global @@ -34968,6 +35105,9 @@ ix86_autovectorize_vector_sizes (void) #undef TARGET_FUNCTION_VALUE_REGNO_P #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode + #undef TARGET_SECONDARY_RELOAD #define TARGET_SECONDARY_RELOAD ix86_secondary_reload