/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with this file; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#include "spu-builtins.h"
/* Builtin types, data and prototypes.  */
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs.  */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int can_issue_more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_priority (rtx insn, int pri);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
					    tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
				      tree * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int reg_align (rtx reg);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (tree type, tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

/* Built in types.  */
tree spu_builtin_types[SPU_BTI_MAX];
/* TARGET overrides.  */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

struct gcc_target targetm = TARGET_INITIALIZER;
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params.  */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default.  */
  flag_rename_registers = 1;
}
/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed.  */
void
spu_override_options (void)
{
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;

  flag_omit_frame_pointer = 1;

  if (align_functions < 8)
    align_functions = 8;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* Table of machine attributes.  */
const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",      0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector", 0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case TImode:
    case DFmode:
      return true;

    default:
      return false;
    }
}
/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
}
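/* Illustrative note (not from the original source): for example,
   (subreg:SI (reg:HI x) 0) is accepted because both modes fit within
   the 4-byte preferred slot, while a paradoxical (subreg:DI (reg:SI x) 0)
   is rejected since the SImode value would not occupy the least
   significant bytes of the DImode register on SPU.  */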
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any SUBREG.  */
  if (GET_CODE (op) == SUBREG)
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  if (start > 0)
    {
      s = gen_reg_rtx (src_mode);
      switch (src_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
	  break;
	default:
	  abort ();
	}
      src = s;
    }

  if (width < src_size)
    {
      rtx pat;
      int icode;
      switch (src_mode)
	{
	case SImode:
	  icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
	  break;
	case DImode:
	  icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
	  break;
	case TImode:
	  icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
	  break;
	default:
	  abort ();
	}
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG.  */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
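      /* Worked example (illustrative, not part of the original source):
	 with dst_size == 32, start == 8 and width == 16,
	   maskbits  = -1ll << (32 - 16 - 8)   = ...ffffff00
	   maskbits += 1ll << (32 - 8)         -> low 32 bits 0x00ffff00
	 i.e. exactly bits 8..23, counted from the most significant
	 bit, are selected.  */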
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
	break;
      }
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx mem;

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = adjust_address (mem, TImode, 16);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
    }
  else
    emit_insn (gen_selb (dst, dst, shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes < offset + 16)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
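/* Illustrative note (not from the original source): for a 20-byte,
   16-byte-aligned copy, the loop above moves one full quadword, and the
   tail code builds mask = ff ff ff ff 00 ... 00 so that selb merges the
   first 4 source bytes into the destination quadword while the
   remaining 12 destination bytes are preserved.  */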
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[8][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_vec, 0, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE; we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result;
  rtx comp_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode;
  int index;

  /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1.  */
  if (GET_CODE (spu_compare_op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
	switch (code)
	  {
	  case GE:
	    spu_compare_op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    spu_compare_op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    spu_compare_op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    spu_compare_op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }
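  /* Illustrative example (not in the original source): (x >= 10)
     becomes (x > 9) and (x < 10) becomes (x <= 9), so the constant
     stays in operand 1 where it can often use an immediate compare
     form such as cgti.  */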
  op_mode = GET_MODE (spu_compare_op0);

  if (GET_MODE (spu_compare_op1) == DFmode)
    {
      rtx reg = gen_reg_rtx (DFmode);
      if (!flag_unsafe_math_optimizations
	  || (scode != SPU_GT && scode != SPU_EQ))
	abort ();
      if (reverse_compare)
	emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
      else
	emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
      reverse_compare = 0;
      spu_compare_op0 = reg;
      spu_compare_op1 = CONST0_RTX (DFmode);
    }

  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
	  || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching.  */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = spu_compare_op1;
	  spu_compare_op1 = spu_compare_op0;
	  spu_compare_op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (spu_compare_op0, op_mode))
	spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (spu_compare_op1, op_mode))
	spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 spu_compare_op0,
							 spu_compare_op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);
    }
  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result.  */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result.  */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
static HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
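/* Illustrative usage (not in the original source):
   hwint_to_const_double (SFmode, 0x3f800000) yields the CONST_DOUBLE
   for 1.0f, since 0x3f800000 is the IEEE single-precision bit pattern
   of 1.0; const_double_to_hwint reverses the mapping.  */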
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;
    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	gcc_unreachable ();
      return;
    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
		break;
	      case SPU_ILA:
		fprintf (file, "a");
		break;
	      case SPU_ILH:
		fprintf (file, "h");
		break;
	      case SPU_ILHU:
		fprintf (file, "hu");
		break;
	      default:
		gcc_unreachable ();
	      }
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	    if (info == 1)
	      fprintf (file, "b");
	    else if (info == 2)
	      fprintf (file, "h");
	    else if (info == 4)
	      fprintf (file, "w");
	    else if (info == 8)
	      fprintf (file, "d");
	    break;
	  case IC_IL1s:
	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");
	    break;
	  case IC_FSMBI:
	  case IC_FSMBI2:
	  case IC_IL2:
	  case IC_IL2s:
	  case IC_POOL:
	    abort ();
	  }
      }
      break;
    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
	      case SPU_ILA:
		break;
	      case SPU_ILH:
	      case SPU_ILHU:
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		break;
	      default:
		gcc_unreachable ();
	      }
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	    break;
	  case IC_FSMBI:
	    constant_to_array (mode, x, arr);
	    val = 0;
	    for (i = 0; i < 16; i++)
	      {
		val <<= 1;
		val |= arr[i] & 1;
	      }
	    print_operand (file, GEN_INT (val), 0);
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	    break;
	  case IC_IL1s:
	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	    if (GET_CODE (x) == HIGH)
	      fprintf (file, "@h");
	    break;
	  case IC_FSMBI2:
	  case IC_IL2:
	  case IC_IL2s:
	  case IC_POOL:
	    abort ();
	  }
      }
      break;
      if (xcode == CONST_INT)
	{
	  /* Only the 4 least significant bits are relevant for the
	     generate-control-word instructions.  */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;
    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      else
	gcc_unreachable ();
      return;
    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;
    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls.  */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;
    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;
    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;
    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}
extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction.  */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, hi, lo;
	int i;
	constant_to_array (mode, ops[1], arrhi);
	to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (mode, arrhi);
	lo = array_to_constant (mode, arrlo);
	emit_move_insn (to, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   AND might not be a legitimate real constant.  */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      return 0;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      current_function_uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || current_function_uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation.  */
static int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}

/* This happens after reload, so we need to expand it.  */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + current_function_outgoing_args_size
	      + current_function_pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  var args   |
         |  reg save   | current_function_pretend_args_size bytes
         +-------------+
         | saved regs  | spu_saved_regs_size() bytes
         +-------------+
      FP |    vars     | get_frame_size() bytes
         +-------------+
         |  outgoing   |
         |    args     | current_function_outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |   frame     |
         +-------------+
      SP | back chain  |
         +-------------+
*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    current_function_uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register.  */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (saved_regs_size > 0)
    {
      offset = -current_function_pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && current_function_uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1)  */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain.  */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first.  */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else if (satisfies_constraint_K (GEN_INT (-total_size)))
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      REG_NOTES (insn) =
	gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + current_function_outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  REG_NOTES (insn) =
	    gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       real, REG_NOTES (insn));
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (current_function_calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -current_function_pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 16;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_insn (gen_rtx_USE
		 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient.  */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits.  */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
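/* Illustrative usage (not in the original source):
   spu_const (V4SImode, 7) builds (const_vector:V4SI [7 7 7 7]), and
   spu_const (SFmode, 0x3f800000) builds the SFmode constant 1.0f from
   its bit pattern.  */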
/* Branch hint stuff.  */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint.  (FIXME: We should
   accept less and insert nops to enforce it because hinting is always
   profitable for performance, but we do need to be careful of code
   size.)  */
int spu_hint_dist = (8 * 4);

/* An array of these is used to propagate hints to predecessor blocks.  */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  basic_block bb;		/* the original block.  */
};

/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
{
  rtx branch_label;
  rtx hint, insn, prev, next;

  if (before == 0 || branch == 0 || target == 0)
    return;

  if (distance > 600)
    return;

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);

  /* If the previous insn is pipe0, make the hbr dual issue with it.  If
     the current insn is pipe0, dual issue with it.  */
  prev = prev_active_insn (before);
  if (prev && get_pipe (prev) == 0)
    hint = emit_insn_before (gen_hbr (branch_label, target), before);
  else if (get_pipe (before) == 0 && distance > spu_hint_dist)
    {
      next = next_active_insn (before);
      hint = emit_insn_after (gen_hbr (branch_label, target), before);
      if (next)
	PUT_MODE (next, TImode);
    }
  else
    {
      hint = emit_insn_before (gen_hbr (branch_label, target), before);
      PUT_MODE (hint, TImode);
    }
  recog_memoized (hint);
}
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target.  */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* jump table */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint.  */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL.  */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
static void
insert_branch_hints (void)
{
  struct spu_bb_info *spu_bb_info;
  rtx branch, insn, next;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, head_addr;
  basic_block bb;
  unsigned int j;

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (last_basic_block + 1,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths.  */
  shorten_branches (get_insns ());

  FOR_EACH_BB_REVERSE (bb)
  {
    head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
    branch = 0;
    if (spu_bb_info[bb->index].prop_jump)
      {
	branch = spu_bb_info[bb->index].prop_jump;
	branch_target = get_branch_target (branch);
	branch_addr = INSN_ADDRESSES (INSN_UID (branch));
      }
    /* Search from end of a block to beginning.  In this loop, find
       jumps which need a branch and emit them only when:
       - it's an indirect branch and we're at the insn which sets
       the register
       - we're at an insn that will invalidate the hint. e.g., a
       call, another hint insn, inline asm that clobbers $hbr, and
       some inlined operations (divmodsi4).  Don't consider jumps
       because they are only at the end of a block and are
       considered when we are deciding whether to propagate
       - we're getting too far away from the branch.  The hbr insns
       only have a signed 10-bit offset
       We go back as far as possible so the branch will be considered
       for propagation when we get to the beginning of the block.  */
    next = 0;
    for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
      {
	if (INSN_P (insn))
	  {
	    insn_addr = INSN_ADDRESSES (INSN_UID (insn));
	    if (branch
		&& ((GET_CODE (branch_target) == REG
		     && set_of (branch_target, insn) != NULL_RTX)
		    || insn_clobbers_hbr (insn)
		    || branch_addr - insn_addr > 600))
	      {
		int next_addr = INSN_ADDRESSES (INSN_UID (next));
		if (insn != BB_END (bb)
		    && branch_addr - next_addr >= spu_hint_dist)
		  {
		    if (dump_file)
		      fprintf (dump_file,
			       "hint for %i in block %i before %i\n",
			       INSN_UID (branch), bb->index, INSN_UID (next));
		    spu_emit_branch_hint (next, branch, branch_target,
					  branch_addr - next_addr);
		  }
		branch = 0;
	      }

	    /* JUMP_P will only be true at the end of a block.  When
	       branch is already set it means we've previously decided
	       to propagate a hint for that branch into this block.  */
	    if (CALL_P (insn) || (JUMP_P (insn) && !branch))
	      {
		branch = 0;
		if ((branch_target = get_branch_target (insn)))
		  {
		    branch = insn;
		    branch_addr = insn_addr;
		  }
	      }

	    /* When a branch hint is emitted it will be inserted
	       before "next".  Make sure next is the beginning of a
	       cycle to minimize impact on the scheduled insns.  */
	    if (GET_MODE (insn) == TImode)
	      next = insn;
	  }
	if (insn == BB_HEAD (bb))
	  break;
      }

    if (branch)
      {
	/* If we haven't emitted a hint for this branch yet, it might
	   be profitable to emit it in one of the predecessor blocks,
	   especially for loops.  */
	rtx bbend;
	basic_block prev = 0, prop = 0, prev2 = 0;
	int loop_exit = 0, simple_loop = 0;
	int next_addr = 0;
	if (next)
	  next_addr = INSN_ADDRESSES (INSN_UID (next));

	for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	  if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	    prev = EDGE_PRED (bb, j)->src;
	  else
	    prev2 = EDGE_PRED (bb, j)->src;

	for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	  if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	    loop_exit = 1;
	  else if (EDGE_SUCC (bb, j)->dest == bb)
	    simple_loop = 1;

	/* If this branch is a loop exit then propagate to previous
	   fallthru block.  This catches the cases when it is a simple
	   loop or when there is an initial branch into the loop.  */
	if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
	  prop = prev;

	/* If there is only one adjacent predecessor, don't propagate
	   outside this loop.  This loop_depth test isn't perfect, but
	   I'm not sure the loop_father member is valid at this point.  */
	else if (prev && single_pred_p (bb)
		 && prev->loop_depth == bb->loop_depth)
	  prop = prev;

	/* If this is the JOIN block of a simple IF-THEN then
	   propagate the hint to the HEADER block.  */
	else if (prev && prev2
		 && EDGE_COUNT (bb->preds) == 2
		 && EDGE_COUNT (prev->preds) == 1
		 && EDGE_PRED (prev, 0)->src == prev2
		 && prev2->loop_depth == bb->loop_depth
		 && GET_CODE (branch_target) != REG)
	  prop = prev;

	/* Don't propagate when:
	   - this is a simple loop and the hint would be too far
	   - this is not a simple loop and there are 16 insns in
	   this block already
	   - the predecessor block ends in a branch that will be
	   hinted
	   - the predecessor block ends in an insn that invalidates
	   the hint */
	if (prop
	    && prop->index >= 0
	    && (bbend = BB_END (prop))
	    && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	    (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	    && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
		       "for %i (loop_exit %i simple_loop %i dist %i)\n",
		       bb->index, prop->index, bb->loop_depth,
		       INSN_UID (branch), loop_exit, simple_loop,
		       branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	    spu_bb_info[prop->index].prop_jump = branch;
	    spu_bb_info[prop->index].bb = bb;
	  }
	else if (next && branch_addr - next_addr >= spu_hint_dist)
	  {
	    if (dump_file)
	      fprintf (dump_file, "hint for %i in block %i before %i\n",
		       INSN_UID (branch), bb->index, INSN_UID (next));
	    spu_emit_branch_hint (next, branch, branch_target,
				  branch_addr - next_addr);
	  }
	branch = 0;
      }
  }
  free (spu_bb_info);
}
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   ADDR_VEC insns.  */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  */
static void
insert_nops (void)
{
  rtx insn, next_insn, prev_insn;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES.  */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops.  */
  length = 0;

  prev_insn = 0;
  for (insn = get_insns (); insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if (GET_MODE (insn) == TImode
	  && next_insn
	  && GET_MODE (next_insn) != TImode
	  && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned.  */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
static void
spu_machine_dependent_reorg (void)
{
  if (optimize > 0)
    {
      if (TARGET_BRANCH_HINTS)
	insert_branch_hints ();
      insert_nops ();
    }
}

/* Insn scheduling routines, primarily for dual issue.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
			  int verbose ATTRIBUTE_UNUSED, rtx insn,
			  int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER
      && get_pipe (insn) != -2)
    can_issue_more--;
  return can_issue_more;
}

static int
get_pipe (rtx insn)
{
  enum attr_type t;
  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -2;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
    case TYPE_IPREFETCH:
      return 0;

    case TYPE_LNOP:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_BR:
    case TYPE_MULTI1:
    case TYPE_HBR:
      return 1;
    default:
      abort ();
    }
}
static int
spu_sched_adjust_priority (rtx insn, int pri)
{
  int p = get_pipe (insn);
  /* Schedule UNSPEC_CONVERT's early so they have less effect on
     scheduling.  */
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || p == -2)
    return pri + 100;
  /* Schedule pipe0 insns early for greedier dual issue.  */
  if (p != 1)
    return pri + 50;
  return pri;
}

/* INSN is dependent on DEP_INSN.  */
static int
spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
		       rtx dep_insn ATTRIBUTE_UNUSED, int cost)
{
  if (GET_CODE (insn) == CALL_INSN)
    return cost - 2;
  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier.  */
  if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;
  return cost;
}
/* Create a CONST_DOUBLE from a string.  */
struct rtx_def *
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}
/* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
   CONST_INT fits constraint 'K', i.e., is small.  */
static int
legitimate_const (rtx x, int aligned)
{
  rtx sym, cst;

  /* We can never know if the resulting address fits in 18 bits and can be
     loaded with ila.  Instead we should use the HI and LO relocations to
     load a 32-bit address.  */
  if (TARGET_LARGE_MEM)
    return 0;

  gcc_assert (GET_CODE (x) == CONST);

  if (GET_CODE (XEXP (x, 0)) != PLUS)
    return 0;
  sym = XEXP (XEXP (x, 0), 0);
  cst = XEXP (XEXP (x, 0), 1);
  if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
    return 0;
  if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
    return 0;
  return satisfies_constraint_K (cst);
}
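/* Illustrative example (not in the original source): for
   (const (plus (symbol_ref "x") (const_int 16))) this returns true
   when 16 satisfies constraint 'K' (a small signed 10-bit immediate),
   so the address can be formed without an extra add.  */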
int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
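/* Illustrative examples (not in the original source):
   0x7000     -> SPU_IL   (fits the signed 16-bit il immediate)
   0x30000    -> SPU_ILA  (fits the 18-bit unsigned ila immediate)
   0x12341234 -> SPU_ILH  (both halfwords identical, use ilh)
   0x1230000  -> SPU_ILHU (low halfword zero, use ilhu)  */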
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl.  */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
	|| (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.  */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
	if (!run)
	  {
	    start = i;
	    if (arr[i] == 3)
	      run = 1;
	    else if (arr[i] == 2 && arr[i+1] == 3)
	      run = 2;
	    else if (arr[i] == 0)
	      {
		while (arr[i+run] == run && i+run < 16)
		  run++;
		if (run != 4 && run != 8)
		  cpat = 0;
	      }
	    else
	      cpat = 0;
	    if ((i & (run-1)) != 0)
	      cpat = 0;
	    i += run;
	  }
	else
	  cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
	run = 1;
      if (prun)
	*prun = run;
      if (pstart)
	*pstart = start == -1 ? 16-run : start;
      return 1;
    }
  return 0;
}

/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT. */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi, repeat;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok. */
  if (!flag_pic
      && mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
    op = CONST_VECTOR_ELT (op, 0);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      return TARGET_LARGE_MEM
        || !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
        if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
            && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
          return IC_POOL;
      /* Fall through. */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical. */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
        for (j = 0; j < 4; j++)
          if (arr[j] != arr[i + j])
            repeated = 0;

      if (repeated)
        {
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);

          if (which_immediate_load (val) != SPU_NONE)
            return IC_IL1;
        }

      /* Any mode of 2 bytes or smaller can be loaded with an il
         instruction. */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      repeat = 0;
      for (i = 0; i < 16 && fsmbi; i++)
        if (arr[i] != 0 && repeat == 0)
          repeat = arr[i];
        else if (arr[i] != 0 && arr[i] != repeat)
          fsmbi = 0;
      if (fsmbi)
        return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
        return IC_CPAT;

      if (repeated)
        return IC_IL2;

      return IC_POOL;
    default:
      break;
    }
  gcc_unreachable ();
}
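
/* A few illustrative classifications (example values assumed):
     (const_int 100)        -> IC_IL1   (loadable with il)
     (const_int 0x12345678) -> IC_IL2   (needs ilhu followed by iohl)
     (const_int -1)         -> IC_FSMBI (every byte 0x00 or 0xff)
     (symbol_ref "x")       -> IC_IL1s, or IC_IL2s under -mlarge-mem.  */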

static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
        return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
        {
          val = trunc_int_for_mode (val, QImode);
          if (val >= -0x200 && val <= 0x1ff)
            return SPU_ORBI;
        }
    }
  return SPU_NONE;
}
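
/* Illustrative: 0x1ff is reachable with ori; 0x00120012 repeats per
   halfword and maps to orhi; 0x55555555 repeats per byte and maps to
   orbi.  A value like 0xffff yields SPU_IOHL, which the callers below
   treat as not valid for logical immediates.  */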

/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs.  */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
        && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}

int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}

int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}

int
arith_immediate_p (rtx op, enum machine_mode mode,
                   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated. */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
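
/* Example use (illustrative): a predicate for an instruction with a
   signed 10-bit immediate field, such as ai, would call
   arith_immediate_p (op, mode, -0x200, 0x1ff), accepting a splatted -5
   in any element size but rejecting 0x1234.  */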

/* We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match. */
int
spu_legitimate_constant_p (rtx x)
{
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);
  /* V4SI with all identical symbols is valid. */
  if (!flag_pic
      && GET_MODE (x) == V4SImode
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
           && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
           && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);

  if (GET_CODE (x) == CONST_VECTOR
      && !const_vector_immediate_p (x))
    return 0;
  return 1;
}

/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const, where either reg or const is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  (TODO: It might be
   preferable to allow any alignment and fix it up when splitting.) */
int
spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx x, int reg_ok_strict)
{
  if (mode == TImode && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
    x = XEXP (x, 0);
  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return !TARGET_LARGE_MEM;

    case CONST:
      return !TARGET_LARGE_MEM && legitimate_const (x, 0);

    case CONST_INT:
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

    case SUBREG:
      x = XEXP (x, 0);
      gcc_assert (GET_CODE (x) == REG);

    case REG:
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

    case PLUS:
    case LO_SUM:
      {
        rtx op0 = XEXP (x, 0);
        rtx op1 = XEXP (x, 1);
        if (GET_CODE (op0) == SUBREG)
          op0 = XEXP (op0, 0);
        if (GET_CODE (op1) == SUBREG)
          op1 = XEXP (op1, 0);
        /* We can't just accept any aligned register because CSE can
           change it to a register that is not marked aligned and then
           recog will fail.   So we only accept frame registers because
           they will only be changed to other frame registers. */
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == CONST_INT
            && INTVAL (op1) >= -0x2000
            && INTVAL (op1) <= 0x1fff
            && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
          return 1;
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == REG
            && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
          return 1;
      }
      break;

    default:
      break;
    }
  return 0;
}
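
/* Illustrative examples, assuming the default -mno-large-mem:
     (reg N)                        valid for any base register
     (plus (reg N) (const_int 32))  valid; the offset is 16-byte aligned
     (plus (reg N) (const_int 2))   valid only when N is a frame, stack
                                    or virtual register (known aligned)
     (plus (reg N) (reg M))         valid; alignment is irrelevant.  */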

/* When the address is reg + const_int, force the const_int into a
   register.  */
rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                        enum machine_mode mode)
{
  rtx op0, op1;
  /* Make sure both operands are registers. */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (ALIGNED_SYMBOL_REF_P (op0))
        {
          op0 = force_reg (Pmode, op0);
          mark_reg_pointer (op0, 128);
        }
      else if (GET_CODE (op0) != REG)
        op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
        {
          op1 = force_reg (Pmode, op1);
          mark_reg_pointer (op1, 128);
        }
      else if (GET_CODE (op1) != REG)
        op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
      if (spu_legitimate_address (mode, x, 0))
        return x;
    }
  return NULL_RTX;
}

/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */
static tree
spu_handle_fndecl_attribute (tree * node,
                             tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (0, "`%s' attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle the "vector" attribute.  */
static tree
spu_handle_vector_attribute (tree * node, tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  enum machine_mode mode;
  int unsigned_p;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  unsigned_p = TYPE_UNSIGNED (type);
  switch (mode)
    {
    case DImode:
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
      break;
    case SImode:
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
      break;
    case HImode:
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
      break;
    case QImode:
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
      break;
    case SFmode:
      result = V4SF_type_node;
      break;
    case DFmode:
      result = V2DF_type_node;
      break;
    default:
      break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;         /* No need to hang on to the attribute. */

  if (!result)
    warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
  else
    *node = reconstruct_complex_type (*node, result);

  return NULL_TREE;
}

/* Return nonzero if FUNC is a naked function.  */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}

int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  if (!current_function_is_leaf || current_function_outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return (sp_offset + current_function_outgoing_args_size);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return 0;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + current_function_outgoing_args_size
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;

  return 0;
}

rtx
spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = TYPE_MODE (type);
  int byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* Make sure small structs are left justified in a register. */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
    {
      enum machine_mode smode;
      rtvec v;
      int i;
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      int n = byte_size / UNITS_PER_WORD;
      v = rtvec_alloc (nregs);
      for (i = 0; i < n; i++)
        {
          RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
                                                gen_rtx_REG (TImode,
                                                             FIRST_RETURN_REGNUM
                                                             + i),
                                                GEN_INT (UNITS_PER_WORD * i));
          byte_size -= UNITS_PER_WORD;
        }

      if (n < nregs)
        {
          smode =
            smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
          RTVEC_ELT (v, n) =
            gen_rtx_EXPR_LIST (VOIDmode,
                               gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
                               GEN_INT (UNITS_PER_WORD * n));
        }
      return gen_rtx_PARALLEL (mode, v);
    }
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}

rtx
spu_function_arg (CUMULATIVE_ARGS cum,
                  enum machine_mode mode,
                  tree type, int named ATTRIBUTE_UNUSED)
{
  int byte_size;

  if (cum >= MAX_REGISTER_ARGS)
    return 0;

  byte_size = ((mode == BLKmode)
               ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* The ABI does not allow parameters to be passed partially in
     reg and partially in stack. */
  if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
    return 0;

  /* Make sure small structs are left justified in a register. */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size < UNITS_PER_WORD && byte_size > 0)
    {
      enum machine_mode smode;
      rtx gr_reg;

      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                  gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
                                  const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
    }
  else
    return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
}

/* Variable sized types are passed by reference. */
static bool
spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
                       enum machine_mode mode ATTRIBUTE_UNUSED,
                       tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}

/* Var args. */

/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
            void *__args __attribute__((__aligned(16)));
            void *__skip __attribute__((__aligned(16)));
        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes. */
static tree
spu_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  type_decl =
    build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;
  DECL_ALIGN (f_args) = 128;
  DECL_USER_ALIGN (f_args) = 1;

  DECL_FIELD_CONTEXT (f_skip) = record;
  DECL_ALIGN (f_skip) = 128;
  DECL_USER_ALIGN (f_skip) = 1;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  TREE_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it that way.  It is an
     internal type so hide the warnings from the user. */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element. */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     current_function_args_info;
       the CUMULATIVE_ARGS for this function

     current_function_arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer). */

void
spu_va_start (tree valist, rtx nextarg)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = TREE_CHAIN (f_args);

  valist = build_va_arg_indirect_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area. */
  t = make_tree (TREE_TYPE (args), nextarg);
  if (current_function_pretend_args_size > 0)
    t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
                size_int (-STACK_POINTER_OFFSET));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area. */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
              size_int (current_function_pretend_args_size
                        - STACK_POINTER_OFFSET));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Gimplify va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 15) & -16;
    if (VALIST.__args + paddedsize > VALIST.__skip
        && VALIST.__args <= VALIST.__skip)
      addr = VALIST.__skip + 32;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    ret = *(TYPE *)addr;  */
static tree
spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
                          tree * post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree paddedsize, addr, tmp;
  bool pass_by_reference_p;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = TREE_CHAIN (f_args);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  addr = create_tmp_var (ptr_type_node, "va_arg");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  /* if an object is dynamically sized, a pointer to it is passed
     instead of the object itself. */
  pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
                                               false);
  if (pass_by_reference_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* build conditional expression to calculate addr. The expression
     will be gimplified later. */
  paddedsize = size_int (rsize);
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
                build2 (GT_EXPR, boolean_type_node, tmp, skip),
                build2 (LE_EXPR, boolean_type_node, args, skip));

  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
                build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
                        size_int (32)), args);

  tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
  gimplify_and_add (tmp, pre_p);

  /* update VALIST.__args */
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
  tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
  gimplify_and_add (tmp, pre_p);

  addr = fold_convert (build_pointer_type (type), addr);

  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}
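
/* Worked example (illustrative): for va_arg (ap, int), sizeof (int) is
   4 and rsize rounds up to 16, so each scalar argument consumes one
   16-byte slot.  While __args has not yet reached __skip the value is
   read from *__args and __args advances by 16; once the advance would
   step over __skip, the argument is instead fetched from __skip + 32,
   past the register save area in the caller's frame.  */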

/* Save parameter registers starting with the register that corresponds
   to the first unnamed parameters.  If the first unnamed parameter is
   in the stack then save no registers.  Set pretend_args_size to the
   amount of space needed to save the registers. */
void
spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
                            tree type, int *pretend_size, int no_rtl)
{
  if (!no_rtl)
    {
      rtx tmp;
      int regno;
      int offset;
      int ncum = *cum;

      /* cum currently points to the last named argument, we want to
         start at the next argument. */
      FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);

      offset = -STACK_POINTER_OFFSET;
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
        {
          tmp = gen_frame_mem (V4SImode,
                               plus_constant (virtual_incoming_args_rtx,
                                              offset));
          emit_move_insn (tmp,
                          gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
          offset += 16;
        }
      *pretend_size = offset + STACK_POINTER_OFFSET;
    }
}

void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}

/* This is called to decide when we can simplify a load instruction.  We
   must only return true for registers which we know will always be
   aligned, taking into account that CSE might replace this reg with
   another one that has not been marked aligned.
   So this is really only true for frame, stack and virtual registers,
   which we know are always aligned and should not be adversely affected
   by CSE.  */
static int
regno_aligned_for_load (int regno)
{
  return regno == FRAME_POINTER_REGNUM
    || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
    || regno == STACK_POINTER_REGNUM
    || (regno >= FIRST_VIRTUAL_REGISTER
        && regno <= LAST_VIRTUAL_REGISTER);
}

/* Return TRUE when mem is known to be 16-byte aligned. */
static int
aligned_mem_p (rtx mem)
{
  if (MEM_ALIGN (mem) >= 128)
    return 1;
  if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
    return 1;
  if (GET_CODE (XEXP (mem, 0)) == PLUS)
    {
      rtx p0 = XEXP (XEXP (mem, 0), 0);
      rtx p1 = XEXP (XEXP (mem, 0), 1);
      if (regno_aligned_for_load (REGNO (p0)))
        {
          if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
            return 1;
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
            return 1;
        }
    }
  else if (GET_CODE (XEXP (mem, 0)) == REG)
    {
      if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
        return 1;
    }
  else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
    return 1;
  else if (GET_CODE (XEXP (mem, 0)) == CONST)
    {
      rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
      rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
      if (GET_CODE (p0) == SYMBOL_REF
          && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
        return 1;
    }
  return 0;
}

/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS. */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1. */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}

/* Return TRUE if we are certain the mem refers to a complete object
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
   would make it safe to store with a single instruction.
   We guarantee the alignment and padding for static objects by aligning
   all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
   FIXME: We currently cannot guarantee this for objects on the stack
   because assign_parm_setup_stack calls assign_stack_local with the
   alignment of the parameter mode and in that case the alignment never
   gets adjusted by LOCAL_ALIGNMENT. */
static int
store_with_one_insn_p (rtx mem)
{
  rtx addr = XEXP (mem, 0);
  if (GET_MODE (mem) == BLKmode)
    return 0;
  /* Only static objects. */
  if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* We use the associated declaration to make sure the access is
         referring to the whole object.
         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
         if it is necessary.  Will there be cases where one exists, and
         the other does not?  Will there be cases where both exist, but
         have different types?  */
      tree decl = MEM_EXPR (mem);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
      decl = SYMBOL_REF_DECL (addr);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
    }
  return 0;
}

int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
    abort ();

  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
    {
      rtx from = SUBREG_REG (ops[1]);
      enum machine_mode imode = GET_MODE (from);

      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
                  && GET_MODE_CLASS (imode) == MODE_INT
                  && subreg_lowpart_p (ops[1]));

      if (GET_MODE_SIZE (imode) < 4)
        {
          from = gen_rtx_SUBREG (SImode, from, 0);
          imode = SImode;
        }

      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
        {
          enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
          emit_insn (GEN_FCN (icode) (ops[0], from));
        }
      else
        emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
      return 1;
    }

  /* At least one of the operands needs to be a register. */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    {
      rtx temp = force_reg (mode, ops[1]);
      emit_move_insn (ops[0], temp);
      return 1;
    }
  if (reload_in_progress || reload_completed)
    {
      if (CONSTANT_P (ops[1]))
        return spu_split_immediate (ops);
      return 0;
    }
  else
    {
      if (GET_CODE (ops[0]) == MEM)
        {
          if (!spu_valid_move (ops))
            {
              emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
                                    gen_reg_rtx (TImode)));
              return 1;
            }
        }
      else if (GET_CODE (ops[1]) == MEM)
        {
          if (!spu_valid_move (ops))
            {
              emit_insn (gen_load
                         (ops[0], ops[1], gen_reg_rtx (TImode),
                          gen_reg_rtx (SImode)));
              return 1;
            }
        }
      /* Catch the SImode immediates greater than 0x7fffffff, and sign
         extend them. */
      if (GET_CODE (ops[1]) == CONST_INT)
        {
          HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
          if (val != INTVAL (ops[1]))
            {
              emit_move_insn (ops[0], GEN_INT (val));
              return 1;
            }
        }
    }
  return 0;
}

static int
reg_align (rtx reg)
{
  /* For now, only frame registers are known to be aligned at all times.
     We can't trust REGNO_POINTER_ALIGN because optimization will move
     registers around, potentially changing an "aligned" register in an
     address to an unaligned register, which would result in an invalid
     address.  */
  int regno = REGNO (reg);
  return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
}

void
spu_split_load (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx addr, load, rot, mem, p0, p1;
  int rot_amt;

  addr = XEXP (ops[1], 0);

  rot = 0;
  rot_amt = 0;
  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx
         aligned reg   + unaligned reg   => lqx, rotqby
         aligned reg   + aligned const   => lqd
         aligned reg   + unaligned const => lqd, rotqbyi
         unaligned reg + aligned reg     => lqx, rotqby
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
         unaligned reg + aligned const   => lqd, rotqby
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      p0 = XEXP (addr, 0);
      p1 = XEXP (addr, 1);
      if (reg_align (p0) < 128)
        {
          if (GET_CODE (p1) == REG && reg_align (p1) < 128)
            {
              emit_insn (gen_addsi3 (ops[3], p0, p1));
              rot = ops[3];
            }
          else
            rot = p0;
        }
      else
        {
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              rot_amt = INTVAL (p1) & 15;
              p1 = GEN_INT (INTVAL (p1) & -16);
              addr = gen_rtx_PLUS (SImode, p0, p1);
            }
          else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
            rot = p1;
        }
    }
  else if (GET_CODE (addr) == REG)
    {
      if (reg_align (addr) < 128)
        rot = addr;
    }
  else if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
          && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
        {
          rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
          if (rot_amt & 15)
            addr = gen_rtx_CONST (Pmode,
                                  gen_rtx_PLUS (Pmode,
                                                XEXP (XEXP (addr, 0), 0),
                                                GEN_INT (rot_amt & -16)));
          else
            addr = XEXP (XEXP (addr, 0), 0);
        }
      else
        rot = addr;
    }
  else if (GET_CODE (addr) == CONST_INT)
    {
      rot_amt = INTVAL (addr);
      addr = GEN_INT (rot_amt & -16);
    }
  else if (!ALIGNED_SYMBOL_REF_P (addr))
    rot = addr;

  if (GET_MODE_SIZE (mode) < 4)
    rot_amt += GET_MODE_SIZE (mode) - 4;

  rot_amt &= 15;

  if (rot && rot_amt)
    {
      emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
      rot = ops[3];
      rot_amt = 0;
    }

  load = ops[2];

  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
  mem = change_address (ops[1], TImode, addr);

  emit_insn (gen_movti (load, mem));

  if (rot)
    emit_insn (gen_rotqby_ti (load, load, rot));
  else if (rot_amt)
    emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));

  if (reload_completed)
    emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
  else
    emit_insn (gen_spu_convert (ops[0], load));
}
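
/* Illustrative expansion (assumed, simplified): an SImode load from an
   unaligned register address R becomes roughly

       lqd    Lreg, 0(R)        # load the containing quadword
       rotqby Lreg, Lreg, R     # rotate the word into the preferred slot

   and only the unaligned reg + unaligned reg case needs the extra add
   into the scratch register ops[3].  */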

void
spu_split_store (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx pat = ops[2];
  rtx reg = ops[3];
  rtx addr, p0, p1, p1_lo, smem;
  int aform;
  int scalar;

  addr = XEXP (ops[0], 0);

  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      aform = 0;
      p0 = XEXP (addr, 0);
      p1 = p1_lo = XEXP (addr, 1);
      if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
        {
          p1_lo = GEN_INT (INTVAL (p1) & 15);
          p1 = GEN_INT (INTVAL (p1) & -16);
          addr = gen_rtx_PLUS (SImode, p0, p1);
        }
    }
  else if (GET_CODE (addr) == REG)
    {
      aform = 0;
      p0 = addr;
      p1 = p1_lo = const0_rtx;
    }
  else
    {
      aform = 1;
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
      p1 = 0;                   /* aform doesn't use p1 */
      p1_lo = addr;
      if (ALIGNED_SYMBOL_REF_P (addr))
        p1_lo = const0_rtx;
      else if (GET_CODE (addr) == CONST)
        {
          if (GET_CODE (XEXP (addr, 0)) == PLUS
              && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
              && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
              if (v & -16)
                addr = gen_rtx_CONST (Pmode,
                                      gen_rtx_PLUS (Pmode,
                                                    XEXP (XEXP (addr, 0), 0),
                                                    GEN_INT (v & -16)));
              else
                addr = XEXP (XEXP (addr, 0), 0);
              p1_lo = GEN_INT (v & 15);
            }
        }
      else if (GET_CODE (addr) == CONST_INT)
        {
          p1_lo = GEN_INT (INTVAL (addr) & 15);
          addr = GEN_INT (INTVAL (addr) & -16);
        }
    }

  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));

  scalar = store_with_one_insn_p (ops[0]);
  if (!scalar)
    {
      /* We could copy the flags from the ops[0] MEM to mem here,
         We don't because we want this load to be optimized away if
         possible, and copying the flags will prevent that in certain
         cases, e.g. consider the volatile flag. */

      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
      set_mem_alias_set (lmem, 0);
      emit_insn (gen_movti (reg, lmem));

      if (!p0 || reg_align (p0) >= 128)
        p0 = stack_pointer_rtx;
      if (!p1_lo)
        p1_lo = const0_rtx;

      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
    }
  else if (reload_completed)
    {
      if (GET_CODE (ops[1]) == REG)
        emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
      else if (GET_CODE (ops[1]) == SUBREG)
        emit_move_insn (reg,
                        gen_rtx_REG (GET_MODE (reg),
                                     REGNO (SUBREG_REG (ops[1]))));
      else
        abort ();
    }
  else
    {
      if (GET_CODE (ops[1]) == REG)
        emit_insn (gen_spu_convert (reg, ops[1]));
      else if (GET_CODE (ops[1]) == SUBREG)
        emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
      else
        abort ();
    }

  if (GET_MODE_SIZE (mode) < 4 && scalar)
    emit_insn (gen_shlqby_ti
               (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));

  smem = change_address (ops[0], TImode, addr);
  /* We can't use the previous alias set because the memory has changed
     size and can potentially overlap objects of other types. */
  set_mem_alias_set (smem, 0);

  emit_insn (gen_movti (smem, reg));
}

/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded. */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     padded too. */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded. */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}

/* Parse the -mfixed-range= option string. */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler. */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }
}

int
spu_valid_move (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    return 0;

  /* init_expr_once tries to recog against load and store insns to set
     the direct_load[] and direct_store[] arrays.  We always want to
     consider those loads and stores valid.  init_expr_once is called in
     the context of a dummy function which does not have a decl. */
  if (cfun->decl == 0)
    return 1;

  /* Don't allow loads/stores which would require more than 1 insn.
     During and after reload we assume loads and stores only take 1
     insn. */
  if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
    {
      if (GET_CODE (ops[0]) == MEM
          && (GET_MODE_SIZE (mode) < 4
              || !(store_with_one_insn_p (ops[0])
                   || mem_is_padded_component_ref (ops[0]))))
        return 0;
      if (GET_CODE (ops[1]) == MEM
          && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
        return 0;
    }
  return 1;
}

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction. */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
         of an SImode will always be all 1s, i.e., valid for fsmbi. */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}
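
/* Illustrative: fsmbi expands each of its 16 immediate bits into a
   0x00 or 0xff byte, so any constant whose bytes are all 0x00 or 0xff
   classifies as IC_FSMBI.  IC_FSMBI2 (assumed reading) covers the
   variant where the nonzero bytes are some other single value and a
   second instruction patches the fsmbi result.  */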

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction. */
int
cpat_const_p (rtx x, enum machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}

rtx
gen_cpat_const (rtx * ops)
{
  unsigned char dst[16];
  int i, offset, shift, isize;
  if (GET_CODE (ops[3]) != CONST_INT
      || GET_CODE (ops[2]) != CONST_INT
      || (GET_CODE (ops[1]) != CONST_INT
          && GET_CODE (ops[1]) != REG))
    return 0;
  if (GET_CODE (ops[1]) == REG
      && (!REG_POINTER (ops[1])
          || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
    return 0;

  for (i = 0; i < 16; i++)
    dst[i] = i + 16;
  isize = INTVAL (ops[3]);
  if (isize == 1)
    shift = 3;
  else if (isize == 2)
    shift = 2;
  else
    shift = 0;
  offset = (INTVAL (ops[2]) +
            (GET_CODE (ops[1]) ==
             CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;
  return array_to_constant (TImode, dst);
}
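
/* Worked example (illustrative): a 4-byte element at offset 4 from an
   aligned base, i.e. ops[2] = 4 and ops[3] = 4, gives isize = 4,
   shift = 0 and offset = 4, so dst becomes
   10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f, matching the
   control word a cwd instruction would generate.  */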

/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
   than 16 bytes, the value is repeated across the rest of the array. */
void
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
{
  HOST_WIDE_INT val;
  int i, j, first;

  memset (arr, 0, 16);
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
  if (GET_CODE (x) == CONST_INT
      || (GET_CODE (x) == CONST_DOUBLE
          && (mode == SFmode || mode == DFmode)))
    {
      gcc_assert (mode != VOIDmode && mode != BLKmode);

      if (GET_CODE (x) == CONST_DOUBLE)
        val = const_double_to_hwint (x);
      else
        val = INTVAL (x);
      first = GET_MODE_SIZE (mode) - 1;
      for (i = first; i >= 0; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
      /* Splat the constant across the whole array. */
      for (j = 0, i = first + 1; i < 16; i++)
        {
          arr[i] = arr[j];
          j = (j == first) ? 0 : j + 1;
        }
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    {
      val = CONST_DOUBLE_LOW (x);
      for (i = 15; i >= 8; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
      val = CONST_DOUBLE_HIGH (x);
      for (i = 7; i >= 0; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    {
      int units;
      rtx elt;
      mode = GET_MODE_INNER (mode);
      units = CONST_VECTOR_NUNITS (x);
      for (i = 0; i < units; i++)
        {
          elt = CONST_VECTOR_ELT (x, i);
          if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
            {
              if (GET_CODE (elt) == CONST_DOUBLE)
                val = const_double_to_hwint (elt);
              else
                val = INTVAL (elt);
              first = GET_MODE_SIZE (mode) - 1;
              if (first + i * GET_MODE_SIZE (mode) > 16)
                abort ();
              for (j = first; j >= 0; j--)
                {
                  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
                  val >>= 8;
                }
            }
        }
    }
}
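
/* Examples (illustrative): (const_int 0x1234) in HImode fills arr with
   12 34 repeated eight times; a V4SImode vector of 1, 2, 3, 4 fills it
   with 00 00 00 01  00 00 00 02  00 00 00 03  00 00 00 04.  */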

/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3]. */
rtx
array_to_constant (enum machine_mode mode, unsigned char arr[16])
{
  enum machine_mode inner_mode;
  rtvec v;
  int units, size, i, j, k;
  HOST_WIDE_INT val;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
        val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);
    }

  if (mode == TImode)
    {
      HOST_WIDE_INT high;
      for (i = high = 0; i < 8; i++)
        high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
        val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);
    }
  if (mode == SFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);
    }
  if (mode == DFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val <<= 32;
      val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
      return hwint_to_const_double (DFmode, val);
    }

  if (!VECTOR_MODE_P (mode))
    abort ();

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
    {
      val = 0;
      for (j = 0; j < size; j++, k++)
        val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
        RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
      else
        RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
    }

  return gen_rtx_CONST_VECTOR (mode, v);
}

static void
reloc_diagnostic (rtx x)
{
  tree loc_decl, decl = 0;
  const char *msg;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL. */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* We use last_assemble_variable_decl to get line information.  It's
     not always going to be right and might not even be close, but will
     be right for the more common cases. */
  if (!last_assemble_variable_decl || in_section == ctors_section)
    loc_decl = 0;
  else
    loc_decl = last_assemble_variable_decl;

  /* The decl could be a string constant. */
  if (decl && DECL_P (decl))
    msg = "%Jcreating run-time relocation for %qD";
  else
    msg = "creating run-time relocation";

  if (TARGET_WARN_RELOC)
    warning (0, msg, loc_decl, decl);
  else
    error (msg, loc_decl, decl);
}

/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them. */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't. */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}

static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return (LEGITIMATE_CONSTANT_P (x)) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough. */
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
        *total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
        {
          *total = COSTS_N_INSNS (9);
          return true;
        }
      break;

    case MULT:
      cost =
        GET_CODE (XEXP (x, 0)) ==
        REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
              cost = COSTS_N_INSNS (14);
              if ((val & 0xffff) == 0)
                cost = COSTS_N_INSNS (9);
              else if (val > 0 && val < 0x10000)
                cost = COSTS_N_INSNS (11);
            }
        }
      *total = cost;
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
        *total = COSTS_N_INSNS (0);
      else
        *total = COSTS_N_INSNS (4);
      return true;
    }
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));

  *total = cost;
  return true;
}

static enum machine_mode
spu_eh_return_filter_mode (void)
{
  /* We would like this to be SImode, but sjlj exceptions seem to work
     only with word_mode. */
  return word_mode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call. */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}

/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.) */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again. */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
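
/* A note on the shuffle constant above (illustrative reading): the
   byte pattern 00 01 02 03 repeated four times copies word 0 of op1
   into every slot, so the single subv4si3 decrements the stack pointer
   in slot 0 and the Available Stack Size in slot 1 by the same
   amount.  */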

void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second. */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}

static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
}

/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want. */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
          && ((type) == 0
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
              || int_size_in_bytes (type) >
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}

/* Create the built-in types and functions */

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};

static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
                        (void_type_node,
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared. */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];            /* build_function will make a copy. */
      int parm;

      if (d->name == 0)
        continue;

      /* find last parm */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
        {
        }

      p = void_list_node;
      while (parm > 1)
        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      d->fndecl =
        add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
                              NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
        TREE_READONLY (d->fndecl) = 1;
    }
}

void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp. */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp. */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}

int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return (channel >= 21 && channel <= 27);
}

void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
    {
      rtvec v = rtvec_alloc (4);
      RTVEC_ELT (v, 0) = ops[1];
      RTVEC_ELT (v, 1) = ops[1];
      RTVEC_ELT (v, 2) = ops[1];
      RTVEC_ELT (v, 3) = ops[1];
      emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
          && GET_CODE (ops[1]) != SUBREG)
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
        {
        case V2DImode:
        case V2DFmode:
          shuf =
            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
                                TImode);
          break;
        case V4SImode:
        case V4SFmode:
          shuf =
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
                                TImode);
          break;
        case V8HImode:
          shuf =
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
                                TImode);
          break;
        case V16QImode:
          shuf =
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
                                TImode);
          break;
        default:
          abort ();
        }
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
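
/* The shuffle constants above are byte-selection masks (illustrative
   reading): for V4SImode the pattern 00 01 02 03 repeated four times
   replicates the preferred word of ops[1] into every element, and for
   V16QImode sixteen copies of 03 replicate the preferred byte.  */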

void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
          break;
        case V4SFmode:
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
          break;
        case V4SImode:
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
          break;
        case V2DImode:
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
          break;
        case V2DFmode:
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
          break;
        default:
          abort ();
        }
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}

void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
             (mask, stack_pointer_rtx, offset,
              GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}

void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
        pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
          emit_insn (gen_addsi3 (offset, offset, offset));
          break;
        case V4SFmode:
        case V4SImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
          break;
        case V2DImode:
        case V2DFmode:
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
          break;
        default:
          abort ();
        }
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}

void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
        2, 3, 0, 1, 18, 19, 16, 17,
        0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
        0x41, 0, 0, 79,
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
        0x60, 0x80, 0, 79,
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
        0x30, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
         fits 18 bits and the last 4 are zeros.  This will be true if
         the stack pointer is initialized to 0x3fff0 at program start,
         otherwise the ila instruction will be garbage. */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}

void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
        arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
        arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
        {
        case HImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_extendhisi2 (sign, ops[1]));
          arr[last] = 0x03;
          arr[last - 1] = 0x02;
          break;
        case SImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
          for (i = 0; i < 4; i++)
            arr[last - i] = 3 - i;
          break;
        case DImode:
          sign = gen_reg_rtx (SImode);
          c = gen_reg_rtx (SImode);
          emit_insn (gen_spu_convert (c, ops[1]));
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
          for (i = 0; i < 8; i++)
            arr[last - i] = 7 - i;
          break;
        default:
          abort ();
        }
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
4778 /* expand vector initialization. If there are any constant parts,
4779 load constant parts first. Then load any non-constant parts. */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var;
      else
        {
          if (first_constant == NULL_RTX)
            first_constant = x;
        }
      if (i > 0 && !rtx_equal_p (x, first))
        all_same = false;
    }

  /* if all elements are the same, use splats to repeat elements */
  if (all_same)
    {
      if (!CONSTANT_P (first)
          && !register_operand (first, GET_MODE (x)))
        first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* load constant parts */
  if (n_var != n_elts)
    {
      if (n_var == 0)
        {
          emit_move_insn (target,
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
        }
      else
        {
          rtx constant_parts_rtx = copy_rtx (vals);

          gcc_assert (first_constant != NULL_RTX);
          /* fill empty slots with the first constant, this increases
             our chance of using splats in the recursive call below. */
          for (i = 0; i < n_elts; ++i)
            if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
              XVECEXP (constant_parts_rtx, 0, i) = first_constant;

          spu_expand_vector_init (target, constant_parts_rtx);
        }
    }

  /* load variable parts */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
        {
          x = XVECEXP (vals, 0, i);
          if (!CONSTANT_P (x))
            {
              if (!register_operand (x, GET_MODE (x)))
                x = force_reg (GET_MODE (x), x);
              insert_operands[1] = x;
              insert_operands[3] = GEN_INT (i);
              spu_builtin_insert (insert_operands);
            }
        }
    }
}

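/* Return OP in a register of mode MODE.  Constants and BLKmode values
   are converted with convert_to_mode; same-size modes are handled with
   a subreg, and everything else goes through gen_spu_convert.  */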
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
          || GET_MODE (op) == BLKmode)
        return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
        return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}

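/* Check that operand OP of builtin D matches parameter class P:
   report an error when an immediate operand is not a literal in the
   expected range, and warn when low address bits will be ignored.  */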
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands. */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
        error ("%s expects an integer literal in the range [%d, %d].",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
          && (GET_CODE (XEXP (op, 0)) == PLUS
              || GET_CODE (XEXP (op, 0)) == MINUS))
        {
          v = INTVAL (XEXP (XEXP (op, 0), 1));
          op = XEXP (XEXP (op, 0), 0);
        }
      else if (GET_CODE (op) == CONST_INT)
        v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
        v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range. */
      if (v < spu_builtin_range[range].low
          || v > spu_builtin_range[range].high)
        error ("%s expects an integer literal in the range [%d, %d]. ("
               HOST_WIDE_INT_PRINT_DEC ")",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high,
               v);
      switch (p)
        {
        case SPU_BTI_S10_4:
          lsbits = 4;
          break;
        case SPU_BTI_U16_2:
          /* This is only used in lqa, and stqa.  Even though the insns
             encode 16 bits of the address (all but the 2 least
             significant), only 14 bits are used because it is masked to
             be 16 byte aligned. */
          lsbits = 4;
          break;
        case SPU_BTI_S16_2:
          /* This is used for lqr and stqr. */
          lsbits = 2;
          break;
        default:
          lsbits = 0;
        }

      if (GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == SYMBOL_REF
              && SYMBOL_REF_FUNCTION_P (op))
          || (v & ((1 << lsbits) - 1)) != 0)
        warning (0, "%d least significant bits of %s are ignored.", lsbits,
                 d->name);
    }
}

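/* Expand the arguments of builtin call EXP into OPS[].  When the
   builtin returns a value, TARGET is placed in OPS[0] first.  Returns
   the number of operands filled in.  */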
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
                     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl. */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; i < insn_data[icode].n_operands; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      if (arg == 0)
        abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
    }
  return i;
}

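/* Expand builtin D, a call described by EXP, emitting the generated
   insns.  TARGET is the preferred place for the result; returns the
   rtx holding the result, or 0 for builtins that expand to nothing.  */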
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
                      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  tree return_type;

  /* Set up ops[] with values from arglist. */
  expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0. */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype. */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
        ops[0] = target;
      else
        target = ops[0] = gen_reg_rtx (tmode);
      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
        abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
                              gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    }
  /* Ignore align_hint, but still expand its args in case they have
     side effects. */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands. */
  for (p = 1; i < insn_data[icode].n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
        mode = insn_data[d->icode].operand[i].mode;
      else
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar. */
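      /* E.g. (illustrative): for si_ai with a literal operand of 10
         in a V4SImode operand slot, the CONST_INT becomes the constant
         vector (10, 10, 10, 10) via spu_const; a non-constant scalar
         is instead converted to the vector's inner mode and splatted
         with gen_spu_splats below.  */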
      if (VECTOR_MODE_P (mode)
          && (GET_CODE (ops[i]) == CONST_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
        {
          if (GET_CODE (ops[i]) == CONST_INT)
            ops[i] = spu_const (mode, INTVAL (ops[i]));
          else
            {
              rtx reg = gen_reg_rtx (mode);
              enum machine_mode imode = GET_MODE_INNER (mode);
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
              if (imode != GET_MODE (ops[i]))
                ops[i] = convert_to_mode (imode, ops[i],
                                          TYPE_UNSIGNED (spu_builtin_types
                                                         [d->parm[i]]));
              emit_insn (gen_spu_splats (reg, ops[i]));
              ops[i] = reg;
            }
        }

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
        ops[i] = spu_force_reg (mode, ops[i]);
    }
  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype. */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}

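/* Implement TARGET_EXPAND_BUILTIN.  */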
rtx
spu_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}

/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULE_0].fndecl;
      else
        return spu_builtins[SPU_MULE_1].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULO_1].fndecl;
      else
        return spu_builtins[SPU_MULO_0].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we
     subtract the latency of a mispredicted branch from the costs that
     are incurred when the vectorized version is executed.  */
  if (runtime_test)
    return -19;
  else
    return 0;
}

void
spu_init_expanders (void)
{
  /* HARD_FRAME_POINTER_REGNUM is only 128 bit aligned when
     frame_pointer_needed is true.  We don't know that until we're
     expanding the prologue. */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}