1 /* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 2 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with this file; see the file COPYING. If not, write to the Free
15 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 #include "coretypes.h"
24 #include "hard-reg-set.h"
26 #include "insn-config.h"
27 #include "conditions.h"
28 #include "insn-attr.h"
38 #include "basic-block.h"
39 #include "integrate.h"
45 #include "target-def.h"
46 #include "langhooks.h"
48 #include "cfglayout.h"
49 #include "sched-int.h"
54 #include "tree-gimple.h"
55 #include "tm-constrs.h"
56 #include "spu-builtins.h"
58 /* Builtin types, data and prototypes. */
/* Inclusive [low, high] ranges for the immediate operands accepted by
   each SPU builtin argument type (SPU_BTI_*).  The "_2"/"_4" suffixed
   entries cover values scaled by the instruction's implicit shift.
   NOTE(review): the struct body and the array's closing brace are not
   visible in this excerpt of the listing.  */
59 struct spu_builtin_range
64 static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
80 /* Target specific attribute specifications. */
/* One flag per hard register; presumably set when a register has been
   allocated -- TODO confirm against the code that writes it (not
   visible here).  */
81 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
83 /* Prototypes and external defs. */
/* Forward declarations for the static helpers and target hooks defined
   later in this file.  NOTE(review): several multi-line prototypes are
   missing their continuation lines in this excerpt (e.g. lines 92/93,
   96/97, 103/104, 118/119, 126/127).  */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88 static rtx get_pic_reg (void);
89 static int need_to_save_reg (int regno, int saving);
90 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
94 static void emit_nop_for_insn (rtx insn);
95 static bool insn_clobbers_hbr (rtx insn);
96 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
98 static rtx get_branch_target (rtx branch);
99 static void insert_branch_hints (void);
100 static void insert_nops (void);
101 static void spu_machine_dependent_reorg (void);
102 static int spu_sched_issue_rate (void);
103 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
105 static int get_pipe (rtx insn);
106 static int spu_sched_adjust_priority (rtx insn, int pri);
107 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
108 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
110 unsigned char *no_add_attrs);
111 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
113 unsigned char *no_add_attrs);
114 static int spu_naked_function_p (tree func);
115 static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
116 tree type, unsigned char named);
117 static tree spu_build_builtin_va_list (void);
118 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
120 static int regno_aligned_for_load (int regno);
121 static int store_with_one_insn_p (rtx mem);
122 static int reg_align (rtx reg);
123 static int mem_is_padded_component_ref (rtx x);
124 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
125 static void spu_asm_globalize_label (FILE * file, const char *name);
126 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
128 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
129 static void spu_init_libfuncs (void);
130 static bool spu_return_in_memory (tree type, tree fntype);
131 static void fix_range (const char *);
132 static void spu_encode_section_info (tree, rtx, int);
133 static tree spu_builtin_mul_widen_even (tree);
134 static tree spu_builtin_mul_widen_odd (tree);
135 static tree spu_builtin_mask_for_load (void);
137 extern const char *reg_names[];
/* Operands of the pending comparison, recorded by the cbranch/cstore
   expanders and consumed by spu_emit_branch_or_set below.  */
138 rtx spu_compare_op0, spu_compare_op1;
/* Classification of constants by how they can be loaded; enumerators
   of an enum whose opening lines are missing from this excerpt.  */
153 IC_POOL, /* constant pool */
154 IC_IL1, /* one il* instruction */
155 IC_IL2, /* both ilhu and iohl instructions */
156 IC_IL1s, /* one il* instruction */
157 IC_IL2s, /* both ilhu and iohl instructions */
158 IC_FSMBI, /* the fsmbi instruction */
159 IC_CPAT, /* one of the c*d instructions */
162 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
163 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
164 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
165 static enum immediate_class classify_immediate (rtx op,
166 enum machine_mode mode);
168 /* Built in types. */
169 tree spu_builtin_types[SPU_BTI_MAX];
171 /* TARGET overrides. */
/* Map the generic TARGET_* hooks onto the SPU implementations declared
   above, then instantiate the target hook vector 'targetm'.  */
173 #undef TARGET_INIT_BUILTINS
174 #define TARGET_INIT_BUILTINS spu_init_builtins
176 #undef TARGET_EXPAND_BUILTIN
177 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
179 #undef TARGET_EH_RETURN_FILTER_MODE
180 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
182 /* The .8byte directive doesn't seem to work well for a 32 bit
184 #undef TARGET_ASM_UNALIGNED_DI_OP
185 #define TARGET_ASM_UNALIGNED_DI_OP NULL
187 #undef TARGET_RTX_COSTS
188 #define TARGET_RTX_COSTS spu_rtx_costs
190 #undef TARGET_ADDRESS_COST
191 #define TARGET_ADDRESS_COST hook_int_rtx_0
193 #undef TARGET_SCHED_ISSUE_RATE
194 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
196 #undef TARGET_SCHED_VARIABLE_ISSUE
197 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
199 #undef TARGET_SCHED_ADJUST_PRIORITY
200 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
202 #undef TARGET_SCHED_ADJUST_COST
203 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
205 const struct attribute_spec spu_attribute_table[];
206 #undef TARGET_ATTRIBUTE_TABLE
207 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
209 #undef TARGET_ASM_INTEGER
210 #define TARGET_ASM_INTEGER spu_assemble_integer
212 #undef TARGET_SCALAR_MODE_SUPPORTED_P
213 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
215 #undef TARGET_VECTOR_MODE_SUPPORTED_P
216 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
218 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
219 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
221 #undef TARGET_ASM_GLOBALIZE_LABEL
222 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
224 #undef TARGET_PASS_BY_REFERENCE
225 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
227 #undef TARGET_MUST_PASS_IN_STACK
228 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
230 #undef TARGET_BUILD_BUILTIN_VA_LIST
231 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
233 #undef TARGET_SETUP_INCOMING_VARARGS
234 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
236 #undef TARGET_MACHINE_DEPENDENT_REORG
237 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
239 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
240 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
242 #undef TARGET_DEFAULT_TARGET_FLAGS
243 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
245 #undef TARGET_INIT_LIBFUNCS
246 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
248 #undef TARGET_RETURN_IN_MEMORY
249 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
251 #undef TARGET_ENCODE_SECTION_INFO
252 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
254 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
255 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
257 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
258 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
260 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
261 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
/* The single definition of the target hook vector for this backend.  */
263 struct gcc_target targetm = TARGET_INITIALIZER;
265 /* Sometimes certain combinations of command options do not make sense
266 on a particular target machine. You can define a macro
267 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
268 executed once just after all the command options have been parsed. */
/* Raise the default unroll/scheduling params (the SPU's large register
   file tolerates bigger values), force -fomit-frame-pointer, require at
   least 8-byte function alignment (assignment line missing from this
   excerpt), and apply any -mfixed-range= request.  */
270 spu_override_options (void)
272 /* Override some of the default param values. With so many registers
273 larger values are better for these params. */
/* Only replace the params if they still hold their built-in defaults,
   so explicit --param values from the user are respected.  */
274 if (MAX_UNROLLED_INSNS == 100)
275 MAX_UNROLLED_INSNS = 250;
276 if (MAX_PENDING_LIST_LENGTH == 32)
277 MAX_PENDING_LIST_LENGTH = 128;
279 flag_omit_frame_pointer = 1;
281 if (align_functions < 8)
284 if (spu_fixed_range_string)
285 fix_range (spu_fixed_range_string);
288 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
289 struct attribute_spec.handler. */
291 /* Table of machine attributes. */
/* "naked": function declarations only, no prologue/epilogue emitted.
   "spu_vector": types only, marks a type as an SPU vector type.
   NULL entry terminates the table.  */
292 const struct attribute_spec spu_attribute_table[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
295 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
296 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
297 { NULL, 0, 0, false, false, false, NULL }
300 /* True if MODE is valid for the target. By "valid", we mean able to
301 be manipulated in non-trivial ways. In particular, this means all
302 the arithmetic is supported. */
/* Body (the mode switch) is missing from this excerpt.  */
304 spu_scalar_mode_supported_p (enum machine_mode mode)
322 /* Similarly for vector modes. "Supported" here is less strict. At
323 least some operations are supported; need to check optabs or builtins
324 for further details. */
/* Body is missing from this excerpt.  */
326 spu_vector_mode_supported_p (enum machine_mode mode)
343 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
344 least significant bytes of the outer mode. This function returns
345 TRUE for the SUBREG's where this is correct. */
347 valid_subreg (rtx op)
349 enum machine_mode om = GET_MODE (op);
350 enum machine_mode im = GET_MODE (SUBREG_REG (op));
/* Accept same-size subregs, and subregs where both inner and outer
   modes fit in one 32-bit word (so the value stays right-justified).  */
351 return om != VOIDmode && im != VOIDmode
352 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
353 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
356 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
357 and adjust the start offset. */
/* OP: the operand; START: in/out bit offset, adjusted as the SUBREG is
   peeled and when the operand is narrower than SImode.  Returns the
   possibly rewrapped operand.  NOTE(review): several interior lines
   (local decls, braces, the return) are missing from this excerpt.  */
359 adjust_operand (rtx op, HOST_WIDE_INT * start)
361 enum machine_mode mode;
363 /* Strip any SUBREG */
364 if (GET_CODE (op) == SUBREG)
/* Compensate START for the bytes the SUBREG skipped over.  */
368 GET_MODE_BITSIZE (GET_MODE (op)) -
369 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
370 op = SUBREG_REG (op);
372 /* If it is smaller than SI, assure a SUBREG */
373 op_size = GET_MODE_BITSIZE (GET_MODE (op));
377 *start += 32 - op_size;
380 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
381 mode = mode_for_size (op_size, MODE_INT, 0);
382 if (mode != GET_MODE (op))
383 op = gen_rtx_SUBREG (mode, op, 0);
/* Expand a bit-field extract: ops[0] = dst, ops[1] = src,
   ops[2] = width (bits), ops[3] = start (bit offset).  UNSIGNEDP
   selects zero- vs sign-extension.  Strategy: left-shift the field to
   the top of the source mode, then shift right (logical or arithmetic)
   to bring it down, then convert to the destination mode.
   NOTE(review): switch scaffolding and braces are missing from this
   excerpt; only the per-mode shift emissions are visible.  */
388 spu_expand_extv (rtx ops[], int unsignedp)
390 HOST_WIDE_INT width = INTVAL (ops[2]);
391 HOST_WIDE_INT start = INTVAL (ops[3]);
392 HOST_WIDE_INT src_size, dst_size;
393 enum machine_mode src_mode, dst_mode;
394 rtx dst = ops[0], src = ops[1];
397 dst = adjust_operand (ops[0], 0);
398 dst_mode = GET_MODE (dst);
399 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
401 src = adjust_operand (src, &start);
402 src_mode = GET_MODE (src);
403 src_size = GET_MODE_BITSIZE (GET_MODE (src));
/* Left-justify the field; the shift insn depends on the source mode.  */
407 s = gen_reg_rtx (src_mode);
411 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
414 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
417 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
/* Right-shift to drop the bits below the field; logical for unsigned,
   arithmetic for signed.  */
425 if (width < src_size)
432 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
435 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
438 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
443 s = gen_reg_rtx (src_mode);
444 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
449 convert_move (dst, src, unsignedp);
/* Expand a bit-field insert: ops[0] = dst (reg or mem),
   ops[1] = width (bits), ops[2] = start (bit offset), ops[3] = src.
   Builds a mask selecting the field, shifts the source into position,
   and merges with selb.  MEM destinations go through an aligned
   16-byte read/modify/write, with a second quadword handled when the
   field straddles the alignment boundary.  NOTE(review): braces,
   switch scaffolding and a few statements are missing from this
   excerpt.  */
453 spu_expand_insv (rtx ops[])
455 HOST_WIDE_INT width = INTVAL (ops[1]);
456 HOST_WIDE_INT start = INTVAL (ops[2]);
457 HOST_WIDE_INT maskbits;
458 enum machine_mode dst_mode, src_mode;
459 rtx dst = ops[0], src = ops[3];
460 int dst_size, src_size;
/* For a memory destination, do the merge in a TImode register and
   write the quadword back afterwards.  */
466 if (GET_CODE (ops[0]) == MEM)
467 dst = gen_reg_rtx (TImode);
469 dst = adjust_operand (dst, &start);
470 dst_mode = GET_MODE (dst);
471 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
473 if (CONSTANT_P (src))
/* Force constant sources into a register of the smallest mode that
   holds WIDTH bits.  */
475 enum machine_mode m =
476 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
477 src = force_reg (m, convert_to_mode (m, src, 0));
479 src = adjust_operand (src, 0);
480 src_mode = GET_MODE (src);
481 src_size = GET_MODE_BITSIZE (GET_MODE (src));
483 mask = gen_reg_rtx (dst_mode);
484 shift_reg = gen_reg_rtx (dst_mode);
485 shift = dst_size - start - width;
487 /* It's not safe to use subreg here because the compiler assumes
488 that the SUBREG_REG is right justified in the SUBREG. */
489 convert_move (shift_reg, src, 1);
/* Shift the source left so it lines up with the field position.  */
496 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
499 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
502 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
/* Build the field mask.  For SI/DI a shifted all-ones constant is
   used; for TI the mask is built byte-by-byte below.  */
514 maskbits = (-1ll << (32 - width - start));
516 maskbits += (1ll << (32 - start));
517 emit_move_insn (mask, GEN_INT (maskbits));
520 maskbits = (-1ll << (64 - width - start));
522 maskbits += (1ll << (64 - start));
523 emit_move_insn (mask, GEN_INT (maskbits));
527 unsigned char arr[16];
529 memset (arr, 0, sizeof (arr));
530 arr[i] = 0xff >> (start & 7);
531 for (i++; i <= (start + width - 1) / 8; i++)
533 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
534 emit_move_insn (mask, array_to_constant (TImode, arr));
540 if (GET_CODE (ops[0]) == MEM)
/* Read-modify-write of the aligned quadword containing the field.  */
542 rtx aligned = gen_reg_rtx (SImode);
543 rtx low = gen_reg_rtx (SImode);
544 rtx addr = gen_reg_rtx (SImode);
545 rtx rotl = gen_reg_rtx (SImode);
546 rtx mask0 = gen_reg_rtx (TImode);
549 emit_move_insn (addr, XEXP (ops[0], 0));
550 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
551 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
/* Rotate data and mask into the byte position within the quadword.  */
552 emit_insn (gen_negsi2 (rotl, low));
553 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
554 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
555 mem = change_address (ops[0], TImode, aligned);
556 set_mem_alias_set (mem, 0);
557 emit_move_insn (dst, mem);
558 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
559 emit_move_insn (mem, dst);
560 if (start + width > MEM_ALIGN (ops[0]))
/* Field may spill into the next quadword; merge there too.  */
562 rtx shl = gen_reg_rtx (SImode);
563 rtx mask1 = gen_reg_rtx (TImode);
564 rtx dst1 = gen_reg_rtx (TImode);
566 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
567 emit_insn (gen_shlqby_ti (mask1, mask, shl));
568 mem1 = adjust_address (mem, TImode, 16);
569 set_mem_alias_set (mem1, 0);
570 emit_move_insn (dst1, mem1);
571 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
572 emit_move_insn (mem1, dst1);
/* Register destination: a single selb merges the shifted source.  */
576 emit_insn (gen_selb (dst, dst, shift_reg, mask));
/* Expand a block move (movmem): ops[0] = dst mem, ops[1] = src mem,
   ops[2] = byte count, ops[3] = alignment.  Only handles constant
   sizes up to MOVE_RATIO * 8 bytes; copies whole 16-byte quadwords,
   then uses a selb mask for the sub-quadword tail.  NOTE(review):
   braces, the return statements, and parts of the tail-handling loop
   are missing from this excerpt.  */
581 spu_expand_block_move (rtx ops[])
583 HOST_WIDE_INT bytes, align, offset;
584 rtx src, dst, sreg, dreg, target;
586 if (GET_CODE (ops[2]) != CONST_INT
587 || GET_CODE (ops[3]) != CONST_INT
588 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
591 bytes = INTVAL (ops[2]);
592 align = INTVAL (ops[3]);
/* Copy as many full quadwords as possible with V16QI moves.  */
602 for (offset = 0; offset + 16 <= bytes; offset += 16)
604 dst = adjust_address (ops[0], V16QImode, offset);
605 src = adjust_address (ops[1], V16QImode, offset);
606 emit_move_insn (dst, src);
/* Tail: build a byte mask covering the remaining bytes and merge the
   source quadword into the destination quadword with selb.  */
611 unsigned char arr[16] = { 0 };
612 for (i = 0; i < bytes - offset; i++)
614 dst = adjust_address (ops[0], V16QImode, offset);
615 src = adjust_address (ops[1], V16QImode, offset);
616 mask = gen_reg_rtx (V16QImode);
617 sreg = gen_reg_rtx (V16QImode);
618 dreg = gen_reg_rtx (V16QImode);
619 target = gen_reg_rtx (V16QImode);
620 emit_move_insn (mask, array_to_constant (V16QImode, arr));
621 emit_move_insn (dreg, dst);
622 emit_move_insn (sreg, src);
623 emit_insn (gen_selb (target, dreg, sreg, mask));
624 emit_move_insn (dst, target);
/* The three compare operations the SPU supports natively, and the insn
   codes implementing each of them for every operand mode (rows are
   indexed by mode, columns by spu_comp_code).  A 0 entry means the
   combination has no direct insn.  NOTE(review): the enum declaration
   line and one table row are missing from this excerpt.  */
632 { SPU_EQ, SPU_GT, SPU_GTU };
635 int spu_comp_icode[8][3] = {
636 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
637 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
638 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
639 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
640 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
641 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
643 {CODE_FOR_ceq_vec, 0, 0},
646 /* Generate a compare for CODE. Return a brand-new rtx that represents
647 the result of the compare. GCC can figure this out too if we don't
648 provide all variations of compares, but GCC always wants to use
649 WORD_MODE, we can generate better code in most cases if we do it
/* IS_SET selects what to do with the compare result: 0 = emit a
   conditional branch to operands[0], 2 = emit a conditional select
   (operands[2]/operands[3] via selb), otherwise store the (possibly
   inverted/extended) result into operands[0].  Uses the pending
   comparison in spu_compare_op0/op1.  NOTE(review): many interior
   lines -- the canonicalization switch arms, comp_mode/scode setup,
   and several braces -- are missing from this excerpt.  */
652 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
654 int reverse_compare = 0;
655 int reverse_test = 0;
658 rtx target = operands[0];
659 enum machine_mode comp_mode;
660 enum machine_mode op_mode;
661 enum spu_comp_code scode;
664 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
665 and so on, to keep the constant in operand 1. */
666 if (GET_CODE (spu_compare_op1) == CONST_INT)
668 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
669 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
673 spu_compare_op1 = GEN_INT (val);
677 spu_compare_op1 = GEN_INT (val);
681 spu_compare_op1 = GEN_INT (val);
685 spu_compare_op1 = GEN_INT (val);
746 op_mode = GET_MODE (spu_compare_op0);
/* DFmode compares are done by subtracting and comparing the result
   against zero; only safe for GT/EQ unless -funsafe-math-optimizations
   (rounding can change the sign of a tiny difference).  */
785 if (GET_MODE (spu_compare_op1) == DFmode)
787 rtx reg = gen_reg_rtx (DFmode);
788 if (!flag_unsafe_math_optimizations
789 || (scode != SPU_GT && scode != SPU_EQ))
792 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
794 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
796 spu_compare_op0 = reg;
797 spu_compare_op1 = CONST0_RTX (DFmode);
800 if (is_set == 0 && spu_compare_op1 == const0_rtx
801 && (GET_MODE (spu_compare_op0) == SImode
802 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
804 /* Don't need to set a register with the result when we are
805 comparing against zero and branching. */
806 reverse_test = !reverse_test;
807 compare_result = spu_compare_op0;
811 compare_result = gen_reg_rtx (comp_mode);
/* Swap operands when the compare was reversed above.  */
815 rtx t = spu_compare_op1;
816 spu_compare_op1 = spu_compare_op0;
820 if (spu_comp_icode[index][scode] == 0)
/* Force operands into registers if the insn's predicates reject
   them as-is.  */
823 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
824 (spu_compare_op0, op_mode))
825 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
826 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
827 (spu_compare_op1, op_mode))
828 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
829 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
834 emit_insn (comp_rtx);
843 /* We don't have branch on QI compare insns, so we convert the
844 QI compare result to a HI result. */
845 if (comp_mode == QImode)
847 rtx old_res = compare_result;
848 compare_result = gen_reg_rtx (HImode);
850 emit_insn (gen_extendqihi2 (compare_result, old_res));
/* Branch case: jump when the compare result is (non)zero depending on
   whether the test was reversed.  */
854 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
856 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
858 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
859 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
860 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
863 else if (is_set == 2)
/* Conditional-select case: derive a selection mask of the target's
   size from the compare result, then selb between op_t and op_f.  */
865 int compare_size = GET_MODE_BITSIZE (comp_mode);
866 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
867 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
869 rtx op_t = operands[2];
870 rtx op_f = operands[3];
872 /* The result of the comparison can be SI, HI or QI mode. Create a
873 mask based on that result. */
874 if (target_size > compare_size)
876 select_mask = gen_reg_rtx (mode);
877 emit_insn (gen_extend_compare (select_mask, compare_result));
879 else if (target_size < compare_size)
881 gen_rtx_SUBREG (mode, compare_result,
882 (compare_size - target_size) / BITS_PER_UNIT);
883 else if (comp_mode != mode)
884 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
886 select_mask = compare_result;
888 if (GET_MODE (target) != GET_MODE (op_t)
889 || GET_MODE (target) != GET_MODE (op_f))
/* A reversed test swaps which operand selb picks.  */
893 emit_insn (gen_selb (target, op_t, op_f, select_mask));
895 emit_insn (gen_selb (target, op_f, op_t, select_mask));
/* Set case: materialize the (possibly inverted) result in TARGET,
   sign-extending narrow compare results up to SImode when needed.  */
900 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
901 gen_rtx_NOT (comp_mode, compare_result)));
902 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
903 emit_insn (gen_extendhisi2 (target, compare_result));
904 else if (GET_MODE (target) == SImode
905 && GET_MODE (compare_result) == QImode)
906 emit_insn (gen_extend_compare (target, compare_result));
908 emit_move_insn (target, compare_result);
/* Convert an SFmode or DFmode CONST_DOUBLE X to its target bit pattern
   as a HOST_WIDE_INT (SF: 32 bits; DF: two 32-bit halves packed into
   64 bits).  NOTE(review): local declarations, braces, and the return
   are missing from this excerpt.  */
913 const_double_to_hwint (rtx x)
917 if (GET_MODE (x) == SFmode)
919 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
920 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
922 else if (GET_MODE (x) == DFmode)
925 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
926 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
/* Pack the two 32-bit target words into one 64-bit value.  */
928 val = (val << 32) | (l[1] & 0xffffffff);
/* Inverse of const_double_to_hwint: rebuild an SF/DF CONST_DOUBLE from
   the 64-bit bit pattern V.  The (v << 32) >> 32 idiom isolates the
   low 32 bits for real_from_target's word array.  */
936 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
940 gcc_assert (mode == SFmode || mode == DFmode);
943 tv[0] = (v << 32) >> 32;
944 else if (mode == DFmode)
946 tv[1] = (v << 32) >> 32;
949 real_from_target (&rv, tv, mode);
950 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
/* Output a memory address ADDR to the assembly FILE in SPU syntax:
   "0(reg)" for a plain register, "off(reg)" or "reg,reg" for PLUS,
   and a symbolic constant otherwise.  NOTE(review): switch labels,
   braces and the default/error arm are missing from this excerpt.  */
954 print_operand_address (FILE * file, register rtx addr)
/* Strip the (and addr -16) wrapper used for forced 16-byte alignment;
   the hardware ignores the low bits anyway.  */
959 if (GET_CODE (addr) == AND
960 && GET_CODE (XEXP (addr, 1)) == CONST_INT
961 && INTVAL (XEXP (addr, 1)) == -16
962 addr = XEXP (addr, 0);
964 switch (GET_CODE (addr))
967 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
971 reg = XEXP (addr, 0);
972 offset = XEXP (addr, 1);
973 if (GET_CODE (offset) == REG)
975 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
976 reg_names[REGNO (offset)]);
978 else if (GET_CODE (offset) == CONST_INT)
980 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
981 INTVAL (offset), reg_names[REGNO (reg)]);
991 output_addr_const (file, addr);
/* Output operand X to FILE under format modifier CODE.  The modifier
   letters select immediate-width handling (lower case = first pass,
   upper case = second/"s" forms per the comments below), plus special
   codes: C/D condition, M c*d pattern size, N negation, I interrupt
   enable, b branch modifier, i indirect call, p load/store form.
   NOTE(review): large parts of the dispatch (switch scaffolding,
   braces, some case labels and error paths) are missing from this
   excerpt; comments below only annotate what is visible.  */
1001 print_operand (FILE * file, rtx x, int code)
1003 enum machine_mode mode = GET_MODE (x);
1005 unsigned char arr[16];
1006 int xcode = GET_CODE (x);
/* VOIDmode constants: pick an effective mode from the width implied
   by the modifier letter.  */
1008 if (GET_MODE (x) == VOIDmode)
1011 case 'L': /* 128 bits, signed */
1012 case 'm': /* 128 bits, signed */
1013 case 'T': /* 128 bits, signed */
1014 case 't': /* 128 bits, signed */
1017 case 'K': /* 64 bits, signed */
1018 case 'k': /* 64 bits, signed */
1019 case 'D': /* 64 bits, signed */
1020 case 'd': /* 64 bits, signed */
1023 case 'J': /* 32 bits, signed */
1024 case 'j': /* 32 bits, signed */
1025 case 's': /* 32 bits, signed */
1026 case 'S': /* 32 bits, signed */
/* j/k/m: print the suffix (h/b/none) selecting the logical-immediate
   instruction form for this constant.  */
1033 case 'j': /* 32 bits, signed */
1034 case 'k': /* 64 bits, signed */
1035 case 'm': /* 128 bits, signed */
1036 if (xcode == CONST_INT
1037 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1039 gcc_assert (logical_immediate_p (x, mode));
1040 constant_to_array (mode, x, arr);
/* First word of the constant, big-endian byte order.  */
1041 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1042 val = trunc_int_for_mode (val, SImode);
1043 switch (which_logical_immediate (val))
1048 fprintf (file, "h");
1051 fprintf (file, "b");
/* J/K/L: print the immediate value itself, truncated to the element
   width chosen by which_logical_immediate.  */
1061 case 'J': /* 32 bits, signed */
1062 case 'K': /* 64 bits, signed */
1063 case 'L': /* 128 bits, signed */
1064 if (xcode == CONST_INT
1065 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1067 gcc_assert (logical_immediate_p (x, mode)
1068 || iohl_immediate_p (x, mode));
1069 constant_to_array (mode, x, arr);
1070 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1071 val = trunc_int_for_mode (val, SImode);
1072 switch (which_logical_immediate (val))
1078 val = trunc_int_for_mode (val, HImode);
1081 val = trunc_int_for_mode (val, QImode);
1086 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
/* t/d/s: print the suffix for the immediate-load instruction family
   (il/ila/ilh/ilhu) or c*d pattern chosen for this constant.  */
1092 case 't': /* 128 bits, signed */
1093 case 'd': /* 64 bits, signed */
1094 case 's': /* 32 bits, signed */
1097 enum immediate_class c = classify_immediate (x, mode);
1101 constant_to_array (mode, x, arr);
1102 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1103 val = trunc_int_for_mode (val, SImode);
1104 switch (which_immediate_load (val))
1109 fprintf (file, "a");
1112 fprintf (file, "h");
1115 fprintf (file, "hu");
/* IC_CPAT: suffix is the pattern element size (b/h/w/d).  */
1122 constant_to_array (mode, x, arr);
1123 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1125 fprintf (file, "b");
1127 fprintf (file, "h");
1129 fprintf (file, "w");
1131 fprintf (file, "d");
1134 if (xcode == CONST_VECTOR)
1136 x = CONST_VECTOR_ELT (x, 0);
1137 xcode = GET_CODE (x);
1139 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1140 fprintf (file, "a");
1141 else if (xcode == HIGH)
1142 fprintf (file, "hu");
/* T/D/S: print the operand value for the instruction form chosen by
   the matching lower-case pass.  */
1155 case 'T': /* 128 bits, signed */
1156 case 'D': /* 64 bits, signed */
1157 case 'S': /* 32 bits, signed */
1160 enum immediate_class c = classify_immediate (x, mode);
1164 constant_to_array (mode, x, arr);
1165 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1166 val = trunc_int_for_mode (val, SImode);
1167 switch (which_immediate_load (val))
1174 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1179 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
/* IC_FSMBI: rebuild the 16-bit fsmbi mask, one bit per byte.  */
1182 constant_to_array (mode, x, arr);
1184 for (i = 0; i < 16; i++)
1189 print_operand (file, GEN_INT (val), 0);
1192 constant_to_array (mode, x, arr);
1193 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1197 if (xcode == CONST_VECTOR)
1199 x = CONST_VECTOR_ELT (x, 0);
1200 xcode = GET_CODE (x);
1204 output_addr_const (file, XEXP (x, 0));
1205 fprintf (file, "@h");
1208 output_addr_const (file, x);
1221 if (xcode == CONST_INT)
1223 /* Only 4 least significant bits are relevant for generate
1224 control word instructions. */
1225 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1230 case 'M': /* print code for c*d */
1231 if (GET_CODE (x) == CONST_INT)
1235 fprintf (file, "b");
1238 fprintf (file, "h");
1241 fprintf (file, "w");
1244 fprintf (file, "d");
1253 case 'N': /* Negate the operand */
1254 if (xcode == CONST_INT)
1255 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1256 else if (xcode == CONST_VECTOR)
1257 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1258 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1261 case 'I': /* enable/disable interrupts */
1262 if (xcode == CONST_INT)
1263 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1266 case 'b': /* branch modifiers */
1268 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1269 else if (COMPARISON_P (x))
1270 fprintf (file, "%s", xcode == NE ? "n" : "");
1273 case 'i': /* indirect call */
1276 if (GET_CODE (XEXP (x, 0)) == REG)
1277 /* Used in indirect function calls. */
1278 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1280 output_address (XEXP (x, 0));
1284 case 'p': /* load/store */
/* Choose the load/store address-form letter from the address shape:
   d = d-form, a = absolute, r = relative, x = x-form.  */
1288 xcode = GET_CODE (x);
1293 xcode = GET_CODE (x);
1296 fprintf (file, "d");
1297 else if (xcode == CONST_INT)
1298 fprintf (file, "a");
1299 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1300 fprintf (file, "r");
1301 else if (xcode == PLUS || xcode == LO_SUM)
1303 if (GET_CODE (XEXP (x, 1)) == REG)
1304 fprintf (file, "x");
1306 fprintf (file, "d");
/* Default (no modifier): print the operand itself.  */
1312 fprintf (file, "%s", reg_names[REGNO (x)]);
1313 else if (xcode == MEM)
1314 output_address (XEXP (x, 0));
1315 else if (xcode == CONST_VECTOR)
1316 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1318 output_addr_const (file, x);
1322 output_operand_lossage ("invalid %%xn code");
/* Register-allocator state exported by the register allocator.  */
1327 extern char call_used_regs[];
1328 extern char regs_ever_live[];
1330 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1331 caller saved register. For leaf functions it is more efficient to
1332 use a volatile register because we won't need to save and restore the
1333 pic register. This routine is only valid after register allocation
1334 is completed, so we can pick an unused register. */
/* NOTE(review): the function's signature line and most of its body
   (the leaf-function register-picking path) are missing from this
   excerpt; only the default pic_offset_table_rtx path is visible.  */
1338 rtx pic_reg = pic_offset_table_rtx;
1339 if (!reload_completed && !reload_in_progress)
1347 spu_split_immediate (rtx * ops)
1349 enum machine_mode mode = GET_MODE (ops[0]);
1350 enum immediate_class c = classify_immediate (ops[1], mode);
1356 unsigned char arrhi[16];
1357 unsigned char arrlo[16];
1360 constant_to_array (mode, ops[1], arrhi);
1361 to = no_new_pseudos ? ops[0] : gen_reg_rtx (mode);
1362 for (i = 0; i < 16; i += 4)
1364 arrlo[i + 2] = arrhi[i + 2];
1365 arrlo[i + 3] = arrhi[i + 3];
1366 arrlo[i + 0] = arrlo[i + 1] = 0;
1367 arrhi[i + 2] = arrhi[i + 3] = 0;
1369 hi = array_to_constant (mode, arrhi);
1370 lo = array_to_constant (mode, arrlo);
1371 emit_move_insn (to, hi);
1372 emit_insn (gen_rtx_SET
1373 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1377 if (reload_in_progress || reload_completed)
1379 rtx mem = force_const_mem (mode, ops[1]);
1380 if (TARGET_LARGE_MEM)
1382 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1383 emit_move_insn (addr, XEXP (mem, 0));
1384 mem = replace_equiv_address (mem, addr);
1386 emit_move_insn (ops[0], mem);
1392 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1396 emit_insn (gen_high (ops[0], ops[1]));
1397 emit_insn (gen_low (ops[0], ops[0], ops[1]));
1400 emit_insn (gen_pic (ops[0], ops[1]));
1403 rtx pic_reg = get_pic_reg ();
1404 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1405 current_function_uses_pic_offset_table = 1;
1407 return flag_pic || c == IC_IL2s;
1418 /* SAVING is TRUE when we are generating the actual load and store
1419 instructions for REGNO. When determining the size of the stack
1420 needed for saving register we must allocate enough space for the
1421 worst case, because we don't always have the information early enough
1422 to not allocate it. But we can at least eliminate the actual loads
1423 and stores during the prologue/epilogue. */
/* Returns nonzero when REGNO must be saved: live call-saved registers
   always, plus the PIC register under the conditions visible below.
   NOTE(review): return statements and part of the PIC condition are
   missing from this excerpt.  */
1425 need_to_save_reg (int regno, int saving)
1427 if (regs_ever_live[regno] && !call_used_regs[regno])
1430 && regno == PIC_OFFSET_TABLE_REGNUM
1431 && (!saving || current_function_uses_pic_offset_table)
1433 || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM]))
1438 /* This function is only correct starting with local register
/* Total bytes of stack needed to save call-saved registers; each
   saved register takes one 16-byte quadword slot.  */
1441 spu_saved_regs_size (void)
1443 int reg_save_size = 0;
1446 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1447 if (need_to_save_reg (regno, 0))
1448 reg_save_size += 0x10;
1449 return reg_save_size;
/* Emit a V4SImode store of register REGNO to ADDR + OFFSET (a 16-byte
   frame slot); returns the emitted insn.  */
1453 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1455 rtx reg = gen_rtx_REG (V4SImode, regno);
1457 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1458 return emit_insn (gen_movv4si (mem, reg));
/* Mirror of frame_emit_store: reload register REGNO from its frame
   slot at ADDR + OFFSET.  */
1462 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1464 rtx reg = gen_rtx_REG (V4SImode, regno);
1466 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1467 return emit_insn (gen_movv4si (reg, mem));
1470 /* This happens after reload, so we need to expand it. */
/* DST = SRC + IMM; when IMM does not satisfy constraint K (an
   immediate-add operand) it is first materialized in SCRATCH, which is
   marked REG_MAYBE_DEAD.  NOTE(review): some REG_NOTES continuation
   lines are missing from this excerpt.  */
1472 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1475 if (satisfies_constraint_K (GEN_INT (imm)))
1477 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1481 insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1482 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1484 insn = emit_insn (gen_addsi3 (dst, src, scratch))
1485 if (REGNO (src) == REGNO (scratch))
1488 if (REGNO (dst) == REGNO (scratch))
1489 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1494 /* Return nonzero if this function is known to have a null epilogue. */
/* True after reload for a leaf function with no static chain and no
   stack usage (no saved regs, outgoing args or pretend args).  */
1497 direct_return (void)
1499 if (reload_completed)
1501 if (cfun->static_chain_decl == 0
1502 && (spu_saved_regs_size ()
1504 + current_function_outgoing_args_size
1505 + current_function_pretend_args_size == 0)
1506 && current_function_is_leaf)
/* Stack frame layout diagram (partially elided in this excerpt).  */
1513 The stack frame looks like this:
1520 prev SP | back chain |
1523 | reg save | current_function_pretend_args_size bytes
1526 | saved regs | spu_saved_regs_size() bytes
1529 FP | vars | get_frame_size() bytes
1533 | args | current_function_outgoing_args_size bytes
/* Expand the function prologue: save the link register and all
   call-saved registers, set up the PIC register when needed, emit the
   optional stack-overflow check, allocate the stack frame (storing the
   previous $sp as the back chain), and establish the frame pointer
   when one is required.  Naked functions get no prologue.  */
1543 spu_expand_prologue (void)
1545 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1546 HOST_WIDE_INT total_size;
1547 HOST_WIDE_INT saved_regs_size;
1548 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1549 rtx scratch_reg_0, scratch_reg_1;
1552 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1553 the "toplevel" insn chain. */
1554 emit_note (NOTE_INSN_DELETED);
1556 if (flag_pic && optimize == 0)
1557 current_function_uses_pic_offset_table = 1;
1559 if (spu_naked_function_p (current_function_decl))
/* Use the first two registers past the argument registers as
   prologue scratch registers.  */
1562 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1563 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1565 saved_regs_size = spu_saved_regs_size ();
1566 total_size = size + saved_regs_size
1567 + current_function_outgoing_args_size
1568 + current_function_pretend_args_size;
1570 if (!current_function_is_leaf
1571 || current_function_calls_alloca || total_size > 0)
1572 total_size += STACK_POINTER_OFFSET;
1574 /* Save this first because code after this might use the link
1575 register as a scratch register. */
1576 if (!current_function_is_leaf)
1578 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1579 RTX_FRAME_RELATED_P (insn) = 1;
/* Save the call-saved registers just below the pretend args.  */
1584 offset = -current_function_pretend_args_size;
1585 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1586 if (need_to_save_reg (regno, 1))
1589 insn = frame_emit_store (regno, sp_reg, offset);
1590 RTX_FRAME_RELATED_P (insn) = 1;
/* Initialize the PIC register; the loads are marked REG_MAYBE_DEAD
   so they can be removed if the PIC register ends up unused.  */
1594 if (flag_pic && current_function_uses_pic_offset_table)
1596 rtx pic_reg = get_pic_reg ();
1597 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1598 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1600 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1601 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
/* Optional stack-overflow check: compare $sp against the frame size
   and trap via heq when the stack would underflow.  */
1607 if (flag_stack_check)
1609 /* We compare against total_size-1 because
1610 ($sp >= total_size) <=> ($sp > total_size-1) */
1611 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1612 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1613 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1614 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1616 emit_move_insn (scratch_v4si, size_v4si);
1617 size_v4si = scratch_v4si;
1619 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si))
1620 emit_insn (gen_vec_extractv4si
1621 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1622 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1625 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1626 the value of the previous $sp because we save it as the back
1628 if (total_size <= 2000)
1630 /* In this case we save the back chain first. */
1631 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1633 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1635 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1637 insn = emit_move_insn (scratch_reg_0, sp_reg);
1639 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1643 insn = emit_move_insn (scratch_reg_0, sp_reg);
1645 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
/* Describe the $sp adjustment to the unwinder as a plain add via
   REG_FRAME_RELATED_EXPR, whatever sequence was actually emitted.  */
1647 RTX_FRAME_RELATED_P (insn) = 1;
1648 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1650 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1652 if (total_size > 2000)
1654 /* Save the back chain ptr */
1655 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1658 if (frame_pointer_needed)
1660 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1661 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1662 + current_function_outgoing_args_size;
1663 /* Set the new frame_pointer */
1664 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1665 RTX_FRAME_RELATED_P (insn) = 1;
1666 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1668 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1669 real, REG_NOTES (insn));
1673 emit_note (NOTE_INSN_DELETED);
/* Expand the function epilogue: deallocate the frame (reloading $sp
   from the back chain when alloca was used), restore the call-saved
   registers and the link register, and emit the return jump unless
   SIBCALL_P.  Mirrors the frame layout set up by spu_expand_prologue.  */
1677 spu_expand_epilogue (bool sibcall_p)
1679 int size = get_frame_size (), offset, regno;
1680 HOST_WIDE_INT saved_regs_size, total_size;
1681 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1682 rtx jump, scratch_reg_0;
1684 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1685 the "toplevel" insn chain. */
1686 emit_note (NOTE_INSN_DELETED);
1688 if (spu_naked_function_p (current_function_decl))
1691 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
/* Recompute the total frame size exactly as the prologue did.  */
1693 saved_regs_size = spu_saved_regs_size ();
1694 total_size = size + saved_regs_size
1695 + current_function_outgoing_args_size
1696 + current_function_pretend_args_size;
1698 if (!current_function_is_leaf
1699 || current_function_calls_alloca || total_size > 0)
1700 total_size += STACK_POINTER_OFFSET;
/* With alloca the static size is not the real frame size, so reload
   $sp from the back chain pointer stored at the bottom of the frame.  */
1704 if (current_function_calls_alloca)
1705 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1707 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1710 if (saved_regs_size > 0)
1712 offset = -current_function_pretend_args_size;
1713 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1714 if (need_to_save_reg (regno, 1))
1717 frame_emit_load (regno, sp_reg, offset);
1722 if (!current_function_is_leaf)
1723 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
/* Keep $lr live with a USE, then emit the return and a barrier.  */
1727 emit_insn (gen_rtx_USE
1728 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1729 jump = emit_jump_insn (gen__return ());
1730 emit_barrier_after (jump);
1733 emit_note (NOTE_INSN_DELETED);
/* Return an rtx for the value __builtin_return_address should yield,
   using the hard-reg-initial-val mechanism to capture $lr on entry.  */
1737 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1741 /* This is inefficient because it ends up copying to a save-register
1742 which then gets saved even though $lr has already been saved. But
1743 it does generate better code for leaf functions and we don't need
1744 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1745 used for __builtin_return_address anyway, so maybe we don't care if
1746 it's inefficient. */
1747 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1751 /* Given VAL, generate a constant appropriate for MODE.
1752 If MODE is a vector mode, every element will be VAL.
1753 For TImode, VAL will be zero extended to 128 bits. */
1755 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
/* Only integer and float scalar/vector modes are supported.  */
1761 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1762 || GET_MODE_CLASS (mode) == MODE_FLOAT
1763 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1764 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1766 if (GET_MODE_CLASS (mode) == MODE_INT)
1767 return immed_double_const (val, 0, mode);
1769 /* val is the bit representation of the float */
1770 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1771 return hwint_to_const_double (mode, val);
/* Vector case: build one element, then replicate it across all lanes
   of a CONST_VECTOR.  */
1773 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1774 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1776 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1778 units = GET_MODE_NUNITS (mode);
1780 v = rtvec_alloc (units);
1782 for (i = 0; i < units; ++i)
1783 RTVEC_ELT (v, i) = inner;
1785 return gen_rtx_CONST_VECTOR (mode, v);
1788 /* branch hint stuff */
1790 /* The hardware requires 8 insns between a hint and the branch it
1791 effects. This variable describes how many rtl instructions the
1792 compiler needs to see before inserting a hint. (FIXME: We should
1793 accept less and insert nops to enforce it because hinting is always
1794 profitable for performance, but we do need to be careful of code
1796 int spu_hint_dist = (8 * 4);
1798 /* An array of these is used to propagate hints to predecessor blocks. */
1801 rtx prop_jump; /* propagated from another block */
1802 basic_block bb; /* the original block. */
1805 /* The special $hbr register is used to prevent the insn scheduler from
1806 moving hbr insns across instructions which invalidate them. It
1807 should only be used in a clobber, and this function searches for
1808 insns which clobber it. */
/* Return nonzero when INSN contains a (clobber (reg HBR_REGNUM))
   inside its PARALLEL pattern.  */
1810 insn_clobbers_hbr (rtx insn)
1812 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
1814 rtx parallel = PATTERN (insn);
/* Clobbers come last in a PARALLEL, so scan from the end.  */
1817 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
1819 clobber = XVECEXP (parallel, 0, j);
1820 if (GET_CODE (clobber) == CLOBBER
1821 && GET_CODE (XEXP (clobber, 0)) == REG
1822 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
/* Emit an hbr insn near BEFORE hinting that BRANCH will transfer to
   TARGET.  DISTANCE is the insn-address distance from the hint site to
   the branch.  A label is placed on the branch so the hint can refer
   to it; the hint is positioned (and given TImode) so it dual-issues
   with a neighboring pipe0 insn when possible.  */
1830 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
1833 rtx hint, insn, prev, next;
1835 if (before == 0 || branch == 0 || target == 0)
1842 branch_label = gen_label_rtx ();
1843 LABEL_NUSES (branch_label)++;
1844 LABEL_PRESERVE_P (branch_label) = 1;
1845 insn = emit_label_before (branch_label, branch);
1846 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1848 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1849 the current insn is pipe0, dual issue with it. */
1850 prev = prev_active_insn (before);
1851 if (prev && get_pipe (prev) == 0)
1852 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1853 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
1855 next = next_active_insn (before);
1856 hint = emit_insn_after (gen_hbr (branch_label, target), before);
/* TImode on an insn marks the start of a new issue group here.  */
1858 PUT_MODE (next, TImode);
1862 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1863 PUT_MODE (hint, TImode);
/* Cache the insn code so later passes see a recognized insn.  */
1865 recog_memoized (hint);
1868 /* Returns 0 if we don't want a hint for this branch. Otherwise return
1869 the rtx for the branch target. */
1871 get_branch_target (rtx branch)
1873 if (GET_CODE (branch) == JUMP_INSN)
1877 /* Return statements */
1878 if (GET_CODE (PATTERN (branch)) == RETURN)
1879 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)
1882 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
1883 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
1886 set = single_set (branch);
1887 src = SET_SRC (set);
1888 if (GET_CODE (SET_DEST (set)) != PC)
/* Conditional branch: hint only the statistically likely arm, and
   only when that arm is not the fall-through (PC).  */
1891 if (GET_CODE (src) == IF_THEN_ELSE)
1894 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
1897 /* If the more probable case is not a fall through, then
1898 try a branch hint. */
1899 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
1900 if (prob > (REG_BR_PROB_BASE * 6 / 10)
1901 && GET_CODE (XEXP (src, 1)) != PC)
1902 lab = XEXP (src, 1);
1903 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
1904 && GET_CODE (XEXP (src, 2)) != PC)
1905 lab = XEXP (src, 2);
1909 if (GET_CODE (lab) == RETURN)
1910 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1918 else if (GET_CODE (branch) == CALL_INSN)
1921 /* All of our call patterns are in a PARALLEL and the CALL is
1922 the first pattern in the PARALLEL. */
1923 if (GET_CODE (PATTERN (branch)) != PARALLEL)
1925 call = XVECEXP (PATTERN (branch), 0, 0);
1926 if (GET_CODE (call) == SET)
1927 call = SET_SRC (call);
1928 if (GET_CODE (call) != CALL)
/* The call target is the address inside (call (mem ...) ...).  */
1930 return XEXP (XEXP (call, 0), 0);
/* Walk all basic blocks in reverse, inserting hbr branch hints for
   branches and calls where profitable.  Hints that cannot be placed
   far enough ahead within a block are propagated to a predecessor
   block via the spu_bb_info array.  Requires valid INSN_ADDRESSES.  */
1936 insert_branch_hints (void)
1938 struct spu_bb_info *spu_bb_info;
1939 rtx branch, insn, next;
1940 rtx branch_target = 0;
1941 int branch_addr = 0, insn_addr, head_addr;
/* One entry per basic block for hints propagated from successors.  */
1946 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
1947 sizeof (struct spu_bb_info));
1949 /* We need exact insn addresses and lengths. */
1950 shorten_branches (get_insns ());
1952 FOR_EACH_BB_REVERSE (bb)
1954 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
/* Pick up a hint a successor block asked us to place here.  */
1956 if (spu_bb_info[bb->index].prop_jump)
1958 branch = spu_bb_info[bb->index].prop_jump;
1959 branch_target = get_branch_target (branch);
1960 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
1962 /* Search from end of a block to beginning. In this loop, find
1963 jumps which need a branch and emit them only when:
1964 - it's an indirect branch and we're at the insn which sets
1966 - we're at an insn that will invalidate the hint. e.g., a
1967 call, another hint insn, inline asm that clobbers $hbr, and
1968 some inlined operations (divmodsi4). Don't consider jumps
1969 because they are only at the end of a block and are
1970 considered when we are deciding whether to propagate
1971 - we're getting too far away from the branch. The hbr insns
1972 only have a signed 10-bit offset
1973 We go back as far as possible so the branch will be considered
1974 for propagation when we get to the beginning of the block. */
1976 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
1980 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
1982 && ((GET_CODE (branch_target) == REG
1983 && set_of (branch_target, insn) != NULL_RTX)
1984 || insn_clobbers_hbr (insn)
1985 || branch_addr - insn_addr > 600))
1987 int next_addr = INSN_ADDRESSES (INSN_UID (next));
1988 if (insn != BB_END (bb)
1989 && branch_addr - next_addr >= spu_hint_dist)
1993 "hint for %i in block %i before %i\n",
1994 INSN_UID (branch), bb->index, INSN_UID (next));
1995 spu_emit_branch_hint (next, branch, branch_target,
1996 branch_addr - next_addr);
2001 /* JUMP_P will only be true at the end of a block. When
2002 branch is already set it means we've previously decided
2003 to propagate a hint for that branch into this block. */
2004 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2007 if ((branch_target = get_branch_target (insn)))
2010 branch_addr = insn_addr;
2014 /* When a branch hint is emitted it will be inserted
2015 before "next". Make sure next is the beginning of a
2016 cycle to minimize impact on the scheduled insns. */
2017 if (GET_MODE (insn) == TImode)
2020 if (insn == BB_HEAD (bb))
2026 /* If we haven't emitted a hint for this branch yet, it might
2027 be profitable to emit it in one of the predecessor blocks,
2028 especially for loops. */
2030 basic_block prev = 0, prop = 0, prev2 = 0;
2031 int loop_exit = 0, simple_loop = 0;
2034 next_addr = INSN_ADDRESSES (INSN_UID (next));
/* prev = the fall-through predecessor, prev2 = the other one.  */
2036 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2037 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2038 prev = EDGE_PRED (bb, j)->src;
2040 prev2 = EDGE_PRED (bb, j)->src;
2042 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2043 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2045 else if (EDGE_SUCC (bb, j)->dest == bb)
2048 /* If this branch is a loop exit then propagate to previous
2049 fallthru block. This catches the cases when it is a simple
2050 loop or when there is an initial branch into the loop. */
2051 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2054 /* If there is only one adjacent predecessor. Don't propagate
2055 outside this loop. This loop_depth test isn't perfect, but
2056 I'm not sure the loop_father member is valid at this point. */
2057 else if (prev && single_pred_p (bb)
2058 && prev->loop_depth == bb->loop_depth)
2061 /* If this is the JOIN block of a simple IF-THEN then
2062 propogate the hint to the HEADER block. */
2063 else if (prev && prev2
2064 && EDGE_COUNT (bb->preds) == 2
2065 && EDGE_COUNT (prev->preds) == 1
2066 && EDGE_PRED (prev, 0)->src == prev2
2067 && prev2->loop_depth == bb->loop_depth
2068 && GET_CODE (branch_target) != REG)
2071 /* Don't propagate when:
2072 - this is a simple loop and the hint would be too far
2073 - this is not a simple loop and there are 16 insns in
2075 - the predecessor block ends in a branch that will be
2077 - the predecessor block ends in an insn that invalidates
2081 && (bbend = BB_END (prop))
2082 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2083 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2084 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2087 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2088 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2089 bb->index, prop->index, bb->loop_depth,
2090 INSN_UID (branch), loop_exit, simple_loop,
2091 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2093 spu_bb_info[prop->index].prop_jump = branch;
2094 spu_bb_info[prop->index].bb = bb;
2096 else if (next && branch_addr - next_addr >= spu_hint_dist)
2099 fprintf (dump_file, "hint for %i in block %i before %i\n",
2100 INSN_UID (branch), bb->index, INSN_UID (next));
2101 spu_emit_branch_hint (next, branch, branch_target,
2102 branch_addr - next_addr);
2110 /* Emit a nop for INSN such that the two will dual issue. This assumes
2111 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2112 We check for TImode to handle a MULTI1 insn which has dual issued its
2113 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2116 emit_nop_for_insn (rtx insn)
2120 p = get_pipe (insn);
/* Pipe1 insn at a group start: put a pipe0 nop (nopn) before it and
   move the TImode group marker onto the nop.  */
2121 if (p == 1 && GET_MODE (insn) == TImode)
2123 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2124 PUT_MODE (new_insn, TImode);
2125 PUT_MODE (insn, VOIDmode);
/* Otherwise pair INSN with a pipe1 lnop placed after it.  */
2128 new_insn = emit_insn_after (gen_lnop (), insn);
2131 /* Insert nops in basic blocks to meet dual issue alignment
/* (NOTE(review): the function signature line is not visible in this
   extract; from the body this walks all insns tracking addresses and
   pads with nops so issue groups stay 8-byte aligned.)  */
2136 rtx insn, next_insn, prev_insn;
2140 /* This sets up INSN_ADDRESSES. */
2141 shorten_branches (get_insns ());
2143 /* Keep track of length added by nops. */
2147 for (insn = get_insns (); insn; insn = next_insn)
2149 next_insn = next_active_insn (insn);
2150 addr = INSN_ADDRESSES (INSN_UID (insn));
/* A TImode insn followed by a non-TImode insn at a misaligned
   address needs padding before it to restore 8-byte alignment.  */
2151 if (GET_MODE (insn) == TImode
2153 && GET_MODE (next_insn) != TImode
2154 && ((addr + length) & 7) != 0)
2156 /* prev_insn will always be set because the first insn is
2157 always 8-byte aligned. */
2158 emit_nop_for_insn (prev_insn);
/* Machine-dependent reorg pass: insert branch hints when enabled.  */
2166 spu_machine_dependent_reorg (void)
2170 if (TARGET_BRANCH_HINTS)
2171 insert_branch_hints ();
2177 /* Insn scheduling routines, primarily for dual issue. */
/* TARGET_SCHED_ISSUE_RATE hook.  */
2179 spu_sched_issue_rate (void)
/* TARGET_SCHED_VARIABLE_ISSUE hook: account for a scheduled INSN.
   USE/CLOBBER patterns and pipe -2 insns do not consume an issue slot.  */
2185 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2186 int verbose ATTRIBUTE_UNUSED, rtx insn,
2189 if (GET_CODE (PATTERN (insn)) != USE
2190 && GET_CODE (PATTERN (insn)) != CLOBBER
2191 && get_pipe (insn) != -2
2193 return can_issue_more;
2200 /* Handle inline asm */
2201 if (INSN_CODE (insn) == -1)
2203 t = get_attr_type (insn);
2219 case TYPE_IPREFETCH:
/* TARGET_SCHED_ADJUST_PRIORITY hook: tweak PRI for INSN based on its
   pipe assignment.  */
2236 spu_sched_adjust_priority (rtx insn, int pri)
2238 int p = get_pipe (insn);
2239 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2241 if (GET_CODE (PATTERN (insn)) == USE
2242 || GET_CODE (PATTERN (insn)) == CLOBBER
2245 /* Schedule pipe0 insns early for greedier dual issue. */
2251 /* INSN is dependent on DEP_INSN. */
/* TARGET_SCHED_ADJUST_COST hook: return the adjusted dependence COST.  */
2253 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2254 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2256 if (GET_CODE (insn) == CALL_INSN)
2258 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2259 scheduler makes every insn in a block anti-dependent on the final
2260 jump_insn. We adjust here so higher cost insns will get scheduled
2262 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2263 return insn_cost (dep_insn) - 3;
2267 /* Create a CONST_DOUBLE from a string. */
/* STRING is parsed as a floating value of MODE.  */
2269 spu_float_const (const char *string, enum machine_mode mode)
2271 REAL_VALUE_TYPE value;
2272 value = REAL_VALUE_ATOF (string, mode);
2273 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2276 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2277 CONST_INT fits constraint 'K', i.e., is small. */
/* When ALIGNED is set, additionally require the offset to be a
   multiple of 16 and the symbol to be known 16-byte aligned.  */
2279 legitimate_const (rtx x, int aligned)
2281 /* We can never know if the resulting address fits in 18 bits and can be
2282 loaded with ila. Instead we should use the HI and LO relocations to
2283 load a 32-bit address. */
2286 gcc_assert (GET_CODE (x) == CONST);
2288 if (GET_CODE (XEXP (x, 0)) != PLUS)
2290 sym = XEXP (XEXP (x, 0), 0);
2291 cst = XEXP (XEXP (x, 0), 1);
2292 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2294 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2296 return satisfies_constraint_K (cst);
/* Return nonzero when X is a constant that can serve as an address.  */
2300 spu_constant_address_p (rtx x)
2302 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2303 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2304 || GET_CODE (x) == HIGH);
/* Classify VAL (an SImode value) by which immediate-load instruction
   can materialize it: 16-bit signed, 18-bit unsigned, a repeated
   halfword, or a value with the low halfword zero.  */
2307 static enum spu_immediate
2308 which_immediate_load (HOST_WIDE_INT val)
2310 gcc_assert (val == trunc_int_for_mode (val, SImode));
2312 if (val >= -0x8000 && val <= 0x7fff)
2314 if (val >= 0 && val <= 0x3ffff)
2316 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2318 if ((val & 0xffff) == 0)
2324 /* Return true when OP can be loaded by one of the il instructions, or
2325 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
2327 immediate_load_p (rtx op, enum machine_mode mode)
2329 if (CONSTANT_P (op))
2331 enum immediate_class c = classify_immediate (op, mode);
2332 return c == IC_IL1 || (!flow2_completed && c == IC_IL2);
2337 /* Return true if the first SIZE bytes of arr is a constant that can be
2338 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2339 represent the size and offset of the instruction to use. */
2341 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2343 int cpat, run, i, start;
/* Scan for the shuffle-control byte pattern: an ascending run of
   0,1,2,3(,4..7) embedded in the identity pattern.  */
2347 for (i = 0; i < size && cpat; i++)
2355 else if (arr[i] == 2 && arr[i+1] == 3)
2357 else if (arr[i] == 0)
2359 while (arr[i+run] == run && i+run < 16)
2361 if (run != 4 && run != 8)
/* The run must be aligned to its own size.  */
2366 if ((i & (run-1)) != 0)
2373 if (cpat && (run || size < 16))
2380 *pstart = start == -1 ? 16-run : start;
2386 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2387 it into a register. MODE is only valid when OP is a CONST_INT. */
2388 static enum immediate_class
2389 classify_immediate (rtx op, enum machine_mode mode)
2392 unsigned char arr[16];
2393 int i, j, repeated, fsmbi;
2395 gcc_assert (CONSTANT_P (op));
2397 if (GET_MODE (op) != VOIDmode)
2398 mode = GET_MODE (op);
2400 /* A V4SI const_vector with all identical symbols is ok. */
2401 if (mode == V4SImode
2402 && GET_CODE (op) == CONST_VECTOR
2403 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2404 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2405 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2406 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2407 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2408 op = CONST_VECTOR_ELT (op, 0);
2410 switch (GET_CODE (op))
/* Symbolic constants need one insn normally, two with large memory.  */
2414 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
2417 return TARGET_LARGE_MEM
2418 || !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s;
/* Vectors with any non-numeric element are handled elsewhere.  */
2424 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2425 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2426 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
/* Numeric constant: expand to a 16-byte array and analyze it.  */
2432 constant_to_array (mode, op, arr);
2434 /* Check that each 4-byte slot is identical. */
2436 for (i = 4; i < 16; i += 4)
2437 for (j = 0; j < 4; j++)
2438 if (arr[j] != arr[i + j])
/* All words identical: see if one immediate-load insn suffices.  */
2443 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2444 val = trunc_int_for_mode (val, SImode);
2446 if (which_immediate_load (val) != SPU_NONE)
2450 /* Any mode of 2 bytes or smaller can be loaded with an il
2452 gcc_assert (GET_MODE_SIZE (mode) > 2);
/* fsmbi can build any constant whose bytes are all 0x00 or 0xff.  */
2455 for (i = 0; i < 16 && fsmbi; i++)
2456 if (arr[i] != 0 && arr[i] != 0xff)
2461 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
/* Classify VAL (an SImode value) by which logical-immediate form fits:
   a 10-bit signed word immediate, a 16-bit value, or a repeated
   halfword/byte pattern.  */
2474 static enum spu_immediate
2475 which_logical_immediate (HOST_WIDE_INT val)
2477 gcc_assert (val == trunc_int_for_mode (val, SImode));
2479 if (val >= -0x200 && val <= 0x1ff)
2481 if (val >= 0 && val <= 0xffff)
2483 if ((val & 0xffff) == ((val >> 16) & 0xffff))
/* Halfwords repeat: retry the check at halfword, then byte, width.  */
2485 val = trunc_int_for_mode (val, HImode);
2486 if (val >= -0x200 && val <= 0x1ff)
2488 if ((val & 0xff) == ((val >> 8) & 0xff))
2490 val = trunc_int_for_mode (val, QImode);
2491 if (val >= -0x200 && val <= 0x1ff)
/* Return true when constant OP can be used directly as the immediate
   operand of a logical instruction (not counting the iohl form).  */
2499 logical_immediate_p (rtx op, enum machine_mode mode)
2502 unsigned char arr[16];
2505 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2506 || GET_CODE (op) == CONST_VECTOR);
2508 if (GET_MODE (op) != VOIDmode)
2509 mode = GET_MODE (op);
2511 constant_to_array (mode, op, arr);
2513 /* Check that bytes are repeated. */
2514 for (i = 4; i < 16; i += 4)
2515 for (j = 0; j < 4; j++)
2516 if (arr[j] != arr[i + j])
/* Reconstruct the repeated 32-bit word and classify it.  */
2519 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2520 val = trunc_int_for_mode (val, SImode);
2522 i = which_logical_immediate (val);
2523 return i != SPU_NONE && i != SPU_IOHL;
/* Return true when constant OP is usable as the immediate of an iohl
   instruction, i.e. each repeated 32-bit word fits in 16 unsigned bits.  */
2527 iohl_immediate_p (rtx op, enum machine_mode mode)
2530 unsigned char arr[16];
2533 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2534 || GET_CODE (op) == CONST_VECTOR);
2536 if (GET_MODE (op) != VOIDmode)
2537 mode = GET_MODE (op);
2539 constant_to_array (mode, op, arr);
2541 /* Check that bytes are repeated. */
2542 for (i = 4; i < 16; i += 4)
2543 for (j = 0; j < 4; j++)
2544 if (arr[j] != arr[i + j])
2547 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2548 val = trunc_int_for_mode (val, SImode);
2550 return val >= 0 && val <= 0xffff;
/* Return true when constant OP (scalar or vector) consists of one
   repeated element whose value, truncated to the element mode, lies in
   the inclusive range [LOW, HIGH].  Used to validate arithmetic
   immediates.  */
2554 arith_immediate_p (rtx op, enum machine_mode mode,
2555 HOST_WIDE_INT low, HOST_WIDE_INT high)
2558 unsigned char arr[16];
2561 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2562 || GET_CODE (op) == CONST_VECTOR);
2564 if (GET_MODE (op) != VOIDmode)
2565 mode = GET_MODE (op);
2567 constant_to_array (mode, op, arr);
/* Work on the element mode of a vector.  */
2569 if (VECTOR_MODE_P (mode))
2570 mode = GET_MODE_INNER (mode);
2572 bytes = GET_MODE_SIZE (mode);
2573 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2575 /* Check that bytes are repeated. */
2576 for (i = bytes; i < 16; i += bytes)
2577 for (j = 0; j < bytes; j++)
2578 if (arr[j] != arr[i + j])
/* Assemble one element big-endian and range-check it.  */
2582 for (j = 1; j < bytes; j++)
2583 val = (val << 8) | arr[j];
2585 val = trunc_int_for_mode (val, mode);
2587 return val >= low && val <= high;
2591 - any 32-bit constant (SImode, SFmode)
2592 - any constant that can be generated with fsmbi (any mode)
2593 - a 64-bit constant where the high and low bits are identical
2595 - a 128-bit constant where the four 32-bit words match. */
/* Implement LEGITIMATE_CONSTANT_P for the cases listed above.  */
2597 spu_legitimate_constant_p (rtx x)
2600 /* V4SI with all identical symbols is valid. */
2601 if (GET_MODE (x) == V4SImode
2602 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2603 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2604 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST
2605 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH))
2606 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2607 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2608 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
/* Other vectors must contain only numeric (CONST_INT/CONST_DOUBLE)
   elements.  */
2610 if (VECTOR_MODE_P (GET_MODE (x)))
2611 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2612 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2613 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2618 /* Valid address are:
2619 - symbol_ref, label_ref, const
2621 - reg + const, where either reg or const is 16 byte aligned
2622 - reg + reg, alignment doesn't matter
2623 The alignment matters in the reg+const case because lqd and stqd
2624 ignore the 4 least significant bits of the const. (TODO: It might be
2625 preferable to allow any alignment and fix it up when splitting.) */
2627 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2628 rtx x, int reg_ok_strict)
/* (and X -16) is used for forced-aligned TImode accesses.  */
2630 if (mode == TImode && GET_CODE (x) == AND
2631 && GET_CODE (XEXP (x, 1)) == CONST_INT
2632 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2634 switch (GET_CODE (x))
2638 return !TARGET_LARGE_MEM;
2641 return !TARGET_LARGE_MEM && legitimate_const (x, 0);
/* Plain integer addresses must fit in 18 unsigned bits (ila range).  */
2644 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2648 gcc_assert (GET_CODE (x) == REG);
2651 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2656 rtx op0 = XEXP (x, 0);
2657 rtx op1 = XEXP (x, 1);
2658 if (GET_CODE (op0) == SUBREG)
2659 op0 = XEXP (op0, 0);
2660 if (GET_CODE (op1) == SUBREG)
2661 op1 = XEXP (op1, 0);
2662 /* We can't just accept any aligned register because CSE can
2663 change it to a register that is not marked aligned and then
2664 recog will fail. So we only accept frame registers because
2665 they will only be changed to other frame registers. */
2666 if (GET_CODE (op0) == REG
2667 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2668 && GET_CODE (op1) == CONST_INT
2669 && INTVAL (op1) >= -0x2000
2670 && INTVAL (op1) <= 0x1fff
2671 && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2673 if (GET_CODE (op0) == REG
2674 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2675 && GET_CODE (op1) == REG
2676 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2687 /* When the address is reg + const_int, force the const_int into a
/* Implement LEGITIMIZE_ADDRESS: rewrite X into a legitimate reg+reg
   form by forcing non-register PLUS operands into registers.  */
2690 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2691 enum machine_mode mode)
2694 /* Make sure both operands are registers. */
2695 if (GET_CODE (x) == PLUS)
/* Aligned symbols keep their 16-byte alignment info via
   mark_reg_pointer so lqd/stqd offsets remain valid.  */
2699 if (ALIGNED_SYMBOL_REF_P (op0))
2701 op0 = force_reg (Pmode, op0);
2702 mark_reg_pointer (op0, 128);
2704 else if (GET_CODE (op0) != REG)
2705 op0 = force_reg (Pmode, op0);
2706 if (ALIGNED_SYMBOL_REF_P (op1))
2708 op1 = force_reg (Pmode, op1);
2709 mark_reg_pointer (op1, 128);
2711 else if (GET_CODE (op1) != REG)
2712 op1 = force_reg (Pmode, op1);
2713 x = gen_rtx_PLUS (Pmode, op0, op1);
2714 if (spu_legitimate_address (mode, x, 0))
2720 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2721 struct attribute_spec.handler. */
/* Warn and drop the attribute when applied to a non-function.  */
2723 spu_handle_fndecl_attribute (tree * node,
2725 tree args ATTRIBUTE_UNUSED,
2726 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2728 if (TREE_CODE (*node) != FUNCTION_DECL)
2730 warning (0, "`%s' attribute only applies to functions",
2731 IDENTIFIER_POINTER (name));
2732 *no_add_attrs = true;
2738 /* Handle the "vector" attribute. */
/* Rewrite the decorated scalar type to the matching SPU vector type
   (e.g. int -> V4SI), preserving signedness and qualifiers.  */
2740 spu_handle_vector_attribute (tree * node, tree name,
2741 tree args ATTRIBUTE_UNUSED,
2742 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2744 tree type = *node, result = NULL_TREE;
2745 enum machine_mode mode;
/* Strip pointers, functions, methods, and arrays to reach the
   element type being vectorized.  */
2748 while (POINTER_TYPE_P (type)
2749 || TREE_CODE (type) == FUNCTION_TYPE
2750 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2751 type = TREE_TYPE (type);
2753 mode = TYPE_MODE (type);
2755 unsigned_p = TYPE_UNSIGNED (type);
2759 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2762 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2765 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2768 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2771 result = V4SF_type_node;
2774 result = V2DF_type_node;
2780 /* Propagate qualifiers attached to the element type
2781 onto the vector type. */
2782 if (result && result != type && TYPE_QUALS (type))
2783 result = build_qualified_type (result, TYPE_QUALS (type));
2785 *no_add_attrs = true; /* No need to hang on to the attribute. */
2788 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2790 *node = reconstruct_complex_type (*node, result);
2795 /* Return nonzero if FUNC is a naked function. */
/* A function is "naked" when it carries the "naked" attribute; such
   functions get no prologue or epilogue.  */
2797 spu_naked_function_p (tree func)
2801 if (TREE_CODE (func) != FUNCTION_DECL)
2804 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2805 return a != NULL_TREE;
/* Implement INITIAL_ELIMINATION_OFFSET: distance between register FROM
   and register TO after eliminating FROM, based on the frame layout
   described above spu_expand_prologue.  */
2809 spu_initial_elimination_offset (int from, int to)
2811 int saved_regs_size = spu_saved_regs_size ();
/* The STACK_POINTER_OFFSET slot exists only when a frame is needed.  */
2813 if (!current_function_is_leaf || current_function_outgoing_args_size
2814 || get_frame_size () || saved_regs_size)
2815 sp_offset = STACK_POINTER_OFFSET;
2816 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2817 return (sp_offset + current_function_outgoing_args_size);
2818 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2820 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2821 return sp_offset + current_function_outgoing_args_size
2822 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2823 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2824 return get_frame_size () + saved_regs_size + sp_offset;
/* Implement FUNCTION_VALUE: return an rtx describing where a value of
   TYPE is returned.  Small aggregates are returned left-justified in a
   PARALLEL of TImode return registers; everything else uses the first
   return register directly.  */
2829 spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2831 enum machine_mode mode = TYPE_MODE (type);
2832 int byte_size = ((mode == BLKmode)
2833 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2835 /* Make sure small structs are left justified in a register. */
2836 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2837 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2839 enum machine_mode smode;
2842 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2843 int n = byte_size / UNITS_PER_WORD;
2844 v = rtvec_alloc (nregs);
/* Full registers first ...  */
2845 for (i = 0; i < n; i++)
2847 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2848 gen_rtx_REG (TImode,
2851 GEN_INT (UNITS_PER_WORD * i));
2852 byte_size -= UNITS_PER_WORD;
/* ... then a partial register in the smallest mode that holds the
   remaining bytes.  */
2860 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2862 gen_rtx_EXPR_LIST (VOIDmode,
2863 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2864 GEN_INT (UNITS_PER_WORD * n));
2866 return gen_rtx_PARALLEL (mode, v);
2868 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
/* Return the register (or PARALLEL) in which the next argument is
   passed, given CUM registers already used; the elided branches
   presumably return 0 to force stack passing.
   NOTE(review): return type, braces, byte_size declaration and the
   stack-passing returns are elided in this extract.  */
2872 spu_function_arg (CUMULATIVE_ARGS cum,
2873 enum machine_mode mode,
2874 tree type, int named ATTRIBUTE_UNUSED)
2878 if (cum >= MAX_REGISTER_ARGS)
2881 byte_size = ((mode == BLKmode)
2882 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2884 /* The ABI does not allow parameters to be passed partially in
2885 reg and partially in stack. */
/* (byte_size + 15) / 16 = number of 16-byte argument slots needed.  */
2886 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
2889 /* Make sure small structs are left justified in a register. */
2890 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2891 && byte_size < UNITS_PER_WORD && byte_size > 0)
2893 enum machine_mode smode;
2897 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2898 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2899 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
2901 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
2904 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
2907 /* Variable sized types are passed by reference. */
2909 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
2910 enum machine_mode mode ATTRIBUTE_UNUSED,
2911 tree type, bool named ATTRIBUTE_UNUSED)
/* A TYPE_SIZE that is not an INTEGER_CST means the size is not a
   compile-time constant, so the object cannot live in registers.  */
2913 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2919 /* Create and return the va_list datatype.
2921 On SPU, va_list is an array type equivalent to
2923 typedef struct __va_list_tag
2925 void *__args __attribute__((__aligned(16)));
2926 void *__skip __attribute__((__aligned(16)));
2930 where __args points to the arg that will be returned by the next
2931 va_arg(), and __skip points to the previous stack frame such that
2932 when __args == __skip we should advance __args by 32 bytes. */
/* NOTE(review): lines are elided in this extract (return type, braces,
   the type_decl assignment around the build_decl call, and the
   save/restore of warn_padded around layout_type).  */
2934 spu_build_builtin_va_list (void)
2936 tree f_args, f_skip, record, type_decl;
2939 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2942 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2944 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
2945 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
/* Both fields get an explicit 128-bit (16-byte) user alignment, which
   matches the quadword natural alignment of SPU memory operations.  */
2947 DECL_FIELD_CONTEXT (f_args) = record;
2948 DECL_ALIGN (f_args) = 128;
2949 DECL_USER_ALIGN (f_args) = 1;
2951 DECL_FIELD_CONTEXT (f_skip) = record;
2952 DECL_ALIGN (f_skip) = 128;
2953 DECL_USER_ALIGN (f_skip) = 1;
2955 TREE_CHAIN (record) = type_decl;
2956 TYPE_NAME (record) = type_decl;
2957 TYPE_FIELDS (record) = f_args;
2958 TREE_CHAIN (f_args) = f_skip;
2960 /* We know this is being padded and we want it too. It is an internal
2961 type so hide the warnings from the user. */
2963 warn_padded = false;
2965 layout_type (record);
2969 /* The correct type is an array type of one element. */
2970 return build_array_type (record, build_index_type (size_zero_node));
2973 /* Implement va_start by filling the va_list structure VALIST.
2974 NEXTARG points to the first anonymous stack argument.
2976 The following global variables are used to initialize
2977 the va_list structure:
2979 current_function_args_info;
2980 the CUMULATIVE_ARGS for this function
2982 current_function_arg_offset_rtx:
2983 holds the offset of the first anonymous stack argument
2984 (relative to the virtual arg pointer). */
/* NOTE(review): this extract elides the return type, braces, the
   declarations of args/skip/t, and the assignments of the two
   COMPONENT_REFs into args and skip.  */
2987 spu_va_start (tree valist, rtx nextarg)
2989 tree f_args, f_skip;
2992 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2993 f_skip = TREE_CHAIN (f_args);
2995 valist = build_va_arg_indirect_ref (valist);
2997 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2999 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3001 /* Find the __args area. */
3002 t = make_tree (TREE_TYPE (args), nextarg);
/* When named args were spilled as pretend args, back up over the
   stack-pointer bias so __args lands on the first anonymous arg.  */
3003 if (current_function_pretend_args_size > 0)
3004 t = build2 (PLUS_EXPR, TREE_TYPE (args), t,
3005 build_int_cst (integer_type_node, -STACK_POINTER_OFFSET));
3006 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3010 /* Find the __skip area. */
3011 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3012 t = build2 (PLUS_EXPR, TREE_TYPE (skip), t,
3013 build_int_cst (integer_type_node,
3014 (current_function_pretend_args_size
3015 - STACK_POINTER_OFFSET)));
3016 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
3017 TREE_SIDE_EFFECTS (t) = 1;
3018 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3021 /* Gimplify va_arg by updating the va_list structure
3022 VALIST as required to retrieve an argument of type
3023 TYPE, and returning that argument.
3025 ret = va_arg(VALIST, TYPE);
3027 generates code equivalent to:
3029 paddedsize = (sizeof(TYPE) + 15) & -16;
3030 if (VALIST.__args + paddedsize > VALIST.__skip
3031 && VALIST.__args <= VALIST.__skip)
3032 addr = VALIST.__skip + 32;
3034 addr = VALIST.__args;
3035 VALIST.__args = addr + paddedsize;
3036 ret = *(TYPE *)addr;
/* NOTE(review): elided here: the return type, braces, declarations of
   args/skip, and the assignments of the two COMPONENT_REFs into
   args/skip.  */
3039 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3040 tree * post_p ATTRIBUTE_UNUSED)
3042 tree f_args, f_skip;
3044 HOST_WIDE_INT size, rsize;
3045 tree paddedsize, addr, tmp;
3046 bool pass_by_reference_p;
3048 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3049 f_skip = TREE_CHAIN (f_args);
3051 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3053 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3055 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3057 addr = create_tmp_var (ptr_type_node, "va_arg");
3058 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3060 /* if an object is dynamically sized, a pointer to it is passed
3061 instead of the object itself. */
3062 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3064 if (pass_by_reference_p)
3065 type = build_pointer_type (type);
/* rsize = size rounded up to a whole number of words.  */
3066 size = int_size_in_bytes (type);
3067 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3069 /* build conditional expression to calculate addr. The expression
3070 will be gimplified later. */
3071 paddedsize = fold_convert (ptr_type_node, size_int (rsize));
3072 tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize);
/* True when the padded arg would cross __skip: switch from the
   register-save area to the caller's stack frame (+32 bytes).  */
3073 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3074 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3075 build2 (LE_EXPR, boolean_type_node, args, skip));
3077 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3078 build2 (PLUS_EXPR, ptr_type_node, skip,
3079 fold_convert (ptr_type_node, size_int (32))), args);
3081 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
3082 gimplify_and_add (tmp, pre_p);
3084 /* update VALIST.__args */
3085 tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize);
3086 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
3087 gimplify_and_add (tmp, pre_p);
3089 addr = fold_convert (build_pointer_type (type), addr);
/* For pass-by-reference, addr holds a pointer to the pointer; one
   extra dereference fetches the actual object.  */
3091 if (pass_by_reference_p)
3092 addr = build_va_arg_indirect_ref (addr);
3094 return build_va_arg_indirect_ref (addr);
3097 /* Save parameter registers starting with the register that corresponds
3098 to the first unnamed parameters. If the first unnamed parameter is
3099 in the stack then save no registers. Set pretend_args_size to the
3100 amount of space needed to save the registers. */
/* NOTE(review): this extract elides the return type, braces, the local
   declarations (ncum/tmp/regno/offset), the no_rtl guard, and the
   offset increment inside the loop.  */
3102 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3103 tree type, int *pretend_size, int no_rtl)
3112 /* cum currently points to the last named argument, we want to
3113 start at the next argument. */
3114 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3116 offset = -STACK_POINTER_OFFSET;
/* Each remaining argument register is spilled as a full quadword
   (V4SImode) into the incoming-args area.  */
3117 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3119 tmp = gen_frame_mem (V4SImode,
3120 plus_constant (virtual_incoming_args_rtx,
3122 emit_move_insn (tmp,
3123 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3126 *pretend_size = offset + STACK_POINTER_OFFSET;
/* Implements CONDITIONAL_REGISTER_USAGE: reserve the PIC register
   (its guarding condition, presumably flag_pic, is elided from this
   extract) and treat the interrupt register as a global.  */
3131 spu_conditional_register_usage (void)
3135 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3136 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3138 global_regs[INTR_REGNUM] = 1;
3141 /* This is called to decide when we can simplify a load instruction. We
3142 must only return true for registers which we know will always be
3143 aligned. Taking into account that CSE might replace this reg with
3144 another one that has not been marked aligned.
3145 So this is really only true for frame, stack and virtual registers,
3146 which we know are always aligned and should not be adversely affected
/* NOTE(review): the "by CSE" tail of the comment, the return type and
   braces are elided from this extract.  */
3149 regno_aligned_for_load (int regno)
3151 return regno == FRAME_POINTER_REGNUM
3152 || regno == HARD_FRAME_POINTER_REGNUM
3153 || regno == STACK_POINTER_REGNUM
3154 || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER);
3157 /* Return TRUE when mem is known to be 16-byte aligned. */
/* NOTE(review): the "return 1"/"return 0" lines after each test, the
   return type and braces are elided from this extract.  */
3159 aligned_mem_p (rtx mem)
/* MEM_ALIGN is measured in bits, so >= 128 means 16-byte aligned.  */
3161 if (MEM_ALIGN (mem) >= 128)
3163 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3165 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3167 rtx p0 = XEXP (XEXP (mem, 0), 0);
3168 rtx p1 = XEXP (XEXP (mem, 0), 1);
/* reg+reg or reg+const: aligned when the base is a known-aligned
   register and the other operand is aligned too.  */
3169 if (regno_aligned_for_load (REGNO (p0)))
3171 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3173 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3177 else if (GET_CODE (XEXP (mem, 0)) == REG)
3179 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3182 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3184 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3186 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3187 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3188 if (GET_CODE (p0) == SYMBOL_REF
3189 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3195 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3196 into its SYMBOL_REF_FLAGS. */
3198 spu_encode_section_info (tree decl, rtx rtl, int first)
3200 default_encode_section_info (decl, rtl, first);
3202 /* If a variable has a forced alignment to < 16 bytes, mark it with
3203 SYMBOL_FLAG_ALIGN1. */
/* DECL_ALIGN is in bits: user-forced alignment below 128 bits means
   the symbol may not sit on a quadword boundary.  */
3204 if (TREE_CODE (decl) == VAR_DECL
3205 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3206 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3209 /* Return TRUE if we are certain the mem refers to a complete object
3210 which is both 16-byte aligned and padded to a 16-byte boundary. This
3211 would make it safe to store with a single instruction.
3212 We guarantee the alignment and padding for static objects by aligning
3213 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3214 FIXME: We currently cannot guarantee this for objects on the stack
3215 because assign_parm_setup_stack calls assign_stack_local with the
3216 alignment of the parameter mode and in that case the alignment never
3217 gets adjusted by LOCAL_ALIGNMENT. */
/* NOTE(review): the return type, braces, the "return 0/1" lines and
   the null-checks on decl are elided from this extract.  */
3219 store_with_one_insn_p (rtx mem)
3221 rtx addr = XEXP (mem, 0);
3222 if (GET_MODE (mem) == BLKmode)
3224 /* Only static objects. */
3225 if (GET_CODE (addr) == SYMBOL_REF)
3227 /* We use the associated declaration to make sure the access is
3228 referring to the whole object.
3229 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3230 if it is necessary. Will there be cases where one exists, and
3231 the other does not? Will there be cases where both exist, but
3232 have different types? */
3233 tree decl = MEM_EXPR (mem);
3235 && TREE_CODE (decl) == VAR_DECL
3236 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3238 decl = SYMBOL_REF_DECL (addr);
3240 && TREE_CODE (decl) == VAR_DECL
3241 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
/* Expand a move of MODE between ops[0] and ops[1].  Returns nonzero
   (presumably -- the return statements are partly elided) when the
   move has been fully emitted here and the caller's pattern should
   FAIL/DONE accordingly.
   NOTE(review): the return type, braces and several return statements
   are elided from this extract.  */
3248 spu_expand_mov (rtx * ops, enum machine_mode mode)
3250 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3253 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3255 rtx from = SUBREG_REG (ops[1]);
3256 enum machine_mode imode = GET_MODE (from);
3258 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3259 && GET_MODE_CLASS (imode) == MODE_INT
3260 && subreg_lowpart_p (ops[1]));
/* Sub-word sources are widened to SImode before converting.  */
3262 if (GET_MODE_SIZE (imode) < 4)
3264 from = gen_rtx_SUBREG (SImode, from, 0);
3268 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3270 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3271 emit_insn (GEN_FCN (icode) (ops[0], from));
3274 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3278 /* At least one of the operands needs to be a register. */
3279 if ((reload_in_progress | reload_completed) == 0
3280 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3282 rtx temp = force_reg (mode, ops[1]);
3283 emit_move_insn (ops[0], temp);
3286 if (reload_in_progress || reload_completed)
3288 if (CONSTANT_P (ops[1]))
3289 return spu_split_immediate (ops);
/* Non-trivial loads/stores go through the multi-insn store/load
   expanders with fresh TImode scratch registers.  */
3294 if (GET_CODE (ops[0]) == MEM)
3296 if (!spu_valid_move (ops))
3298 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3299 gen_reg_rtx (TImode)));
3303 else if (GET_CODE (ops[1]) == MEM)
3305 if (!spu_valid_move (ops))
3308 (ops[0], ops[1], gen_reg_rtx (TImode),
3309 gen_reg_rtx (SImode)));
3313 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3315 if (GET_CODE (ops[1]) == CONST_INT)
3317 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3318 if (val != INTVAL (ops[1]))
3320 emit_move_insn (ops[0], GEN_INT (val));
3331 /* For now, only frame registers are known to be aligned at all times.
3332 We can't trust REGNO_POINTER_ALIGN because optimization will move
3333 registers around, potentially changing an "aligned" register in an
3334 address to an unaligned register, which would result in an invalid
/* NOTE(review): the function signature line (presumably
   "static int reg_align (rtx reg)") is elided from this extract, as is
   the tail of the comment above and the braces.  */
3336 int regno = REGNO (reg);
/* Frame-related regs report their real alignment; anything else is
   conservatively assumed byte-aligned (alignment 1).  */
3337 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
/* Split an unaligned or sub-quadword load: emit an aligned TImode lqd/lqx
   followed by a rotate that moves the wanted bytes into the preferred
   slot, then convert into ops[0]'s mode.
   NOTE(review): the return type, braces, rot/rot_amt declarations and
   some rot-assignment lines are elided from this extract.  */
3341 spu_split_load (rtx * ops)
3343 enum machine_mode mode = GET_MODE (ops[0]);
3344 rtx addr, load, rot, mem, p0, p1;
3347 addr = XEXP (ops[1], 0);
3351 if (GET_CODE (addr) == PLUS)
3354 aligned reg + aligned reg => lqx
3355 aligned reg + unaligned reg => lqx, rotqby
3356 aligned reg + aligned const => lqd
3357 aligned reg + unaligned const => lqd, rotqbyi
3358 unaligned reg + aligned reg => lqx, rotqby
3359 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3360 unaligned reg + aligned const => lqd, rotqby
3361 unaligned reg + unaligned const -> not allowed by legitimate address
3363 p0 = XEXP (addr, 0);
3364 p1 = XEXP (addr, 1);
/* reg_align < 128 bits: base register not known quadword-aligned,
   so the rotate amount must come from the register(s) at runtime.  */
3365 if (reg_align (p0) < 128)
3367 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3369 emit_insn (gen_addsi3 (ops[3], p0, p1))
3377 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
/* Split the constant into an aligned base (& -16) plus a compile-time
   rotate amount (& 15).  */
3379 rot_amt = INTVAL (p1) & 15;
3380 p1 = GEN_INT (INTVAL (p1) & -16);
3381 addr = gen_rtx_PLUS (SImode, p0, p1);
3383 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3387 else if (GET_CODE (addr) == REG)
3389 if (reg_align (addr) < 128)
3392 else if (GET_CODE (addr) == CONST)
3394 if (GET_CODE (XEXP (addr, 0)) == PLUS
3395 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3396 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3398 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3400 addr = gen_rtx_CONST (Pmode,
3401 gen_rtx_PLUS (Pmode,
3402 XEXP (XEXP (addr, 0), 0),
3403 GEN_INT (rot_amt & -16)));
3405 addr = XEXP (XEXP (addr, 0), 0);
3410 else if (GET_CODE (addr) == CONST_INT)
3412 rot_amt = INTVAL (addr);
3413 addr = GEN_INT (rot_amt & -16);
3415 else if (!ALIGNED_SYMBOL_REF_P (addr))
/* Scalars smaller than a word sit at the end of the preferred slot,
   so bias the rotate to land them there.  */
3418 if (GET_MODE_SIZE (mode) < 4)
3419 rot_amt += GET_MODE_SIZE (mode) - 4;
3425 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3432 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3433 mem = change_address (ops[1], TImode, addr);
3435 emit_insn (gen_movti (load, mem));
3438 emit_insn (gen_rotqby_ti (load, load, rot));
3440 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
/* After reload no new pseudo may be created, so reuse the load reg's
   hard register directly.  */
3442 if (reload_completed)
3443 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3445 emit_insn (gen_spu_convert (ops[0], load));
/* Split a store that cannot be done in one instruction: load the
   surrounding quadword, merge in the new value with cpat/shufb, and
   store the whole quadword back (read-modify-write).
   NOTE(review): the return type, braces, and the declarations/uses of
   reg, pat and scalar are partly elided from this extract.  */
3449 spu_split_store (rtx * ops)
3451 enum machine_mode mode = GET_MODE (ops[0]);
3454 rtx addr, p0, p1, p1_lo, smem;
3458 addr = XEXP (ops[0], 0);
3460 if (GET_CODE (addr) == PLUS)
3463 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3464 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3465 aligned reg + aligned const => lqd, c?d, shuf, stqx
3466 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3467 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3468 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3469 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3470 unaligned reg + unaligned const -> not allowed by legitimate address
3473 p0 = XEXP (addr, 0);
/* p1_lo keeps the sub-quadword byte offset for the cpat pattern.  */
3474 p1 = p1_lo = XEXP (addr, 1);
3475 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3477 p1_lo = GEN_INT (INTVAL (p1) & 15);
3478 p1 = GEN_INT (INTVAL (p1) & -16);
3479 addr = gen_rtx_PLUS (SImode, p0, p1);
3482 else if (GET_CODE (addr) == REG)
3486 p1 = p1_lo = const0_rtx;
3491 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3492 p1 = 0; /* aform doesn't use p1 */
3494 if (ALIGNED_SYMBOL_REF_P (addr))
3496 else if (GET_CODE (addr) == CONST)
3498 if (GET_CODE (XEXP (addr, 0)) == PLUS
3499 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3500 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3502 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3504 addr = gen_rtx_CONST (Pmode,
3505 gen_rtx_PLUS (Pmode,
3506 XEXP (XEXP (addr, 0), 0),
3507 GEN_INT (v & -16)));
3509 addr = XEXP (XEXP (addr, 0), 0);
3510 p1_lo = GEN_INT (v & 15);
3513 else if (GET_CODE (addr) == CONST_INT)
3515 p1_lo = GEN_INT (INTVAL (addr) & 15);
3516 addr = GEN_INT (INTVAL (addr) & -16);
3520 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
/* scalar != 0 means the whole quadword belongs to this object, so no
   read-modify-write of neighbouring bytes is needed.  */
3522 scalar = store_with_one_insn_p (ops[0]);
3525 /* We could copy the flags from the ops[0] MEM to mem here,
3526 We don't because we want this load to be optimized away if
3527 possible, and copying the flags will prevent that in certain
3528 cases, e.g. consider the volatile flag. */
3530 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3531 set_mem_alias_set (lmem, 0);
3532 emit_insn (gen_movti (reg, lmem));
3534 if (!p0 || reg_align (p0) >= 128)
3535 p0 = stack_pointer_rtx;
/* cpat builds the insertion-control pattern; shufb merges the new
   value into the loaded quadword.  */
3539 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3540 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3542 else if (reload_completed)
3544 if (GET_CODE (ops[1]) == REG)
3545 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3546 else if (GET_CODE (ops[1]) == SUBREG)
3547 emit_move_insn (reg,
3548 gen_rtx_REG (GET_MODE (reg),
3549 REGNO (SUBREG_REG (ops[1]))));
3555 if (GET_CODE (ops[1]) == REG)
3556 emit_insn (gen_spu_convert (reg, ops[1]));
3557 else if (GET_CODE (ops[1]) == SUBREG)
3558 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
/* Sub-word scalars must be shifted from the preferred slot up to the
   start of the quadword before storing.  */
3563 if (GET_MODE_SIZE (mode) < 4 && scalar)
3564 emit_insn (gen_shlqby_ti
3565 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3567 smem = change_address (ops[0], TImode, addr);
3568 /* We can't use the previous alias set because the memory has changed
3569 size and can potentially overlap objects of other types. */
3570 set_mem_alias_set (smem, 0);
3572 emit_insn (gen_movti (smem, reg));
3575 /* Return TRUE if X is MEM which is a struct member reference
3576 and the member can safely be loaded and stored with a single
3577 instruction because it is padded. */
/* NOTE(review): the return type, braces, the "return 0/1" lines and
   the TREE_CHAIN advance before the final check are elided from this
   extract.  */
3579 mem_is_padded_component_ref (rtx x)
3581 tree t = MEM_EXPR (x);
3583 if (!t || TREE_CODE (t) != COMPONENT_REF)
/* Operand 1 of a COMPONENT_REF is the FIELD_DECL being accessed.  */
3585 t = TREE_OPERAND (t, 1);
3586 if (!t || TREE_CODE (t) != FIELD_DECL
3587 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3589 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3590 r = DECL_FIELD_CONTEXT (t);
3591 if (!r || TREE_CODE (r) != RECORD_TYPE)
3593 /* Make sure they are the same mode */
3594 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3596 /* If there are no following fields then the field alignment assures
3597 the structure is padded to the alignment which means this field is
3599 if (TREE_CHAIN (t) == 0)
3601 /* If the following field is also aligned then this field will be
3604 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3609 /* Parse the -mfixed-range= option string. */
/* NOTE(review): this extract elides the return type, braces, the
   declarations of i/first/last, the comma-splitting loop structure and
   the error-path returns.  */
3611 fix_range (const char *const_str)
3614 char *str, *dash, *comma;
3616 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
3617 REG2 are either register names or register numbers. The effect
3618 of this option is to mark the registers in the range from REG1 to
3619 REG2 as ``fixed'' so they won't be used by the compiler. */
/* Work on a stack copy so the dashes/commas can be overwritten with
   NULs while parsing.  */
3621 i = strlen (const_str);
3622 str = (char *) alloca (i + 1);
3623 memcpy (str, const_str, i + 1);
3627 dash = strchr (str, '-');
3630 warning (0, "value of -mfixed-range must have form REG1-REG2");
3634 comma = strchr (dash + 1, ',');
3638 first = decode_reg_name (str);
3641 warning (0, "unknown register name: %s", str);
3645 last = decode_reg_name (dash + 1);
3648 warning (0, "unknown register name: %s", dash + 1);
3656 warning (0, "%s-%s is an empty range", str, dash + 1);
/* Mark every register in [first, last] unusable by the allocator.  */
3660 for (i = first; i <= last; ++i)
3661 fixed_regs[i] = call_used_regs[i] = 1;
/* Return nonzero when the move described by ops[] can be emitted as a
   single instruction (the "return 0/1" lines are partly elided in this
   extract).  */
3672 spu_valid_move (rtx * ops)
3674 enum machine_mode mode = GET_MODE (ops[0]);
3675 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3678 /* init_expr_once tries to recog against load and store insns to set
3679 the direct_load[] and direct_store[] arrays. We always want to
3680 consider those loads and stores valid. init_expr_once is called in
3681 the context of a dummy function which does not have a decl. */
3682 if (cfun->decl == 0)
3685 /* Don't allows loads/stores which would require more than 1 insn.
3686 During and after reload we assume loads and stores only take 1
3688 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
/* Stores need the whole quadword to belong to the object (or a padded
   struct member); loads only need 16-byte alignment.  */
3690 if (GET_CODE (ops[0]) == MEM
3691 && (GET_MODE_SIZE (mode) < 4
3692 || !(store_with_one_insn_p (ops[0])
3693 || mem_is_padded_component_ref (ops[0]))))
3695 if (GET_CODE (ops[1]) == MEM
3696 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3702 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3703 can be generated using the fsmbi instruction. */
/* NOTE(review): the return type, braces and the surrounding
   CONSTANT_P guard are elided from this extract.  */
3705 fsmbi_const_p (rtx x)
3709 /* We can always choose DImode for CONST_INT because the high bits
3710 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3711 enum immediate_class c = classify_immediate (x, DImode);
3712 return c == IC_FSMBI;
3717 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3718 can be generated using the cbd, chd, cwd or cdd instruction. */
/* NOTE(review): the return type, braces and guard lines are elided
   from this extract.  */
3720 cpat_const_p (rtx x, enum machine_mode mode)
3724 enum immediate_class c = classify_immediate (x, mode);
3725 return c == IC_CPAT;
/* Build the TImode shuffle-control constant that a cbd/chd/cwd/cdd
   instruction would generate for the given base/offset/size operands,
   or 0 (elided) when the operands do not describe a constant pattern.
   NOTE(review): the return type, braces, the "return 0" lines, the
   dst[] initialization value and the shift assignments per size are
   elided from this extract.  */
3731 gen_cpat_const (rtx * ops)
3733 unsigned char dst[16];
3734 int i, offset, shift, isize;
3735 if (GET_CODE (ops[3]) != CONST_INT
3736 || GET_CODE (ops[2]) != CONST_INT
3737 || (GET_CODE (ops[1]) != CONST_INT
3738 && GET_CODE (ops[1]) != REG))
/* A register base is only usable when it is a pointer known to be
   16-byte aligned; otherwise the pattern depends on runtime bits.  */
3740 if (GET_CODE (ops[1]) == REG
3741 && (!REG_POINTER (ops[1])
3742 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3745 for (i = 0; i < 16; i++)
3747 isize = INTVAL (ops[3]);
3750 else if (isize == 2)
/* offset = byte position of the element within the quadword.  */
3754 offset = (INTVAL (ops[2]) +
3755 (GET_CODE (ops[1]) ==
3756 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3757 for (i = 0; i < isize; i++)
3758 dst[offset + i] = i + shift;
3759 return array_to_constant (TImode, dst);
3762 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3763 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3764 than 16 bytes, the value is repeated across the rest of the array. */
/* NOTE(review): braces, the val/i/j/first declarations, the INTVAL
   read for CONST_INT, the val >>= 8 shifts inside the byte loops and
   the CONST_VECTOR abort path are elided from this extract.  */
3766 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3771 memset (arr, 0, 16);
3772 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3773 if (GET_CODE (x) == CONST_INT
3774 || (GET_CODE (x) == CONST_DOUBLE
3775 && (mode == SFmode || mode == DFmode)))
3777 gcc_assert (mode != VOIDmode && mode != BLKmode);
3779 if (GET_CODE (x) == CONST_DOUBLE)
3780 val = const_double_to_hwint (x);
/* Emit bytes big-endian: least-significant byte at index `first'.  */
3783 first = GET_MODE_SIZE (mode) - 1;
3784 for (i = first; i >= 0; i--)
3786 arr[i] = val & 0xff;
3789 /* Splat the constant across the whole array. */
3790 for (j = 0, i = first + 1; i < 16; i++)
3793 j = (j == first) ? 0 : j + 1;
3796 else if (GET_CODE (x) == CONST_DOUBLE)
/* TImode CONST_DOUBLE: low half in bytes 8-15, high half in 0-7.  */
3798 val = CONST_DOUBLE_LOW (x);
3799 for (i = 15; i >= 8; i--)
3801 arr[i] = val & 0xff;
3804 val = CONST_DOUBLE_HIGH (x);
3805 for (i = 7; i >= 0; i--)
3807 arr[i] = val & 0xff;
3811 else if (GET_CODE (x) == CONST_VECTOR)
3815 mode = GET_MODE_INNER (mode);
3816 units = CONST_VECTOR_NUNITS (x);
3817 for (i = 0; i < units; i++)
3819 elt = CONST_VECTOR_ELT (x, i);
3820 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3822 if (GET_CODE (elt) == CONST_DOUBLE)
3823 val = const_double_to_hwint (elt);
3826 first = GET_MODE_SIZE (mode) - 1;
3827 if (first + i * GET_MODE_SIZE (mode) > 16)
3829 for (j = first; j >= 0; j--)
3831 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3841 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
3842 smaller than 16 bytes, use the bytes that would represent that value
3843 in a register, e.g., for QImode return the value of arr[3]. */
/* NOTE(review): braces, the val/high declarations and the per-mode
   case labels (TImode/SFmode/DFmode) are elided from this extract.  */
3845 array_to_constant (enum machine_mode mode, unsigned char arr[16])
3847 enum machine_mode inner_mode;
3849 int units, size, i, j, k;
3852 if (GET_MODE_CLASS (mode) == MODE_INT
3853 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
/* Sub-word values live at the end of the preferred (first) word, so
   start reading at byte 4 - size.  */
3855 j = GET_MODE_SIZE (mode);
3856 i = j < 4 ? 4 - j : 0;
3857 for (val = 0; i < j; i++)
3858 val = (val << 8) | arr[i];
3859 val = trunc_int_for_mode (val, mode);
3860 return GEN_INT (val);
/* (elided case: TImode built from two HOST_WIDE_INT halves)  */
3866 for (i = high = 0; i < 8; i++)
3867 high = (high << 8) | arr[i];
3868 for (i = 8, val = 0; i < 16; i++)
3869 val = (val << 8) | arr[i];
3870 return immed_double_const (val, high, TImode);
3874 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3875 val = trunc_int_for_mode (val, SImode);
3876 return hwint_to_const_double (SFmode, val);
3880 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3882 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
3883 return hwint_to_const_double (DFmode, val);
3886 if (!VECTOR_MODE_P (mode))
/* Vector case: rebuild one element at a time from consecutive byte
   groups of the array.  */
3889 units = GET_MODE_NUNITS (mode);
3890 size = GET_MODE_UNIT_SIZE (mode);
3891 inner_mode = GET_MODE_INNER (mode);
3892 v = rtvec_alloc (units);
3894 for (k = i = 0; i < units; ++i)
3897 for (j = 0; j < size; j++, k++)
3898 val = (val << 8) | arr[k];
3900 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
3901 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
3903 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
3908 return gen_rtx_CONST_VECTOR (mode, v);
/* Emit a warning or error for an RTX that would require a run-time
   relocation, depending on -mwarn-reloc / -merror-reloc.
   NOTE(review): the return type, braces, the msg declaration and the
   early "return" statements are elided from this extract.  */
3912 reloc_diagnostic (rtx x)
3914 tree loc_decl, decl = 0;
/* Diagnostics only matter for PIC code with one of the reloc options
   enabled.  */
3916 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
3919 if (GET_CODE (x) == SYMBOL_REF)
3920 decl = SYMBOL_REF_DECL (x);
3921 else if (GET_CODE (x) == CONST
3922 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3923 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
3925 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3926 if (decl && !DECL_P (decl))
3929 /* We use last_assemble_variable_decl to get line information. It's
3930 not always going to be right and might not even be close, but will
3931 be right for the more common cases. */
3932 if (!last_assemble_variable_decl)
3935 loc_decl = last_assemble_variable_decl;
3937 /* The decl could be a string constant. */
3938 if (decl && DECL_P (decl))
3939 msg = "%Jcreating run-time relocation for %qD";
3941 msg = "creating run-time relocation";
3943 if (TARGET_WARN_RELOC)
3944 warning (0, msg, loc_decl, decl);
3946 error (msg, loc_decl, decl);
3949 /* Hook into assemble_integer so we can generate an error for run-time
3950 relocations. The SPU ABI disallows them. */
3952 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
3954 /* By default run-time relocations aren't supported, but we allow them
3955 in case users support it in their own run-time loader. And we provide
3956 a warning for those users that don't. */
/* SYMBOL_REF, LABEL_REF or CONST in static data all need a relocation
   at load time -- flag them, then assemble normally.  */
3957 if ((GET_CODE (x) == SYMBOL_REF)
3958 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
3959 reloc_diagnostic (x);
3961 return default_assemble_integer (x, size, aligned_p);
/* Implements TARGET_ASM_GLOBALIZE_LABEL: emit a ".global NAME"
   directive (the trailing newline fputs is elided in this extract).  */
3965 spu_asm_globalize_label (FILE * file, const char *name)
3967 fputs ("\t.global\t", file);
3968 assemble_name (file, name);
/* Implements TARGET_RTX_COSTS: estimate the cost of X in COSTS_N_INSNS
   units and store it in *TOTAL.
   NOTE(review): the switch statement skeleton, most case labels, the
   return type and braces are elided from this extract -- the bare
   "*total = ..." lines below each belong to an elided case.  */
3973 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
3975 enum machine_mode mode = GET_MODE (x);
3976 int cost = COSTS_N_INSNS (2);
3978 /* Folding to a CONST_VECTOR will use extra space but there might
3979 be only a small savings in cycles. We'd like to use a CONST_VECTOR
3980 only if it allows us to fold away multiple insns. Changing the cost
3981 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3982 because this cost will only be compared against a single insn.
3983 if (code == CONST_VECTOR)
3984 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3987 /* Use defaults for float operations. Not accurate but good enough. */
3990 *total = COSTS_N_INSNS (13);
3995 *total = COSTS_N_INSNS (6);
/* Constants loadable by a single il/ila-family insn are cheapest.  */
4001 if (satisfies_constraint_K (x))
4003 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4004 *total = COSTS_N_INSNS (1);
4006 *total = COSTS_N_INSNS (3);
4010 *total = COSTS_N_INSNS (3);
4015 *total = COSTS_N_INSNS (0);
4019 *total = COSTS_N_INSNS (5);
4023 case FLOAT_TRUNCATE:
4025 case UNSIGNED_FLOAT:
4028 *total = COSTS_N_INSNS (7);
4034 *total = COSTS_N_INSNS (9);
/* Multiply: cheaper when one operand is a suitable constant.  */
4041 GET_CODE (XEXP (x, 0)) ==
4042 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4043 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4045 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4047 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4048 cost = COSTS_N_INSNS (14);
4049 if ((val & 0xffff) == 0)
4050 cost = COSTS_N_INSNS (9);
4051 else if (val > 0 && val < 0x10000)
4052 cost = COSTS_N_INSNS (11);
4061 *total = COSTS_N_INSNS (20);
4068 *total = COSTS_N_INSNS (4);
4071 if (XINT (x, 1) == UNSPEC_CONVERT)
4072 *total = COSTS_N_INSNS (0);
4074 *total = COSTS_N_INSNS (4);
4077 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4078 if (GET_MODE_CLASS (mode) == MODE_INT
4079 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4080 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4081 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
/* Implements TARGET_EH_RETURN_FILTER_MODE (the "return word_mode;"
   line is elided from this extract).  */
4087 spu_eh_return_filter_mode (void)
4089 /* We would like this to be SImode, but sjlj exceptions seems to work
4090 only with word_mode. */
4094 /* Decide whether we can make a sibling call to a function. DECL is the
4095 declaration of the function being targeted by the call and EXP is the
4096 CALL_EXPR representing the call. */
4098 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
/* Only direct calls (known DECL), and not under -mlarge-mem where the
   call sequence differs.  */
4100 return decl && !TARGET_LARGE_MEM;
4103 /* We need to correctly update the back chain pointer and the Available
4104 Stack Size (which is in the second slot of the sp register.) */
/* Expands allocate_stack: OP1 is the byte count, OP0 receives the
   address of the new space.  NOTE(review): the return type, braces and
   the declaration of v are elided from this extract.  */
4106 spu_allocate_stack (rtx op0, rtx op1)
4109 rtx chain = gen_reg_rtx (V4SImode);
4110 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4111 rtx sp = gen_reg_rtx (V4SImode);
4112 rtx splatted = gen_reg_rtx (V4SImode);
4113 rtx pat = gen_reg_rtx (TImode);
4115 /* copy the back chain so we can save it back again. */
4116 emit_move_insn (chain, stack_bot);
4118 op1 = force_reg (SImode, op1);
/* Shuffle pattern that splats word 0 of op1 into all four slots, so a
   single vector subtract adjusts both $sp and the Available Stack
   Size slot at once.  */
4120 v = 0x1020300010203ll;
4121 emit_move_insn (pat, immed_double_const (v, v, TImode));
4122 emit_insn (gen_shufb (splatted, op1, op1, pat));
4124 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4125 emit_insn (gen_subv4si3 (sp, sp, splatted));
4127 if (flag_stack_check)
4129 rtx avail = gen_reg_rtx(SImode);
4130 rtx result = gen_reg_rtx(SImode);
/* Halt (heq) when the remaining Available Stack Size went negative.  */
4131 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4132 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4133 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4136 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4138 emit_move_insn (stack_bot, chain);
4140 emit_move_insn (op0, virtual_stack_dynamic_rtx);
/* Expander for "restore_stack_nonlocal": restore the stack pointer (and
   back chain) from the save area OP1, keeping the SPU's vector-wide sp
   invariant (back chain in word 0, Available Stack Size in word 1).
   The deltas are computed as SImode differences from the current sp and
   then splatted across all four words before being added to the sp
   vector.  */
4144 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
/* shufb pattern replicating bytes 0-3 of the first source into every
   word slot.  */
4146 static unsigned char arr[16] =
4147 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4148 rtx temp = gen_reg_rtx (SImode);
4149 rtx temp2 = gen_reg_rtx (SImode);
4150 rtx temp3 = gen_reg_rtx (V4SImode);
4151 rtx temp4 = gen_reg_rtx (V4SImode);
4152 rtx pat = gen_reg_rtx (TImode);
4153 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4155 /* Restore the backchain from the first word, sp from the second. */
4156 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4157 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4159 emit_move_insn (pat, array_to_constant (TImode, arr));
4161 /* Compute Available Stack Size for sp */
4162 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4163 emit_insn (gen_shufb (temp3, temp, temp, pat));
4165 /* Compute Available Stack Size for back chain */
4166 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4167 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4168 emit_insn (gen_addv4si3 (temp4, sp, temp4));
/* Update sp last, then write the recomputed back-chain quadword to the
   new stack bottom.  */
4170 emit_insn (gen_addv4si3 (sp, sp, temp3));
4171 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
/* Implements TARGET_INIT_LIBFUNCS.  Registers libgcc soft routines for
   the DImode integer operations the SPU has no hardware support for,
   plus unsigned int/long-long to double conversions.  */
4175 spu_init_libfuncs (void)
4177 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4178 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4179 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4180 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4181 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4182 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4183 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4184 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4185 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4186 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4187 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4189 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4190 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4193 /* Make a subreg, stripping any existing subreg. We could possibly just
4194 call simplify_subreg, but in this case we know what we want. */
/* Wrap X in a SUBREG of MODE at byte offset 0.  If X is already a
   SUBREG its inner reg is used instead (elided line presumably does
   X = SUBREG_REG (X) -- TODO confirm); if X already has MODE it is
   presumably returned unchanged.  */
4196 spu_gen_subreg (enum machine_mode mode, rtx x)
4198 if (GET_CODE (x) == SUBREG)
4200 if (GET_MODE (x) == mode)
4202 return gen_rtx_SUBREG (mode, x, 0);
/* Implements TARGET_RETURN_IN_MEMORY.  A value is returned in memory
   when its mode is BLKmode, its size is not a compile-time constant, or
   it is too large to fit in the register return area
   (MAX_REGISTER_RETURN words).  */
4206 spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
4208 return (TYPE_MODE (type) == BLKmode
4210 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4211 || int_size_in_bytes (type) >
4212 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4215 /* Create the built-in types and functions */
/* Table of all SPU builtins, generated from spu-builtins.def.  Each
   entry's fndecl field starts as NULL_TREE and is filled in by
   spu_init_builtins.  */
4217 struct spu_builtin_description spu_builtins[] = {
4218 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4219 {fcode, icode, name, type, params, NULL_TREE},
4220 #include "spu-builtins.def"
/* Implements TARGET_INIT_BUILTINS.  Builds the SPU vector type nodes,
   fills in the spu_builtin_types[] table used to describe builtin
   parameters, and registers one builtin function per entry in
   spu_builtins[] under the name "__builtin_<name>".  */
4225 spu_init_builtins (void)
4227 struct spu_builtin_description *d;
4230 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4231 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4232 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4233 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4234 V4SF_type_node = build_vector_type (float_type_node, 4);
4235 V2DF_type_node = build_vector_type (double_type_node, 2);
4237 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4238 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4239 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4240 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4242 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
/* All the immediate-range parameter kinds are presented to the
   front end as plain int; range checking happens later in
   spu_check_builtin_parm.  */
4244 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4245 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4246 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4247 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4248 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4249 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4250 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4251 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4252 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4253 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4254 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4255 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4257 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4258 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4259 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4260 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4261 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4262 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4263 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4264 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4266 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4267 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4269 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
/* Generic pointer parameter: const volatile qualified so any pointer
   argument converts without warnings.  (The pointed-to type argument
   is elided in this excerpt.)  */
4271 spu_builtin_types[SPU_BTI_PTR] =
4272 build_pointer_type (build_qualified_type
4274 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4276 /* For each builtin we build a new prototype. The tree code will make
4277 sure nodes are shared. */
4278 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4281 char name[64]; /* build_function will make a copy. */
4287 /* find last parm */
4288 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
/* Build the TYPE_ARG_TYPES list back-to-front; parm[0] is the
   return type.  */
4294 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4296 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4298 sprintf (name, "__builtin_%s", d->name);
4300 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
/* Mark the vectorizer's mask_for_load builtin pure-ish so calls to
   it can be CSEd.  */
4302 if (d->fcode == SPU_MASK_FOR_LOAD)
4303 TREE_READONLY (d->fndecl) = 1;
/* Expander for "restore_stack_block": restore the stack pointer from
   the saved value OP1.  As in spu_restore_stack_nonlocal, the delta
   from the current sp is splatted across a V4SI and added to the whole
   sp vector so the Available Stack Size slot stays consistent; the
   back-chain quadword is preserved and written back afterwards.  */
4308 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4310 static unsigned char arr[16] =
4311 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4313 rtx temp = gen_reg_rtx (Pmode);
4314 rtx temp2 = gen_reg_rtx (V4SImode);
4315 rtx temp3 = gen_reg_rtx (V4SImode);
4316 rtx pat = gen_reg_rtx (TImode);
4317 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4319 emit_move_insn (pat, array_to_constant (TImode, arr));
4321 /* Restore the sp. */
4322 emit_move_insn (temp, op1);
/* Save the back-chain quadword before adjusting sp.  */
4323 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4325 /* Compute available stack size for sp. */
4326 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4327 emit_insn (gen_shufb (temp3, temp, temp, pat));
4329 emit_insn (gen_addv4si3 (sp, sp, temp3));
4330 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
/* Return nonzero when CHANNEL is in the range 21..27 -- the channels
   callers treat as "safe" DMA channels (exact semantics of safety not
   visible here; see SPU Channel documentation).  */
4334 spu_safe_dma (HOST_WIDE_INT channel)
4336 return (channel >= 21 && channel <= 27);
/* Expand a "splats" builtin: replicate scalar OPS[1] into every element
   of vector OPS[0].  Constants are materialized directly; otherwise the
   scalar is broadcast with a shufb whose pattern depends on the element
   size (the switch selecting among the four patterns below is elided in
   this excerpt).  */
4340 spu_builtin_splats (rtx ops[])
4342 enum machine_mode mode = GET_MODE (ops[0]);
/* Immediate scalar: build the full vector constant in one move.  */
4343 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4345 unsigned char arr[16];
4346 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4347 emit_move_insn (ops[0], array_to_constant (mode, arr));
/* Other CONSTANT_P (e.g. symbolic) V4SI case: build a CONST_VECTOR
   with four identical elements.  */
4349 else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4351 rtvec v = rtvec_alloc (4);
4352 RTVEC_ELT (v, 0) = ops[1];
4353 RTVEC_ELT (v, 1) = ops[1];
4354 RTVEC_ELT (v, 2) = ops[1];
4355 RTVEC_ELT (v, 3) = ops[1];
4356 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
/* Non-constant scalar: broadcast via shufb with a per-element-size
   byte-selector pattern.  */
4360 rtx reg = gen_reg_rtx (TImode);
4362 if (GET_CODE (ops[1]) != REG
4363 && GET_CODE (ops[1]) != SUBREG)
4364 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
/* DImode elements: copy each 8-byte half once.  */
4370 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
/* SImode elements: repeat bytes 0-3 in every word.  */
4376 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
/* HImode elements: repeat bytes 2-3 in every halfword.  */
4381 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
/* QImode elements: repeat byte 3 everywhere.  */
4386 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4392 emit_move_insn (reg, shuf);
4393 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
/* Expand an "extract" builtin: OPS[0] = element OPS[2] of vector
   OPS[1].  A constant index uses the mode-specific vec_extract
   patterns; a variable index rotates the quadword left by the element's
   byte offset and converts the preferred slot to the scalar mode.
   (Switch/case framing lines are elided in this excerpt.)  */
4398 spu_builtin_extract (rtx ops[])
4400 enum machine_mode mode;
4403 mode = GET_MODE (ops[1]);
4405 if (GET_CODE (ops[2]) == CONST_INT)
4410 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4413 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4416 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4419 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4422 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4425 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
/* Variable index: compute the rotate-byte count TMP from the index
   so the requested element lands in the preferred scalar slot.  */
4433 from = spu_gen_subreg (TImode, ops[1]);
4434 rot = gen_reg_rtx (TImode);
4435 tmp = gen_reg_rtx (SImode);
/* QImode: tmp = index - 3 (element size 1, slot offset 3).  */
4440 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
/* HImode: tmp = 2*index - 2.  */
4443 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4444 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
/* 4-byte elements: tmp = 4*index.  */
4448 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
/* 8-byte elements: tmp = 8*index.  */
4452 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4457 emit_insn (gen_rotqby_ti (rot, from, tmp));
4459 emit_insn (gen_spu_convert (ops[0], rot));
/* Expand an "insert" builtin: OPS[0] = vector OPS[1] with element
   OPS[3] replaced by scalar OPS[2].  Uses a cwd/chd/cbd-style control
   mask (generated relative to the stack pointer at byte offset
   index * element_size) and a shufb to merge the new element in.  The
   gen_* call producing MASK is partially elided in this excerpt.  */
4463 spu_builtin_insert (rtx ops[])
4465 enum machine_mode mode = GET_MODE (ops[0]);
4466 enum machine_mode imode = GET_MODE_INNER (mode);
4467 rtx mask = gen_reg_rtx (TImode);
4470 if (GET_CODE (ops[3]) == CONST_INT)
4471 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
/* Variable index: byte offset = index * element size.  */
4474 offset = gen_reg_rtx (SImode);
4475 emit_insn (gen_mulsi3
4476 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4479 (mask, stack_pointer_rtx, offset,
4480 GEN_INT (GET_MODE_SIZE (imode))));
4481 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
/* Expand a "promote" builtin: place scalar OPS[1] into element OPS[2]
   of vector OPS[0] (other elements undefined).  The scalar is converted
   into a TImode quadword and rotated so that it lands at the requested
   element's byte offset; the offset computation is negated because
   rotqby rotates toward lower addresses.  (Switch/case framing lines
   are elided in this excerpt.)  */
4485 spu_builtin_promote (rtx ops[])
4487 enum machine_mode mode, imode;
4488 rtx rot, from, offset;
4491 mode = GET_MODE (ops[0]);
4492 imode = GET_MODE_INNER (mode);
4494 from = gen_reg_rtx (TImode);
4495 rot = spu_gen_subreg (TImode, ops[0]);
4497 emit_insn (gen_spu_convert (from, ops[1]));
4499 if (GET_CODE (ops[2]) == CONST_INT)
/* Constant index: offset = (-size*index + slot adjustment) mod 16.  */
4501 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4502 if (GET_MODE_SIZE (imode) < 4)
4503 pos += 4 - GET_MODE_SIZE (imode);
4504 offset = GEN_INT (pos & 15);
4507 offset = gen_reg_rtx (SImode);
/* QImode: offset = 3 - index.  */
4512 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
/* HImode: offset = 2*(1 - index).  */
4515 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4516 emit_insn (gen_addsi3 (offset, offset, offset));
/* 4-byte elements: offset = -4*index.  */
4520 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4521 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
/* 8-byte elements: offset = 8*index (negation handled elsewhere --
   elided lines; TODO confirm against full file).  */
4525 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4531 emit_insn (gen_rotqby_ti (rot, from, offset));
/* Implements TARGET_TRAMPOLINE_INIT-style expansion: write a small
   instruction sequence at TRAMP that loads the static chain CXT into
   STATIC_CHAIN_REGNUM and branches to FNADDR.  The instruction words
   are built at run time by merging the immediate fields (shifted into
   position) into opcode templates.  */
4535 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4537 rtx shuf = gen_reg_rtx (V4SImode);
4538 rtx insn = gen_reg_rtx (V4SImode);
4543 fnaddr = force_reg (SImode, fnaddr);
4544 cxt = force_reg (SImode, cxt);
4546 if (TARGET_LARGE_MEM)
/* Large memory model: build ilhu/iohl pairs (opcodes 0x41/0x60
   below) carrying the high and low halves of fnaddr and cxt, then a
   separate absolute branch (0x35...) appended after the quadword.  */
4548 rtx rotl = gen_reg_rtx (V4SImode);
4549 rtx mask = gen_reg_rtx (V4SImode);
4550 rtx bi = gen_reg_rtx (SImode);
4551 unsigned char shufa[16] = {
4552 2, 3, 0, 1, 18, 19, 16, 17,
4553 0, 1, 2, 3, 16, 17, 18, 19
4555 unsigned char insna[16] = {
4557 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4559 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4562 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4563 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
/* Gather the halfwords of fnaddr/cxt, rotate them into the
   instructions' immediate field position (bit 7), and merge them
   into the opcode templates with selb.  */
4565 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4566 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4567 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4568 emit_insn (gen_selb (insn, insnc, rotl, mask));
4570 mem = memory_address (Pmode, tramp);
4571 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
/* Append the branch instruction (0x35 opcode, register 79 field) in
   the second quadword of the trampoline.  */
4573 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4574 mem = memory_address (Pmode, plus_constant (tramp, 16));
4575 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
/* Small memory model: a single quadword with ila (0x42) loading the
   static chain and a branch whose target is or'ed in.  */
4579 rtx scxt = gen_reg_rtx (SImode);
4580 rtx sfnaddr = gen_reg_rtx (SImode);
4581 unsigned char insna[16] = {
4582 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4588 shufc = gen_reg_rtx (TImode);
4589 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4591 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4592 fits 18 bits and the last 4 are zeros. This will be true if
4593 the stack pointer is initialized to 0x3fff0 at program start,
4594 otherwise the ila instruction will be garbage. */
4596 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4597 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4599 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4600 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4601 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4603 mem = memory_address (Pmode, tramp);
4604 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
/* Flush the instruction cache so the freshly written code is seen.  */
4607 emit_insn (gen_sync ());
/* Expand a sign extension OPS[0] = sign_extend (OPS[1]) via shufb: a
   sign word is computed (arithmetic shift or extend), then a shuffle
   pattern places the source bytes in the low positions and replicates
   the sign byte above them.  LAST is the index of the least significant
   byte of the destination (7 for DImode, 15 for TImode).  (Loop bodies
   filling ARR with the sign-byte selector are elided in this
   excerpt.)  */
4611 spu_expand_sign_extend (rtx ops[])
4613 unsigned char arr[16];
4614 rtx pat = gen_reg_rtx (TImode);
4617 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4618 if (GET_MODE (ops[1]) == QImode)
/* QImode source: extend to HImode first so shufb has a sign byte
   to replicate.  */
4620 sign = gen_reg_rtx (HImode);
4621 emit_insn (gen_extendqihi2 (sign, ops[1]));
4622 for (i = 0; i < 16; i++)
4628 for (i = 0; i < 16; i++)
4630 switch (GET_MODE (ops[1]))
4633 sign = gen_reg_rtx (SImode);
4634 emit_insn (gen_extendhisi2 (sign, ops[1]));
4636 arr[last - 1] = 0x02;
/* SImode source: sign = ops[1] >> 31 (arithmetic).  */
4639 sign = gen_reg_rtx (SImode);
4640 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4641 for (i = 0; i < 4; i++)
4642 arr[last - i] = 3 - i;
/* DImode source: convert to get the high word, then shift for the
   sign.  */
4645 sign = gen_reg_rtx (SImode);
4646 c = gen_reg_rtx (SImode);
4647 emit_insn (gen_spu_convert (c, ops[1]));
4648 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4649 for (i = 0; i < 8; i++)
4650 arr[last - i] = 7 - i;
4656 emit_move_insn (pat, array_to_constant (TImode, arr));
4657 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4660 /* expand vector initialization. If there are any constant parts,
4661 load constant parts first. Then load any non-constant parts. */
/* Expand initialization of vector TARGET from the PARALLEL VALS.
   Strategy: all-same elements -> splat; otherwise load the constant
   elements first (recursing with empty slots filled by the first
   constant to encourage splats), then insert each variable element
   with spu_builtin_insert.  */
4663 spu_expand_vector_init (rtx target, rtx vals)
4665 enum machine_mode mode = GET_MODE (target);
4666 int n_elts = GET_MODE_NUNITS (mode);
4668 bool all_same = true;
4669 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
/* Scan elements: count non-constants, remember the first constant,
   and detect whether all elements are identical.  */
4672 first = XVECEXP (vals, 0, 0);
4673 for (i = 0; i < n_elts; ++i)
4675 x = XVECEXP (vals, 0, i);
4676 if (!CONSTANT_P (x))
4680 if (first_constant == NULL_RTX)
4683 if (i > 0 && !rtx_equal_p (x, first))
4687 /* if all elements are the same, use splats to repeat elements */
4690 if (!CONSTANT_P (first)
4691 && !register_operand (first, GET_MODE (x)))
4692 first = force_reg (GET_MODE (first), first);
4693 emit_insn (gen_spu_splats (target, first));
4697 /* load constant parts */
4698 if (n_var != n_elts)
/* Fully constant: one CONST_VECTOR move.  */
4702 emit_move_insn (target,
4703 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
/* Partially constant: substitute the first constant into variable
   slots and recurse -- the recursion may then hit the all-same or
   fully-constant fast paths.  */
4707 rtx constant_parts_rtx = copy_rtx (vals);
4709 gcc_assert (first_constant != NULL_RTX);
4710 /* fill empty slots with the first constant, this increases
4711 our chance of using splats in the recursive call below. */
4712 for (i = 0; i < n_elts; ++i)
4713 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4714 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4716 spu_expand_vector_init (target, constant_parts_rtx);
4720 /* load variable parts */
4723 rtx insert_operands[4];
4725 insert_operands[0] = target;
4726 insert_operands[2] = target;
4727 for (i = 0; i < n_elts; ++i)
4729 x = XVECEXP (vals, 0, i);
4730 if (!CONSTANT_P (x))
4732 if (!register_operand (x, GET_MODE (x)))
4733 x = force_reg (GET_MODE (x), x);
4734 insert_operands[1] = x;
4735 insert_operands[3] = GEN_INT (i);
4736 spu_builtin_insert (insert_operands);
/* Force OP into a register of MODE, converting between modes as
   needed: integer constants/BLKmode go through convert_to_mode;
   same-size modes use a subreg; otherwise a spu_convert insn is
   emitted.  Used by builtin expansion to make operands match the
   insn's predicates.  */
4743 spu_force_reg (enum machine_mode mode, rtx op)
4746 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
4748 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
4749 || GET_MODE (op) == BLKmode)
4750 return force_reg (mode, convert_to_mode (mode, op, 0));
4754 r = force_reg (GET_MODE (op), op);
4755 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
/* Same size: reinterpret the bits via a subreg.  */
4757 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
/* Different size: use the target-specific convert pattern.  */
4762 x = gen_reg_rtx (mode);
4763 emit_insn (gen_spu_convert (x, r));
/* Diagnose invalid operand OP for parameter kind P of builtin D:
   checks immediate ranges for the SPU_BTI_7..SPU_BTI_U18 kinds and
   warns when the low bits of an address immediate are ignored by the
   instruction encoding.  */
4768 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
4770 HOST_WIDE_INT v = 0;
4772 /* Check the range of immediate operands. */
4773 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
4775 int range = p - SPU_BTI_7;
4776 if (!CONSTANT_P (op)
4777 || (GET_CODE (op) == CONST_INT
4778 && (INTVAL (op) < spu_builtin_range[range].low
4779 || INTVAL (op) > spu_builtin_range[range].high)))
/* NOTE(review): the range table is initialized with long long
   values (see the `-0x40ll` style initializers at the top of the
   file), but they are printed here with %d -- a printf-format /
   argument-type mismatch on hosts where the fields are wider than
   int.  TODO: verify the field type and use
   HOST_WIDE_INT_PRINT_DEC if needed.  */
4780 error ("%s expects an integer literal in the range [%d, %d].",
4782 spu_builtin_range[range].low, spu_builtin_range[range].high);
/* Extract the constant offset V from a CONST (PLUS/MINUS) or a plain
   CONST_INT so the alignment warning below can inspect it.  */
4784 if (GET_CODE (op) == CONST
4785 && (GET_CODE (XEXP (op, 0)) == PLUS
4786 || GET_CODE (XEXP (op, 0)) == MINUS))
4788 v = INTVAL (XEXP (XEXP (op, 0), 1));
4789 op = XEXP (XEXP (op, 0), 0);
4791 else if (GET_CODE (op) == CONST_INT)
4800 /* This is only used in lqa, and stqa. Even though the insns
4801 encode 16 bits of the address (all but the 2 least
4802 significant), only 14 bits are used because it is masked to
4803 be 16 byte aligned. */
4807 /* This is used for lqr and stqr. */
/* Warn when the immediate's ignored low bits are nonzero, or when
   the operand is a code address whose low bits are unknown here.  */
4814 if (GET_CODE (op) == LABEL_REF
4815 || (GET_CODE (op) == SYMBOL_REF
4816 && SYMBOL_REF_FUNCTION_P (op))
4817 || (INTVAL (op) & ((1 << lsbits) - 1)) != 0)
4818 warning (0, "%d least significant bits of %s are ignored.", lsbits,
/* Expand the builtin call's ARGLIST into rtl, filling OPS[].  OPS[0] is
   reserved for TARGET when the builtin returns a value (D->parm[0] !=
   SPU_BTI_VOID); remaining insn operands are expanded in order.  */
4825 expand_builtin_args (struct spu_builtin_description *d, tree arglist,
4826 rtx target, rtx ops[])
4828 enum insn_code icode = d->icode;
4831 /* Expand the arguments into rtl. */
4833 if (d->parm[0] != SPU_BTI_VOID)
4836 for (; i < insn_data[icode].n_operands; i++)
4838 tree arg = TREE_VALUE (arglist);
4841 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
4842 arglist = TREE_CHAIN (arglist);
/* Expand one SPU builtin described by D with arguments ARGLIST into
   rtl, returning the result rtx (or presumably NULL_RTX for void
   builtins -- return paths are elided in this excerpt).  Handles the
   target operand, the special SPU_MASK_FOR_LOAD builtin, scalar->vector
   splatting for immediate operands, operand predicate fix-ups, range
   diagnostics, and finally emits the insn/call/jump.  */
4847 spu_expand_builtin_1 (struct spu_builtin_description *d,
4848 tree arglist, rtx target)
4852 enum insn_code icode = d->icode;
4853 enum machine_mode mode, tmode;
4857 /* Set up ops[] with values from arglist. */
4858 expand_builtin_args (d, arglist, target, ops);
4860 /* Handle the target operand which must be operand 0. */
4862 if (d->parm[0] != SPU_BTI_VOID)
4865 /* We prefer the mode specified for the match_operand otherwise
4866 use the mode from the builtin function prototype. */
4867 tmode = insn_data[d->icode].operand[0].mode;
4868 if (tmode == VOIDmode)
4869 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
4871 /* Try to use target because not using it can lead to extra copies
4872 and when we are using all of the registers extra copies leads
4874 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
4877 target = ops[0] = gen_reg_rtx (tmode);
4879 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
/* __builtin_spu_mask_for_load: hand-expanded because its operand is
   a pointer whose (negated) address feeds the lvsr-style mask insn.  */
4885 if (d->fcode == SPU_MASK_FOR_LOAD)
4887 enum machine_mode mode = insn_data[icode].operand[1].mode;
4892 arg = TREE_VALUE (arglist);
4893 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
4894 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
4895 addr = memory_address (mode, op);
/* Negate the address; the mask insn reads MEM (-addr).  */
4898 op = gen_reg_rtx (GET_MODE (addr));
4899 emit_insn (gen_rtx_SET (VOIDmode, op,
4900 gen_rtx_NEG (GET_MODE (addr), addr)));
4901 op = gen_rtx_MEM (mode, op);
4903 pat = GEN_FCN (icode) (target, op);
4910 /* Ignore align_hint, but still expand it's args in case they have
4912 if (icode == CODE_FOR_spu_align_hint)
4915 /* Handle the rest of the operands. */
4916 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
4918 if (insn_data[d->icode].operand[i].mode != VOIDmode)
4919 mode = insn_data[d->icode].operand[i].mode;
4921 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
4923 /* mode can be VOIDmode here for labels */
4925 /* For specific intrinsics with an immediate operand, e.g.,
4926 si_ai(), we sometimes need to convert the scalar argument to a
4927 vector argument by splatting the scalar. */
4928 if (VECTOR_MODE_P (mode)
4929 && (GET_CODE (ops[i]) == CONST_INT
4930 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
4931 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
4933 if (GET_CODE (ops[i]) == CONST_INT)
4934 ops[i] = spu_const (mode, INTVAL (ops[i]));
/* Non-constant scalar: convert to the element mode and splat.  */
4937 rtx reg = gen_reg_rtx (mode);
4938 enum machine_mode imode = GET_MODE_INNER (mode);
4939 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
4940 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
4941 if (imode != GET_MODE (ops[i]))
4942 ops[i] = convert_to_mode (imode, ops[i],
4943 TYPE_UNSIGNED (spu_builtin_types
4945 emit_insn (gen_spu_splats (reg, ops[i]));
4950 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
4951 ops[i] = spu_force_reg (mode, ops[i]);
/* Emit range/alignment diagnostics for this operand.  */
4953 spu_check_builtin_parm (d, ops[i], d->parm[p]);
4956 switch (insn_data[icode].n_operands)
4959 pat = GEN_FCN (icode) (0);
4962 pat = GEN_FCN (icode) (ops[0]);
4965 pat = GEN_FCN (icode) (ops[0], ops[1]);
4968 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
4971 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
4974 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
4977 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
/* Emit PAT with the insn class matching the builtin type.  */
4986 if (d->type == B_CALL || d->type == B_BISLED)
4987 emit_call_insn (pat);
4988 else if (d->type == B_JUMP)
4990 emit_jump_insn (pat);
/* Convert the result to the prototype's mode if the insn produced a
   different one.  */
4996 return_type = spu_builtin_types[d->parm[0]];
4997 if (d->parm[0] != SPU_BTI_VOID
4998 && GET_MODE (target) != TYPE_MODE (return_type))
5000 /* target is the return value. It should always be the mode of
5001 the builtin function prototype. */
5002 target = spu_force_reg (TYPE_MODE (return_type), target);
/* Implements TARGET_EXPAND_BUILTIN.  Looks up the builtin's descriptor
   by function code (offset by END_BUILTINS, matching the registration
   in spu_init_builtins) and delegates to spu_expand_builtin_1.
   (Failure path for out-of-range fcode is elided in this excerpt.)  */
5009 spu_expand_builtin (tree exp,
5011 rtx subtarget ATTRIBUTE_UNUSED,
5012 enum machine_mode mode ATTRIBUTE_UNUSED,
5013 int ignore ATTRIBUTE_UNUSED)
5015 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
5016 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
5017 tree arglist = TREE_OPERAND (exp, 1);
5018 struct spu_builtin_description *d;
5020 if (fcode < NUM_SPU_BUILTINS)
5022 d = &spu_builtins[fcode];
5024 return spu_expand_builtin_1 (d, arglist, target);
5029 /* Implement targetm.vectorize.builtin_mul_widen_even. */
/* Return the decl of the builtin that multiplies the even elements of
   two vectors of TYPE, widening the result (SPU mpye-family); the
   unsigned/signed variants are SPU_MULE_0/SPU_MULE_1.  Returns NULL for
   unsupported modes (default case elided in this excerpt).  */
5031 spu_builtin_mul_widen_even (tree type)
5033 switch (TYPE_MODE (type))
5036 if (TYPE_UNSIGNED (type))
5037 return spu_builtins[SPU_MULE_0].fndecl;
5039 return spu_builtins[SPU_MULE_1].fndecl;
5046 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
/* Return the decl of the builtin that multiplies the odd elements of
   two vectors of TYPE, widening the result.  Note the index order is
   the mirror of the even case: unsigned -> SPU_MULO_1, signed ->
   SPU_MULO_0.  Returns NULL for unsupported modes (default case elided
   in this excerpt).  */
5048 spu_builtin_mul_widen_odd (tree type)
5050 switch (TYPE_MODE (type))
5053 if (TYPE_UNSIGNED (type))
5054 return spu_builtins[SPU_MULO_1].fndecl;
5056 return spu_builtins[SPU_MULO_0].fndecl;
5063 /* Implement targetm.vectorize.builtin_mask_for_load. */
5065 spu_builtin_mask_for_load (void)
5067 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];