1 /* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
61 /* Builtin types, data and prototypes. */
62 struct spu_builtin_range
67 static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
83 /* Target specific attribute specifications. */
84 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
86 /* Prototypes and external defs. */
87 static void spu_init_builtins (void);
88 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91 static rtx get_pic_reg (void);
92 static int need_to_save_reg (int regno, int saving);
93 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
97 static void emit_nop_for_insn (rtx insn);
98 static bool insn_clobbers_hbr (rtx insn);
99 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
100 int distance, sbitmap blocks);
101 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
103 static rtx get_branch_target (rtx branch);
104 static void spu_machine_dependent_reorg (void);
105 static int spu_sched_issue_rate (void);
106 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
108 static int get_pipe (rtx insn);
109 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110 static void spu_sched_init_global (FILE *, int, int);
111 static void spu_sched_init (FILE *, int, int);
112 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
113 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
115 unsigned char *no_add_attrs);
116 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
118 unsigned char *no_add_attrs);
119 static int spu_naked_function_p (tree func);
120 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
122 static tree spu_build_builtin_va_list (void);
123 static void spu_va_start (tree, rtx);
124 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
126 static int regno_aligned_for_load (int regno);
127 static int store_with_one_insn_p (rtx mem);
128 static int mem_is_padded_component_ref (rtx x);
129 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130 static void spu_asm_globalize_label (FILE * file, const char *name);
131 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
132 int *total, bool speed);
133 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134 static void spu_init_libfuncs (void);
135 static bool spu_return_in_memory (const_tree type, const_tree fntype);
136 static void fix_range (const char *);
137 static void spu_encode_section_info (tree, rtx, int);
138 static tree spu_builtin_mul_widen_even (tree);
139 static tree spu_builtin_mul_widen_odd (tree);
140 static tree spu_builtin_mask_for_load (void);
141 static int spu_builtin_vectorization_cost (bool);
142 static bool spu_vector_alignment_reachable (const_tree, bool);
143 static tree spu_builtin_vec_perm (tree, tree *);
144 static int spu_sms_res_mii (struct ddg *g);
145 static void asm_file_start (void);
147 extern const char *reg_names[];
148 rtx spu_compare_op0, spu_compare_op1;
150 /* Which instruction set architecture to use. */
152 /* Which cpu are we tuning for. */
155 /* The hardware requires 8 insns between a hint and the branch it
156 affects. This variable describes how many rtl instructions the
157 compiler needs to see before inserting a hint, and then the compiler
158 will insert enough nops to make it at least 8 insns. The default is
159 for the compiler to allow up to 2 nops to be emitted. The nops are
160 inserted in pairs, so we round down. */
161 int spu_hint_dist = (8*4) - (2*4);
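/* Worked example of the default above: the branch must be 8 insns
   (8 * 4 == 32 bytes) past the hint, and up to 2 nops (2 * 4 == 8
   bytes) may be emitted to make up a shortfall, so hints are only
   considered for branches at least 32 - 8 == 24 bytes away.  */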
163 /* Determines whether we run variable tracking in machine dependent
164 reorganization. */
165 static int spu_flag_var_tracking;
180 IC_POOL, /* constant pool */
181 IC_IL1, /* one il* instruction */
182 IC_IL2, /* both ilhu and iohl instructions */
183 IC_IL1s, /* one il* instruction */
184 IC_IL2s, /* both ilhu and iohl instructions */
185 IC_FSMBI, /* the fsmbi instruction */
186 IC_CPAT, /* one of the c*d instructions */
187 IC_FSMBI2 /* fsmbi plus 1 other instruction */
190 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
191 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
192 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
193 static enum immediate_class classify_immediate (rtx op,
194 enum machine_mode mode);
196 static enum machine_mode spu_unwind_word_mode (void);
198 static enum machine_mode
199 spu_libgcc_cmp_return_mode (void);
201 static enum machine_mode
202 spu_libgcc_shift_count_mode (void);
204 /* Built in types. */
205 tree spu_builtin_types[SPU_BTI_MAX];
207 /* TARGET overrides. */
209 #undef TARGET_INIT_BUILTINS
210 #define TARGET_INIT_BUILTINS spu_init_builtins
212 #undef TARGET_EXPAND_BUILTIN
213 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
215 #undef TARGET_UNWIND_WORD_MODE
216 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
218 /* The .8byte directive doesn't seem to work well for a 32 bit
219 architecture. */
220 #undef TARGET_ASM_UNALIGNED_DI_OP
221 #define TARGET_ASM_UNALIGNED_DI_OP NULL
223 #undef TARGET_RTX_COSTS
224 #define TARGET_RTX_COSTS spu_rtx_costs
226 #undef TARGET_ADDRESS_COST
227 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
229 #undef TARGET_SCHED_ISSUE_RATE
230 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
232 #undef TARGET_SCHED_INIT_GLOBAL
233 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
235 #undef TARGET_SCHED_INIT
236 #define TARGET_SCHED_INIT spu_sched_init
238 #undef TARGET_SCHED_VARIABLE_ISSUE
239 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
241 #undef TARGET_SCHED_REORDER
242 #define TARGET_SCHED_REORDER spu_sched_reorder
244 #undef TARGET_SCHED_REORDER2
245 #define TARGET_SCHED_REORDER2 spu_sched_reorder
247 #undef TARGET_SCHED_ADJUST_COST
248 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
250 const struct attribute_spec spu_attribute_table[];
251 #undef TARGET_ATTRIBUTE_TABLE
252 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
254 #undef TARGET_ASM_INTEGER
255 #define TARGET_ASM_INTEGER spu_assemble_integer
257 #undef TARGET_SCALAR_MODE_SUPPORTED_P
258 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
260 #undef TARGET_VECTOR_MODE_SUPPORTED_P
261 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
263 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
264 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
266 #undef TARGET_ASM_GLOBALIZE_LABEL
267 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
269 #undef TARGET_PASS_BY_REFERENCE
270 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
272 #undef TARGET_MUST_PASS_IN_STACK
273 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
275 #undef TARGET_BUILD_BUILTIN_VA_LIST
276 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
278 #undef TARGET_EXPAND_BUILTIN_VA_START
279 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
281 #undef TARGET_SETUP_INCOMING_VARARGS
282 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
284 #undef TARGET_MACHINE_DEPENDENT_REORG
285 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
287 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
288 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
290 #undef TARGET_DEFAULT_TARGET_FLAGS
291 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
293 #undef TARGET_INIT_LIBFUNCS
294 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
296 #undef TARGET_RETURN_IN_MEMORY
297 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
299 #undef TARGET_ENCODE_SECTION_INFO
300 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
302 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
303 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
305 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
306 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
308 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
309 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
311 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
312 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
314 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
315 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
317 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
318 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
320 #undef TARGET_LIBGCC_CMP_RETURN_MODE
321 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
323 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
324 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
326 #undef TARGET_SCHED_SMS_RES_MII
327 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
329 #undef TARGET_ASM_FILE_START
330 #define TARGET_ASM_FILE_START asm_file_start
332 struct gcc_target targetm = TARGET_INITIALIZER;
335 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
337 /* Override some of the default param values. With so many registers,
338 larger values are better for these params. */
339 MAX_PENDING_LIST_LENGTH = 128;
342 /* With so many registers, this is better on by default. */
342 flag_rename_registers = 1;
345 /* Sometimes certain combinations of command options do not make sense
346 on a particular target machine. You can define a macro
347 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
348 executed once just after all the command options have been parsed. */
350 spu_override_options (void)
352 /* Small loops will be unpeeled at -O3. For SPU it is more important
353 to keep code small by default. */
354 if (!flag_unroll_loops && !flag_peel_loops
355 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
356 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
358 flag_omit_frame_pointer = 1;
360 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
361 if (align_functions < 8)
364 spu_hint_dist = 8*4 - spu_max_nops*4;
365 if (spu_hint_dist < 0)
368 if (spu_fixed_range_string)
369 fix_range (spu_fixed_range_string);
371 /* Determine processor architectural level. */
374 if (strcmp (&spu_arch_string[0], "cell") == 0)
375 spu_arch = PROCESSOR_CELL;
376 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
377 spu_arch = PROCESSOR_CELLEDP;
379 error ("Unknown architecture '%s'", &spu_arch_string[0]);
382 /* Determine processor to tune for. */
385 if (strcmp (&spu_tune_string[0], "cell") == 0)
386 spu_tune = PROCESSOR_CELL;
387 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
388 spu_tune = PROCESSOR_CELLEDP;
390 error ("Unknown architecture '%s'", &spu_tune_string[0]);
393 /* Change defaults according to the processor architecture. */
394 if (spu_arch == PROCESSOR_CELLEDP)
396 /* If no command line option has been otherwise specified, change
397 the default to -mno-safe-hints on celledp -- only the original
398 Cell/B.E. processors require this workaround. */
399 if (!(target_flags_explicit & MASK_SAFE_HINTS))
400 target_flags &= ~MASK_SAFE_HINTS;
403 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
406 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
407 struct attribute_spec.handler. */
409 /* Table of machine attributes. */
410 const struct attribute_spec spu_attribute_table[] =
412 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
413 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
414 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
415 { NULL, 0, 0, false, false, false, NULL }
418 /* True if MODE is valid for the target. By "valid", we mean able to
419 be manipulated in non-trivial ways. In particular, this means all
420 the arithmetic is supported. */
422 spu_scalar_mode_supported_p (enum machine_mode mode)
440 /* Similarly for vector modes. "Supported" here is less strict. At
441 least some operations are supported; need to check optabs or builtins
442 for further details. */
444 spu_vector_mode_supported_p (enum machine_mode mode)
461 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
462 least significant bytes of the outer mode. This function returns
463 TRUE for the SUBREGs where this is correct. */
465 valid_subreg (rtx op)
467 enum machine_mode om = GET_MODE (op);
468 enum machine_mode im = GET_MODE (SUBREG_REG (op));
469 return om != VOIDmode && im != VOIDmode
470 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
471 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
472 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
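/* For example, under the rules above (subreg:SI (reg:QI ...)) is valid
   because both modes live in the low 4 bytes of a register, and
   (subreg:V4SI (reg:TI ...)) is valid because both are a full 16 bytes,
   but (subreg:DI (reg:SI ...)) is not.  */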
475 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
476 and adjust the start offset. */
478 adjust_operand (rtx op, HOST_WIDE_INT * start)
480 enum machine_mode mode;
482 /* Strip any paradoxical SUBREG. */
483 if (GET_CODE (op) == SUBREG
484 && (GET_MODE_BITSIZE (GET_MODE (op))
485 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
489 GET_MODE_BITSIZE (GET_MODE (op)) -
490 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
491 op = SUBREG_REG (op);
493 /* If it is smaller than SI, ensure a SUBREG is added below. */
494 op_size = GET_MODE_BITSIZE (GET_MODE (op));
498 *start += 32 - op_size;
501 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
502 mode = mode_for_size (op_size, MODE_INT, 0);
503 if (mode != GET_MODE (op))
504 op = gen_rtx_SUBREG (mode, op, 0);
509 spu_expand_extv (rtx ops[], int unsignedp)
511 HOST_WIDE_INT width = INTVAL (ops[2]);
512 HOST_WIDE_INT start = INTVAL (ops[3]);
513 HOST_WIDE_INT src_size, dst_size;
514 enum machine_mode src_mode, dst_mode;
515 rtx dst = ops[0], src = ops[1];
518 dst = adjust_operand (ops[0], 0);
519 dst_mode = GET_MODE (dst);
520 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
522 src = adjust_operand (src, &start);
523 src_mode = GET_MODE (src);
524 src_size = GET_MODE_BITSIZE (GET_MODE (src));
528 s = gen_reg_rtx (src_mode);
532 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
535 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
538 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
546 if (width < src_size)
553 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
556 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
559 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
564 s = gen_reg_rtx (src_mode);
565 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
570 convert_move (dst, src, unsignedp);
574 spu_expand_insv (rtx ops[])
576 HOST_WIDE_INT width = INTVAL (ops[1]);
577 HOST_WIDE_INT start = INTVAL (ops[2]);
578 HOST_WIDE_INT maskbits;
579 enum machine_mode dst_mode, src_mode;
580 rtx dst = ops[0], src = ops[3];
581 int dst_size, src_size;
587 if (GET_CODE (ops[0]) == MEM)
588 dst = gen_reg_rtx (TImode);
590 dst = adjust_operand (dst, &start);
591 dst_mode = GET_MODE (dst);
592 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
594 if (CONSTANT_P (src))
596 enum machine_mode m =
597 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
598 src = force_reg (m, convert_to_mode (m, src, 0));
600 src = adjust_operand (src, 0);
601 src_mode = GET_MODE (src);
602 src_size = GET_MODE_BITSIZE (GET_MODE (src));
604 mask = gen_reg_rtx (dst_mode);
605 shift_reg = gen_reg_rtx (dst_mode);
606 shift = dst_size - start - width;
608 /* It's not safe to use subreg here because the compiler assumes
609 that the SUBREG_REG is right justified in the SUBREG. */
610 convert_move (shift_reg, src, 1);
617 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
620 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
623 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
635 maskbits = (-1ll << (32 - width - start));
637 maskbits += (1ll << (32 - start));
638 emit_move_insn (mask, GEN_INT (maskbits));
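/* Worked example: for start == 8, width == 8 the expressions above give
   (-1ll << 16) + (1ll << 24) == 0xff0000, i.e. exactly bits [8,16)
   counted from the most significant bit of the SImode word.  */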
641 maskbits = (-1ll << (64 - width - start));
643 maskbits += (1ll << (64 - start));
644 emit_move_insn (mask, GEN_INT (maskbits));
648 unsigned char arr[16];
650 memset (arr, 0, sizeof (arr));
651 arr[i] = 0xff >> (start & 7);
652 for (i++; i <= (start + width - 1) / 8; i++)
654 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
655 emit_move_insn (mask, array_to_constant (TImode, arr));
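/* Worked example: for start == 4, width == 8 this computes arr[0] == 0x0f
   and arr[1] == 0xf0 (the elided lines presumably set i = start / 8 and
   fill the intervening bytes with 0xff), i.e. a mask for bits [4,12) of
   the quadword.  */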
661 if (GET_CODE (ops[0]) == MEM)
663 rtx aligned = gen_reg_rtx (SImode);
664 rtx low = gen_reg_rtx (SImode);
665 rtx addr = gen_reg_rtx (SImode);
666 rtx rotl = gen_reg_rtx (SImode);
667 rtx mask0 = gen_reg_rtx (TImode);
670 emit_move_insn (addr, XEXP (ops[0], 0));
671 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
672 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
673 emit_insn (gen_negsi2 (rotl, low));
674 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
675 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
676 mem = change_address (ops[0], TImode, aligned);
677 set_mem_alias_set (mem, 0);
678 emit_move_insn (dst, mem);
679 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
680 emit_move_insn (mem, dst);
681 if (start + width > MEM_ALIGN (ops[0]))
683 rtx shl = gen_reg_rtx (SImode);
684 rtx mask1 = gen_reg_rtx (TImode);
685 rtx dst1 = gen_reg_rtx (TImode);
687 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
688 emit_insn (gen_shlqby_ti (mask1, mask, shl));
689 mem1 = adjust_address (mem, TImode, 16);
690 set_mem_alias_set (mem1, 0);
691 emit_move_insn (dst1, mem1);
692 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
693 emit_move_insn (mem1, dst1);
697 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
702 spu_expand_block_move (rtx ops[])
704 HOST_WIDE_INT bytes, align, offset;
705 rtx src, dst, sreg, dreg, target;
707 if (GET_CODE (ops[2]) != CONST_INT
708 || GET_CODE (ops[3]) != CONST_INT
709 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
712 bytes = INTVAL (ops[2]);
713 align = INTVAL (ops[3]);
723 for (offset = 0; offset + 16 <= bytes; offset += 16)
725 dst = adjust_address (ops[0], V16QImode, offset);
726 src = adjust_address (ops[1], V16QImode, offset);
727 emit_move_insn (dst, src);
732 unsigned char arr[16] = { 0 };
733 for (i = 0; i < bytes - offset; i++)
735 dst = adjust_address (ops[0], V16QImode, offset);
736 src = adjust_address (ops[1], V16QImode, offset);
737 mask = gen_reg_rtx (V16QImode);
738 sreg = gen_reg_rtx (V16QImode);
739 dreg = gen_reg_rtx (V16QImode);
740 target = gen_reg_rtx (V16QImode);
741 emit_move_insn (mask, array_to_constant (V16QImode, arr));
742 emit_move_insn (dreg, dst);
743 emit_move_insn (sreg, src);
744 emit_insn (gen_selb (target, dreg, sreg, mask));
745 emit_move_insn (dst, target);
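/* So a 20-byte copy, say, becomes one full V16QImode move plus this
   read-modify-write of the following quadword: the selb mask has 0xff
   in its first 4 bytes (the elided loop presumably sets arr[i] = 0xff
   for i < bytes - offset), taking those bytes from the source and
   keeping the remaining destination bytes intact.  */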
753 { SPU_EQ, SPU_GT, SPU_GTU };
755 int spu_comp_icode[12][3] = {
756 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
757 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
758 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
759 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
760 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
761 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
762 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
763 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
764 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
765 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
766 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
767 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
770 /* Generate a compare for CODE. Return a brand-new rtx that represents
771 the result of the compare. GCC can figure this out too if we don't
772 provide all variations of compares, but since GCC always wants to use
773 WORD_MODE, we can generate better code in most cases if we do it
774 ourselves. */
776 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
778 int reverse_compare = 0;
779 int reverse_test = 0;
780 rtx compare_result, eq_result;
781 rtx comp_rtx, eq_rtx;
782 rtx target = operands[0];
783 enum machine_mode comp_mode;
784 enum machine_mode op_mode;
785 enum spu_comp_code scode, eq_code, ior_code;
789 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
790 and so on, to keep the constant in operand 1, e.g. changing (X >= 5) to (X > 4). */
791 if (GET_CODE (spu_compare_op1) == CONST_INT)
793 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
794 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
798 spu_compare_op1 = GEN_INT (val);
802 spu_compare_op1 = GEN_INT (val);
806 spu_compare_op1 = GEN_INT (val);
810 spu_compare_op1 = GEN_INT (val);
819 op_mode = GET_MODE (spu_compare_op0);
825 if (HONOR_NANS (op_mode))
840 if (HONOR_NANS (op_mode))
932 comp_mode = V4SImode;
936 comp_mode = V2DImode;
943 if (GET_MODE (spu_compare_op1) == DFmode
944 && (scode != SPU_GT && scode != SPU_EQ))
947 if (is_set == 0 && spu_compare_op1 == const0_rtx
948 && (GET_MODE (spu_compare_op0) == SImode
949 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
951 /* Don't need to set a register with the result when we are
952 comparing against zero and branching. */
953 reverse_test = !reverse_test;
954 compare_result = spu_compare_op0;
958 compare_result = gen_reg_rtx (comp_mode);
962 rtx t = spu_compare_op1;
963 spu_compare_op1 = spu_compare_op0;
967 if (spu_comp_icode[index][scode] == 0)
970 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
971 (spu_compare_op0, op_mode))
972 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
973 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
974 (spu_compare_op1, op_mode))
975 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
976 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
981 emit_insn (comp_rtx);
985 eq_result = gen_reg_rtx (comp_mode);
986 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
992 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
993 gcc_assert (ior_code != CODE_FOR_nothing);
994 emit_insn (GEN_FCN (ior_code)
995 (compare_result, compare_result, eq_result));
1004 /* We don't have branch on QI compare insns, so we convert the
1005 QI compare result to a HI result. */
1006 if (comp_mode == QImode)
1008 rtx old_res = compare_result;
1009 compare_result = gen_reg_rtx (HImode);
1011 emit_insn (gen_extendqihi2 (compare_result, old_res));
1015 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1017 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1019 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1020 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1021 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1024 else if (is_set == 2)
1026 int compare_size = GET_MODE_BITSIZE (comp_mode);
1027 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1028 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1030 rtx op_t = operands[2];
1031 rtx op_f = operands[3];
1033 /* The result of the comparison can be SI, HI or QI mode. Create a
1034 mask based on that result. */
1035 if (target_size > compare_size)
1037 select_mask = gen_reg_rtx (mode);
1038 emit_insn (gen_extend_compare (select_mask, compare_result));
1040 else if (target_size < compare_size)
1042 gen_rtx_SUBREG (mode, compare_result,
1043 (compare_size - target_size) / BITS_PER_UNIT);
1044 else if (comp_mode != mode)
1045 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1047 select_mask = compare_result;
1049 if (GET_MODE (target) != GET_MODE (op_t)
1050 || GET_MODE (target) != GET_MODE (op_f))
1054 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1056 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1061 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1062 gen_rtx_NOT (comp_mode, compare_result)));
1063 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1064 emit_insn (gen_extendhisi2 (target, compare_result));
1065 else if (GET_MODE (target) == SImode
1066 && GET_MODE (compare_result) == QImode)
1067 emit_insn (gen_extend_compare (target, compare_result));
1069 emit_move_insn (target, compare_result);
1074 const_double_to_hwint (rtx x)
1078 if (GET_MODE (x) == SFmode)
1080 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1081 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1083 else if (GET_MODE (x) == DFmode)
1086 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1087 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1089 val = (val << 32) | (l[1] & 0xffffffff);
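/* e.g. the DFmode constant 1.0 comes back as l[0] == 0x3ff00000 and
   l[1] == 0, giving val == 0x3ff0000000000000.  */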
1097 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1101 gcc_assert (mode == SFmode || mode == DFmode);
1104 tv[0] = (v << 32) >> 32;
1105 else if (mode == DFmode)
1107 tv[1] = (v << 32) >> 32;
1110 real_from_target (&rv, tv, mode);
1111 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1115 print_operand_address (FILE * file, register rtx addr)
1120 if (GET_CODE (addr) == AND
1121 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1122 && INTVAL (XEXP (addr, 1)) == -16)
1123 addr = XEXP (addr, 0);
1125 switch (GET_CODE (addr))
1128 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1132 reg = XEXP (addr, 0);
1133 offset = XEXP (addr, 1);
1134 if (GET_CODE (offset) == REG)
1136 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1137 reg_names[REGNO (offset)]);
1139 else if (GET_CODE (offset) == CONST_INT)
1141 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1142 INTVAL (offset), reg_names[REGNO (reg)]);
1152 output_addr_const (file, addr);
1162 print_operand (FILE * file, rtx x, int code)
1164 enum machine_mode mode = GET_MODE (x);
1166 unsigned char arr[16];
1167 int xcode = GET_CODE (x);
1169 if (GET_MODE (x) == VOIDmode)
1172 case 'L': /* 128 bits, signed */
1173 case 'm': /* 128 bits, signed */
1174 case 'T': /* 128 bits, signed */
1175 case 't': /* 128 bits, signed */
1178 case 'K': /* 64 bits, signed */
1179 case 'k': /* 64 bits, signed */
1180 case 'D': /* 64 bits, signed */
1181 case 'd': /* 64 bits, signed */
1184 case 'J': /* 32 bits, signed */
1185 case 'j': /* 32 bits, signed */
1186 case 's': /* 32 bits, signed */
1187 case 'S': /* 32 bits, signed */
1194 case 'j': /* 32 bits, signed */
1195 case 'k': /* 64 bits, signed */
1196 case 'm': /* 128 bits, signed */
1197 if (xcode == CONST_INT
1198 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1200 gcc_assert (logical_immediate_p (x, mode));
1201 constant_to_array (mode, x, arr);
1202 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1203 val = trunc_int_for_mode (val, SImode);
1204 switch (which_logical_immediate (val))
1209 fprintf (file, "h");
1212 fprintf (file, "b");
1222 case 'J': /* 32 bits, signed */
1223 case 'K': /* 64 bits, signed */
1224 case 'L': /* 128 bits, signed */
1225 if (xcode == CONST_INT
1226 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1228 gcc_assert (logical_immediate_p (x, mode)
1229 || iohl_immediate_p (x, mode));
1230 constant_to_array (mode, x, arr);
1231 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1232 val = trunc_int_for_mode (val, SImode);
1233 switch (which_logical_immediate (val))
1239 val = trunc_int_for_mode (val, HImode);
1242 val = trunc_int_for_mode (val, QImode);
1247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1253 case 't': /* 128 bits, signed */
1254 case 'd': /* 64 bits, signed */
1255 case 's': /* 32 bits, signed */
1258 enum immediate_class c = classify_immediate (x, mode);
1262 constant_to_array (mode, x, arr);
1263 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1264 val = trunc_int_for_mode (val, SImode);
1265 switch (which_immediate_load (val))
1270 fprintf (file, "a");
1273 fprintf (file, "h");
1276 fprintf (file, "hu");
1283 constant_to_array (mode, x, arr);
1284 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1286 fprintf (file, "b");
1288 fprintf (file, "h");
1290 fprintf (file, "w");
1292 fprintf (file, "d");
1295 if (xcode == CONST_VECTOR)
1297 x = CONST_VECTOR_ELT (x, 0);
1298 xcode = GET_CODE (x);
1300 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1301 fprintf (file, "a");
1302 else if (xcode == HIGH)
1303 fprintf (file, "hu");
1317 case 'T': /* 128 bits, signed */
1318 case 'D': /* 64 bits, signed */
1319 case 'S': /* 32 bits, signed */
1322 enum immediate_class c = classify_immediate (x, mode);
1326 constant_to_array (mode, x, arr);
1327 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1328 val = trunc_int_for_mode (val, SImode);
1329 switch (which_immediate_load (val))
1336 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1344 constant_to_array (mode, x, arr);
1346 for (i = 0; i < 16; i++)
1351 print_operand (file, GEN_INT (val), 0);
1354 constant_to_array (mode, x, arr);
1355 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1361 if (GET_CODE (x) == CONST_VECTOR)
1362 x = CONST_VECTOR_ELT (x, 0);
1363 output_addr_const (file, x);
1365 fprintf (file, "@h");
1379 if (xcode == CONST_INT)
1381 /* Only the 4 least significant bits are relevant for the c*d
1382 (generate controls) instructions. */
1383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1388 case 'M': /* print code for c*d */
1389 if (GET_CODE (x) == CONST_INT)
1393 fprintf (file, "b");
1396 fprintf (file, "h");
1399 fprintf (file, "w");
1402 fprintf (file, "d");
1411 case 'N': /* Negate the operand */
1412 if (xcode == CONST_INT)
1413 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1414 else if (xcode == CONST_VECTOR)
1415 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1416 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1419 case 'I': /* enable/disable interrupts */
1420 if (xcode == CONST_INT)
1421 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1424 case 'b': /* branch modifiers */
1426 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1427 else if (COMPARISON_P (x))
1428 fprintf (file, "%s", xcode == NE ? "n" : "");
1431 case 'i': /* indirect call */
1434 if (GET_CODE (XEXP (x, 0)) == REG)
1435 /* Used in indirect function calls. */
1436 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1438 output_address (XEXP (x, 0));
1442 case 'p': /* load/store */
1446 xcode = GET_CODE (x);
1451 xcode = GET_CODE (x);
1454 fprintf (file, "d");
1455 else if (xcode == CONST_INT)
1456 fprintf (file, "a");
1457 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1458 fprintf (file, "r");
1459 else if (xcode == PLUS || xcode == LO_SUM)
1461 if (GET_CODE (XEXP (x, 1)) == REG)
1462 fprintf (file, "x");
1464 fprintf (file, "d");
1469 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1471 output_addr_const (file, GEN_INT (val));
1475 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1477 output_addr_const (file, GEN_INT (val));
1481 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1483 output_addr_const (file, GEN_INT (val));
1487 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1488 val = (val >> 3) & 0x1f;
1489 output_addr_const (file, GEN_INT (val));
1493 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1496 output_addr_const (file, GEN_INT (val));
1500 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1503 output_addr_const (file, GEN_INT (val));
1507 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1510 output_addr_const (file, GEN_INT (val));
1514 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1515 val = -(val & -8ll);
1516 val = (val >> 3) & 0x1f;
1517 output_addr_const (file, GEN_INT (val));
1522 fprintf (file, "%s", reg_names[REGNO (x)]);
1523 else if (xcode == MEM)
1524 output_address (XEXP (x, 0));
1525 else if (xcode == CONST_VECTOR)
1526 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1528 output_addr_const (file, x);
1535 output_operand_lossage ("invalid %%xn code");
1540 extern char call_used_regs[];
1542 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1543 caller saved register. For leaf functions it is more efficient to
1544 use a volatile register because we won't need to save and restore the
1545 pic register. This routine is only valid after register allocation
1546 is completed, so we can pick an unused register. */
1550 rtx pic_reg = pic_offset_table_rtx;
1551 if (!reload_completed && !reload_in_progress)
1556 /* Split constant addresses to handle cases that are too large.
1557 Add in the pic register when in PIC mode.
1558 Split immediates that require more than 1 instruction. */
1560 spu_split_immediate (rtx * ops)
1562 enum machine_mode mode = GET_MODE (ops[0]);
1563 enum immediate_class c = classify_immediate (ops[1], mode);
1569 unsigned char arrhi[16];
1570 unsigned char arrlo[16];
1571 rtx to, temp, hi, lo;
1573 enum machine_mode imode = mode;
1574 /* We need to do reals as ints because the constant used in the
1575 IOR might not be a legitimate real constant. */
1576 imode = int_mode_for_mode (mode);
1577 constant_to_array (mode, ops[1], arrhi);
1579 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1582 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1583 for (i = 0; i < 16; i += 4)
1585 arrlo[i + 2] = arrhi[i + 2];
1586 arrlo[i + 3] = arrhi[i + 3];
1587 arrlo[i + 0] = arrlo[i + 1] = 0;
1588 arrhi[i + 2] = arrhi[i + 3] = 0;
1590 hi = array_to_constant (imode, arrhi);
1591 lo = array_to_constant (imode, arrlo);
1592 emit_move_insn (temp, hi);
1593 emit_insn (gen_rtx_SET
1594 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1599 unsigned char arr_fsmbi[16];
1600 unsigned char arr_andbi[16];
1601 rtx to, reg_fsmbi, reg_and;
1603 enum machine_mode imode = mode;
1604 /* We need to do reals as ints because the constant used in the
1605 * AND might not be a legitimate real constant. */
1606 imode = int_mode_for_mode (mode);
1607 constant_to_array (mode, ops[1], arr_fsmbi);
1609 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1612 for (i = 0; i < 16; i++)
1613 if (arr_fsmbi[i] != 0)
1615 arr_andbi[0] = arr_fsmbi[i];
1616 arr_fsmbi[i] = 0xff;
1618 for (i = 1; i < 16; i++)
1619 arr_andbi[i] = arr_andbi[0];
1620 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1621 reg_and = array_to_constant (imode, arr_andbi);
1622 emit_move_insn (to, reg_fsmbi);
1623 emit_insn (gen_rtx_SET
1624 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
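/* An IC_FSMBI2 constant uses a single non-zero byte value: the fsmbi
   sets every byte position where that value occurs to 0xff, and the
   and then replaces each 0xff with the actual byte value.  */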
1628 if (reload_in_progress || reload_completed)
1630 rtx mem = force_const_mem (mode, ops[1]);
1631 if (TARGET_LARGE_MEM)
1633 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1634 emit_move_insn (addr, XEXP (mem, 0));
1635 mem = replace_equiv_address (mem, addr);
1637 emit_move_insn (ops[0], mem);
1643 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1647 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1648 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1651 emit_insn (gen_pic (ops[0], ops[1]));
1654 rtx pic_reg = get_pic_reg ();
1655 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1656 crtl->uses_pic_offset_table = 1;
1658 return flag_pic || c == IC_IL2s;
1669 /* SAVING is TRUE when we are generating the actual load and store
1670 instructions for REGNO. When determining the size of the stack
1671 needed for saving register we must allocate enough space for the
1672 worst case, because we don't always have the information early enough
1673 to not allocate it. But we can at least eliminate the actual loads
1674 and stores during the prologue/epilogue. */
1676 need_to_save_reg (int regno, int saving)
1678 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1681 && regno == PIC_OFFSET_TABLE_REGNUM
1682 && (!saving || crtl->uses_pic_offset_table)
1684 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1689 /* This function is only correct starting with local register
1690 allocation. */
1692 spu_saved_regs_size (void)
1694 int reg_save_size = 0;
1697 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1698 if (need_to_save_reg (regno, 0))
1699 reg_save_size += 0x10;
1700 return reg_save_size;
1704 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1706 rtx reg = gen_rtx_REG (V4SImode, regno);
1708 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1709 return emit_insn (gen_movv4si (mem, reg));
1713 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1715 rtx reg = gen_rtx_REG (V4SImode, regno);
1717 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1718 return emit_insn (gen_movv4si (reg, mem));
1721 /* This happens after reload, so we need to expand it. */
1723 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1726 if (satisfies_constraint_K (GEN_INT (imm)))
1728 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1732 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1733 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1734 if (REGNO (src) == REGNO (scratch))
1740 /* Return nonzero if this function is known to have a null epilogue. */
1743 direct_return (void)
1745 if (reload_completed)
1747 if (cfun->static_chain_decl == 0
1748 && (spu_saved_regs_size ()
1750 + crtl->outgoing_args_size
1751 + crtl->args.pretend_args_size == 0)
1752 && current_function_is_leaf)
1759 The stack frame looks like this:
1760         +-------------+
1761         |  incoming   |
1762         |    args     |
1763   AP -> +-------------+
1764         | $lr save    |
1765         +-------------+
1766 prev SP | back chain  |
1767         +-------------+
1768         |  var args   |
1769         |  reg save   | crtl->args.pretend_args_size bytes
1770         +-------------+
1771         |    ...      |
1772         | saved regs  | spu_saved_regs_size() bytes
1773   FP -> +-------------+
1774         |    ...      |
1775         |    vars     | get_frame_size() bytes
1776  HFP -> +-------------+
1777         |    ...      |
1778         |  outgoing   |
1779         |    args     | crtl->outgoing_args_size bytes
1780         +-------------+
1781         | $lr of next |
1782         |    frame    |
1783         +-------------+
1784         | back chain  |
1785   SP -> +-------------+
1789 spu_expand_prologue (void)
1791 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1792 HOST_WIDE_INT total_size;
1793 HOST_WIDE_INT saved_regs_size;
1794 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1795 rtx scratch_reg_0, scratch_reg_1;
1798 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1799 the "toplevel" insn chain. */
1800 emit_note (NOTE_INSN_DELETED);
1802 if (flag_pic && optimize == 0)
1803 crtl->uses_pic_offset_table = 1;
1805 if (spu_naked_function_p (current_function_decl))
1808 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1809 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1811 saved_regs_size = spu_saved_regs_size ();
1812 total_size = size + saved_regs_size
1813 + crtl->outgoing_args_size
1814 + crtl->args.pretend_args_size;
1816 if (!current_function_is_leaf
1817 || cfun->calls_alloca || total_size > 0)
1818 total_size += STACK_POINTER_OFFSET;
1820 /* Save this first because code after this might use the link
1821 register as a scratch register. */
1822 if (!current_function_is_leaf)
1824 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1825 RTX_FRAME_RELATED_P (insn) = 1;
1830 offset = -crtl->args.pretend_args_size;
1831 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1832 if (need_to_save_reg (regno, 1))
1835 insn = frame_emit_store (regno, sp_reg, offset);
1836 RTX_FRAME_RELATED_P (insn) = 1;
1840 if (flag_pic && crtl->uses_pic_offset_table)
1842 rtx pic_reg = get_pic_reg ();
1843 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1844 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1849 if (flag_stack_check)
1851 /* We compare against total_size-1 because
1852 ($sp >= total_size) <=> ($sp > total_size-1) */
1853 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1854 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1855 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1856 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1858 emit_move_insn (scratch_v4si, size_v4si);
1859 size_v4si = scratch_v4si;
1861 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1862 emit_insn (gen_vec_extractv4si
1863 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1864 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
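/* That is, cgt leaves all-ones in each word of scratch_v4si where the
   corresponding word of $sp exceeds total_size - 1, and heq halts the
   SPU when the extracted word is zero, i.e. when the new frame would
   not fit.  */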
1867 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1868 the value of the previous $sp because we save it as the back
1869 chain. */
1870 if (total_size <= 2000)
1872 /* In this case we save the back chain first. */
1873 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1875 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1877 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1879 insn = emit_move_insn (scratch_reg_0, sp_reg);
1881 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1885 insn = emit_move_insn (scratch_reg_0, sp_reg);
1887 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1889 RTX_FRAME_RELATED_P (insn) = 1;
1890 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1892 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1894 if (total_size > 2000)
1896 /* Save the back chain ptr */
1897 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1900 if (frame_pointer_needed)
1902 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1903 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1904 + crtl->outgoing_args_size;
1905 /* Set the new frame_pointer */
1906 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1907 RTX_FRAME_RELATED_P (insn) = 1;
1908 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1910 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1911 real, REG_NOTES (insn));
1912 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1916 emit_note (NOTE_INSN_DELETED);
1920 spu_expand_epilogue (bool sibcall_p)
1922 int size = get_frame_size (), offset, regno;
1923 HOST_WIDE_INT saved_regs_size, total_size;
1924 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1925 rtx jump, scratch_reg_0;
1927 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1928 the "toplevel" insn chain. */
1929 emit_note (NOTE_INSN_DELETED);
1931 if (spu_naked_function_p (current_function_decl))
1934 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1936 saved_regs_size = spu_saved_regs_size ();
1937 total_size = size + saved_regs_size
1938 + crtl->outgoing_args_size
1939 + crtl->args.pretend_args_size;
1941 if (!current_function_is_leaf
1942 || cfun->calls_alloca || total_size > 0)
1943 total_size += STACK_POINTER_OFFSET;
1947 if (cfun->calls_alloca)
1948 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1950 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1953 if (saved_regs_size > 0)
1955 offset = -crtl->args.pretend_args_size;
1956 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1957 if (need_to_save_reg (regno, 1))
1960 frame_emit_load (regno, sp_reg, offset);
1965 if (!current_function_is_leaf)
1966 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1970 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1971 jump = emit_jump_insn (gen__return ());
1972 emit_barrier_after (jump);
1975 emit_note (NOTE_INSN_DELETED);
1979 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1983 /* This is inefficient because it ends up copying to a save-register
1984 which then gets saved even though $lr has already been saved. But
1985 it does generate better code for leaf functions and we don't need
1986 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1987 used for __builtin_return_address anyway, so maybe we don't care if
1988 it's inefficient. */
1989 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1993 /* Given VAL, generate a constant appropriate for MODE.
1994 If MODE is a vector mode, every element will be VAL.
1995 For TImode, VAL will be zero extended to 128 bits. */
1997 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2003 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2004 || GET_MODE_CLASS (mode) == MODE_FLOAT
2005 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2006 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2008 if (GET_MODE_CLASS (mode) == MODE_INT)
2009 return immed_double_const (val, 0, mode);
2011 /* val is the bit representation of the float */
2012 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2013 return hwint_to_const_double (mode, val);
2015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2016 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2018 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2020 units = GET_MODE_NUNITS (mode);
2022 v = rtvec_alloc (units);
2024 for (i = 0; i < units; ++i)
2025 RTVEC_ELT (v, i) = inner;
2027 return gen_rtx_CONST_VECTOR (mode, v);
2030 /* Create a MODE vector constant from 4 ints. */
2032 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2034 unsigned char arr[16];
2035 arr[0] = (a >> 24) & 0xff;
2036 arr[1] = (a >> 16) & 0xff;
2037 arr[2] = (a >> 8) & 0xff;
2038 arr[3] = (a >> 0) & 0xff;
2039 arr[4] = (b >> 24) & 0xff;
2040 arr[5] = (b >> 16) & 0xff;
2041 arr[6] = (b >> 8) & 0xff;
2042 arr[7] = (b >> 0) & 0xff;
2043 arr[8] = (c >> 24) & 0xff;
2044 arr[9] = (c >> 16) & 0xff;
2045 arr[10] = (c >> 8) & 0xff;
2046 arr[11] = (c >> 0) & 0xff;
2047 arr[12] = (d >> 24) & 0xff;
2048 arr[13] = (d >> 16) & 0xff;
2049 arr[14] = (d >> 8) & 0xff;
2050 arr[15] = (d >> 0) & 0xff;
2051 return array_to_constant(mode, arr);
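/* Each of A, B, C and D is split into bytes most-significant first, so
   for example spu_const_from_ints (V4SImode, 1, 2, 3, 4) yields the
   vector constant {1, 2, 3, 4}.  */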
2054 /* branch hint stuff */
2056 /* An array of these is used to propagate hints to predecessor blocks. */
2059 rtx prop_jump; /* propagated from another block */
2060 int bb_index; /* the original block. */
2062 static struct spu_bb_info *spu_bb_info;
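/* STOP_HINT_P is nonzero for insns after which a pending branch hint
   is invalid: calls, and the inlined integer division sequences
   (divmodsi4/udivmodsi4), as described in the hint-search loop of
   spu_machine_dependent_reorg below.  */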
2064 #define STOP_HINT_P(INSN) \
2065 (GET_CODE(INSN) == CALL_INSN \
2066 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2067 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2069 /* 1 when RTX is a hinted branch or its target. We keep track of
2070 what has been hinted so the safe-hint code can test it easily. */
2071 #define HINTED_P(RTX) \
2072 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2074 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2075 #define SCHED_ON_EVEN_P(RTX) \
2076 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2078 /* Emit a nop for INSN such that the two will dual issue. This assumes
2079 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2080 We check for TImode to handle a MULTI1 insn which has dual issued its
2081 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2084 emit_nop_for_insn (rtx insn)
2088 p = get_pipe (insn);
2089 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2090 new_insn = emit_insn_after (gen_lnop (), insn);
2091 else if (p == 1 && GET_MODE (insn) == TImode)
2093 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2094 PUT_MODE (new_insn, TImode);
2095 PUT_MODE (insn, VOIDmode);
2098 new_insn = emit_insn_after (gen_lnop (), insn);
2099 recog_memoized (new_insn);
2102 /* Insert nops in basic blocks to meet dual issue alignment
2103 requirements. Also make sure hbrp and hint instructions are at least
2104 one cycle apart, possibly inserting a nop. */
2108 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2112 /* This sets up INSN_ADDRESSES. */
2113 shorten_branches (get_insns ());
2115 /* Keep track of length added by nops. */
2119 insn = get_insns ();
2120 if (!active_insn_p (insn))
2121 insn = next_active_insn (insn);
2122 for (; insn; insn = next_insn)
2124 next_insn = next_active_insn (insn);
2125 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2126 || INSN_CODE (insn) == CODE_FOR_hbr)
2130 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2131 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2132 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2135 prev_insn = emit_insn_before (gen_lnop (), insn);
2136 PUT_MODE (prev_insn, GET_MODE (insn));
2137 PUT_MODE (insn, TImode);
2143 if (INSN_CODE (insn) == CODE_FOR_blockage)
2145 if (GET_MODE (insn) == TImode)
2146 PUT_MODE (next_insn, TImode);
2148 next_insn = next_active_insn (insn);
2150 addr = INSN_ADDRESSES (INSN_UID (insn));
2151 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2153 if (((addr + length) & 7) != 0)
2155 emit_nop_for_insn (prev_insn);
2159 else if (GET_MODE (insn) == TImode
2160 && ((next_insn && GET_MODE (next_insn) != TImode)
2161 || get_attr_type (insn) == TYPE_MULTI0)
2162 && ((addr + length) & 7) != 0)
2164 /* prev_insn will always be set because the first insn is
2165 always 8-byte aligned. */
2166 emit_nop_for_insn (prev_insn);
2174 /* Routines for branch hints. */
2177 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2178 int distance, sbitmap blocks)
2180 rtx branch_label = 0;
2185 if (before == 0 || branch == 0 || target == 0)
2188 /* While scheduling we require hints to be no further than 600 bytes
2189 from the branch, so we need to enforce that here too. */
2193 /* If BEFORE is a basic block note, emit the hint after the note. */
2194 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2195 before = NEXT_INSN (before);
2197 branch_label = gen_label_rtx ();
2198 LABEL_NUSES (branch_label)++;
2199 LABEL_PRESERVE_P (branch_label) = 1;
2200 insn = emit_label_before (branch_label, branch);
2201 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2202 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2204 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2205 recog_memoized (hint);
2206 HINTED_P (branch) = 1;
2208 if (GET_CODE (target) == LABEL_REF)
2209 HINTED_P (XEXP (target, 0)) = 1;
2210 else if (tablejump_p (branch, 0, &table))
2214 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2215 vec = XVEC (PATTERN (table), 0);
2217 vec = XVEC (PATTERN (table), 1);
2218 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2219 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2222 if (distance >= 588)
2224 /* Make sure the hint isn't scheduled any earlier than this point,
2225 which could make it too far for the branch offset to fit. */
2226 recog_memoized (emit_insn_before (gen_blockage (), hint));
2228 else if (distance <= 8 * 4)
2230 /* To guarantee at least 8 insns between the hint and branch we
2231 insert nops. */
2233 for (d = distance; d < 8 * 4; d += 4)
2236 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2237 recog_memoized (insn);
2240 /* Make sure any nops inserted aren't scheduled before the hint. */
2241 recog_memoized (emit_insn_after (gen_blockage (), hint));
2243 /* Make sure any nops inserted aren't scheduled after the call. */
2244 if (CALL_P (branch) && distance < 8 * 4)
2245 recog_memoized (emit_insn_before (gen_blockage (), branch));
2249 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2250 the rtx for the branch target. */
2252 get_branch_target (rtx branch)
2254 if (GET_CODE (branch) == JUMP_INSN)
2258 /* Return statements */
2259 if (GET_CODE (PATTERN (branch)) == RETURN)
2260 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2263 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2264 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2267 set = single_set (branch);
2268 src = SET_SRC (set);
2269 if (GET_CODE (SET_DEST (set)) != PC)
2272 if (GET_CODE (src) == IF_THEN_ELSE)
2275 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2278 /* If the more probable case is not a fall through, then
2279 try a branch hint. */
2280 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2281 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2282 && GET_CODE (XEXP (src, 1)) != PC)
2283 lab = XEXP (src, 1);
2284 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2285 && GET_CODE (XEXP (src, 2)) != PC)
2286 lab = XEXP (src, 2);
2290 if (GET_CODE (lab) == RETURN)
2291 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2299 else if (GET_CODE (branch) == CALL_INSN)
2302 /* All of our call patterns are in a PARALLEL and the CALL is
2303 the first pattern in the PARALLEL. */
2304 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2306 call = XVECEXP (PATTERN (branch), 0, 0);
2307 if (GET_CODE (call) == SET)
2308 call = SET_SRC (call);
2309 if (GET_CODE (call) != CALL)
2311 return XEXP (XEXP (call, 0), 0);
2316 /* The special $hbr register is used to prevent the insn scheduler from
2317 moving hbr insns across instructions which invalidate them. It
2318 should only be used in a clobber, and this function searches for
2319 insns which clobber it. */
2321 insn_clobbers_hbr (rtx insn)
2324 && GET_CODE (PATTERN (insn)) == PARALLEL)
2326 rtx parallel = PATTERN (insn);
2329 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2331 clobber = XVECEXP (parallel, 0, j);
2332 if (GET_CODE (clobber) == CLOBBER
2333 && GET_CODE (XEXP (clobber, 0)) == REG
2334 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2341 /* Search up to 32 insns starting at FIRST:
2342 - at any kind of hinted branch, just return
2343 - at any unconditional branch in the first 15 insns, just return
2344 - at a call or indirect branch, after the first 15 insns, force it to
2345 an even address and return
2346 - at any unconditional branch, after the first 15 insns, force it to
2347 an even address and return
2348 At the end of the search, insert an hbrp within 4 insns of FIRST,
2349 and an hbrp within 16 instructions of FIRST.
2352 insert_hbrp_for_ilb_runout (rtx first)
2354 rtx insn, before_4 = 0, before_16 = 0;
2355 int addr = 0, length, first_addr = -1;
2356 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2357 int insert_lnop_after = 0;
2358 for (insn = first; insn; insn = NEXT_INSN (insn))
2361 if (first_addr == -1)
2362 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2363 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2364 length = get_attr_length (insn);
2366 if (before_4 == 0 && addr + length >= 4 * 4)
2368 /* We test for 14 instructions because the first hbrp will add
2369 up to 2 instructions. */
2370 if (before_16 == 0 && addr + length >= 14 * 4)
2373 if (INSN_CODE (insn) == CODE_FOR_hbr)
2375 /* Make sure an hbrp is at least 2 cycles away from a hint.
2376 Insert an lnop after the hbrp when necessary. */
2377 if (before_4 == 0 && addr > 0)
2380 insert_lnop_after |= 1;
2382 else if (before_4 && addr <= 4 * 4)
2383 insert_lnop_after |= 1;
2384 if (before_16 == 0 && addr > 10 * 4)
2387 insert_lnop_after |= 2;
2389 else if (before_16 && addr <= 14 * 4)
2390 insert_lnop_after |= 2;
2393 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2395 if (addr < hbrp_addr0)
2397 else if (addr < hbrp_addr1)
2401 if (CALL_P (insn) || JUMP_P (insn))
2403 if (HINTED_P (insn))
2406 /* Any branch after the first 15 insns should be on an even
2407 address to avoid a special case branch. There might be
2408 some nops and/or hbrps inserted, so we test after 10
2409 insns. */
2410 if (addr > 10 * 4)
2411 SCHED_ON_EVEN_P (insn) = 1;
2414 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2418 if (addr + length >= 32 * 4)
2420 gcc_assert (before_4 && before_16);
2421 if (hbrp_addr0 > 4 * 4)
2424 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2425 recog_memoized (insn);
2426 INSN_ADDRESSES_NEW (insn,
2427 INSN_ADDRESSES (INSN_UID (before_4)));
2428 PUT_MODE (insn, GET_MODE (before_4));
2429 PUT_MODE (before_4, TImode);
2430 if (insert_lnop_after & 1)
2432 insn = emit_insn_before (gen_lnop (), before_4);
2433 recog_memoized (insn);
2434 INSN_ADDRESSES_NEW (insn,
2435 INSN_ADDRESSES (INSN_UID (before_4)));
2436 PUT_MODE (insn, TImode);
2439 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2440 && hbrp_addr1 > 16 * 4)
2443 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2444 recog_memoized (insn);
2445 INSN_ADDRESSES_NEW (insn,
2446 INSN_ADDRESSES (INSN_UID (before_16)));
2447 PUT_MODE (insn, GET_MODE (before_16));
2448 PUT_MODE (before_16, TImode);
2449 if (insert_lnop_after & 2)
2451 insn = emit_insn_before (gen_lnop (), before_16);
2452 recog_memoized (insn);
2453 INSN_ADDRESSES_NEW (insn,
2454 INSN_ADDRESSES (INSN_UID
2456 PUT_MODE (insn, TImode);
2462 else if (BARRIER_P (insn))
2467 /* The SPU might hang when it executes 48 inline instructions after a
2468 hinted branch jumps to its hinted target. The beginning of a
2469 function and the return from a call might have been hinted, and must
2470 be handled as well. To prevent a hang we insert 2 hbrps. The first
2471 should be within 6 insns of the branch target. The second should be
2472 within 22 insns of the branch target. When determining if hbrps are
2473 necessary, we look for only 32 inline instructions, because up to to
2474 12 nops and 4 hbrps could be inserted. Similarily, when inserting
2475 new hbrps, we insert them within 4 and 16 insns of the target. */
2480 if (TARGET_SAFE_HINTS)
2482 shorten_branches (get_insns ());
2483 /* Insert hbrp at beginning of function */
2484 insn = next_active_insn (get_insns ());
2486 insert_hbrp_for_ilb_runout (insn);
2487 /* Insert hbrp after hinted targets. */
2488 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2489 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2490 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2494 static int in_spu_reorg;
2496 /* Insert branch hints. There are no branch optimizations after this
2497 pass, so it's safe to set our branch hints now. */
2499 spu_machine_dependent_reorg (void)
2504 rtx branch_target = 0;
2505 int branch_addr = 0, insn_addr, required_dist = 0;
2509 if (!TARGET_BRANCH_HINTS || optimize == 0)
2511 /* We still do it for unoptimized code because an external
2512 function might have hinted a call or return. */
2518 blocks = sbitmap_alloc (last_basic_block);
2519 sbitmap_zero (blocks);
2522 compute_bb_for_insn ();
2527 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2528 sizeof (struct spu_bb_info));
2530 /* We need exact insn addresses and lengths. */
2531 shorten_branches (get_insns ());
2533 for (i = n_basic_blocks - 1; i >= 0; i--)
2535 bb = BASIC_BLOCK (i);
2537 if (spu_bb_info[i].prop_jump)
2539 branch = spu_bb_info[i].prop_jump;
2540 branch_target = get_branch_target (branch);
2541 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2542 required_dist = spu_hint_dist;
2544 /* Search from end of a block to beginning. In this loop, find
2545 jumps which need a branch and emit them only when:
2546 - it's an indirect branch and we're at the insn which sets
2548 - we're at an insn that will invalidate the hint. e.g., a
2549 call, another hint insn, inline asm that clobbers $hbr, and
2550 some inlined operations (divmodsi4). Don't consider jumps
2551 because they are only at the end of a block and are
2552 considered when we are deciding whether to propagate
2553 - we're getting too far away from the branch. The hbr insns
2554 only have a signed 10 bit offset
2555 We go back as far as possible so the branch will be considered
2556 for propagation when we get to the beginning of the block. */
2557 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2561 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2563 && ((GET_CODE (branch_target) == REG
2564 && set_of (branch_target, insn) != NULL_RTX)
2565 || insn_clobbers_hbr (insn)
2566 || branch_addr - insn_addr > 600))
2568 rtx next = NEXT_INSN (insn);
2569 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2570 if (insn != BB_END (bb)
2571 && branch_addr - next_addr >= required_dist)
2575 "hint for %i in block %i before %i\n",
2576 INSN_UID (branch), bb->index,
2578 spu_emit_branch_hint (next, branch, branch_target,
2579 branch_addr - next_addr, blocks);
2584 /* JUMP_P will only be true at the end of a block. When
2585 branch is already set it means we've previously decided
2586 to propagate a hint for that branch into this block. */
2587 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2590 if ((branch_target = get_branch_target (insn)))
2593 branch_addr = insn_addr;
2594 required_dist = spu_hint_dist;
2598 if (insn == BB_HEAD (bb))
2604 /* If we haven't emitted a hint for this branch yet, it might
2605 be profitable to emit it in one of the predecessor blocks,
2606 especially for loops. */
2608 basic_block prev = 0, prop = 0, prev2 = 0;
2609 int loop_exit = 0, simple_loop = 0;
2610 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2612 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2613 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2614 prev = EDGE_PRED (bb, j)->src;
2616 prev2 = EDGE_PRED (bb, j)->src;
2618 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2619 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2621 else if (EDGE_SUCC (bb, j)->dest == bb)
2624 /* If this branch is a loop exit then propagate to previous
2625 fallthru block. This catches the cases when it is a simple
2626 loop or when there is an initial branch into the loop. */
2627 if (prev && (loop_exit || simple_loop)
2628 && prev->loop_depth <= bb->loop_depth)
2631 /* If there is only one adjacent predecessor. Don't propagate
2632 outside this loop. This loop_depth test isn't perfect, but
2633 I'm not sure the loop_father member is valid at this point. */
2634 else if (prev && single_pred_p (bb)
2635 && prev->loop_depth == bb->loop_depth)
2638 /* If this is the JOIN block of a simple IF-THEN then
2639 propogate the hint to the HEADER block. */
2640 else if (prev && prev2
2641 && EDGE_COUNT (bb->preds) == 2
2642 && EDGE_COUNT (prev->preds) == 1
2643 && EDGE_PRED (prev, 0)->src == prev2
2644 && prev2->loop_depth == bb->loop_depth
2645 && GET_CODE (branch_target) != REG)
2648 /* Don't propagate when:
2649 - this is a simple loop and the hint would be too far
2650 - this is not a simple loop and there are 16 insns in
2652 - the predecessor block ends in a branch that will be
2654 - the predecessor block ends in an insn that invalidates
2658 && (bbend = BB_END (prop))
2659 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2660 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2661 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2664 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2665 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2666 bb->index, prop->index, bb->loop_depth,
2667 INSN_UID (branch), loop_exit, simple_loop,
2668 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2670 spu_bb_info[prop->index].prop_jump = branch;
2671 spu_bb_info[prop->index].bb_index = i;
2673 else if (branch_addr - next_addr >= required_dist)
2676 fprintf (dump_file, "hint for %i in block %i before %i\n",
2677 INSN_UID (branch), bb->index,
2678 INSN_UID (NEXT_INSN (insn)));
2679 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2680 branch_addr - next_addr, blocks);
2687 if (!sbitmap_empty_p (blocks))
2688 find_many_sub_basic_blocks (blocks);
2690 /* We have to schedule to make sure alignment is ok. */
2691 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2693 /* The hints need to be scheduled, so call it again. */
2701 if (spu_flag_var_tracking)
2704 timevar_push (TV_VAR_TRACKING);
2705 variable_tracking_main ();
2706 timevar_pop (TV_VAR_TRACKING);
2707 df_finish_pass (false);
2710 free_bb_for_insn ();
2716 /* Insn scheduling routines, primarily for dual issue. */
2718 spu_sched_issue_rate (void)
2724 uses_ls_unit(rtx insn)
2726 rtx set = single_set (insn);
2728 && (GET_CODE (SET_DEST (set)) == MEM
2729 || GET_CODE (SET_SRC (set)) == MEM))
2738 /* Handle inline asm */
2739 if (INSN_CODE (insn) == -1)
2741 t = get_attr_type (insn);
2766 case TYPE_IPREFETCH:
2774 /* haifa-sched.c has a static variable that keeps track of the current
2775 cycle. It is passed to spu_sched_reorder, and we record it here for
2776 use by spu_sched_variable_issue. It won't be accurate if the
2777 scheduler updates it's clock_var between the two calls. */
2778 static int clock_var;
2780 /* This is used to keep track of insn alignment. Set to 0 at the
2781 beginning of each block and increased by the "length" attr of each
2783 static int spu_sched_length;
2785 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2786 ready list appropriately in spu_sched_reorder(). */
2787 static int pipe0_clock;
2788 static int pipe1_clock;
2790 static int prev_clock_var;
2792 static int prev_priority;
2794 /* The SPU needs to load the next ilb sometime during the execution of
2795 the previous ilb. There is a potential conflict if every cycle has a
2796 load or store. To avoid the conflict we make sure the load/store
2797 unit is free for at least one cycle during the execution of insns in
2798 the previous ilb. */
2799 static int spu_ls_first;
2800 static int prev_ls_clock;
2803 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2804 int max_ready ATTRIBUTE_UNUSED)
2806 spu_sched_length = 0;
2810 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2811 int max_ready ATTRIBUTE_UNUSED)
2813 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2815 /* When any block might be at least 8-byte aligned, assume they
2816 will all be at least 8-byte aligned to make sure dual issue
2817 works out correctly. */
2818 spu_sched_length = 0;
2820 spu_ls_first = INT_MAX;
2825 prev_clock_var = -1;
2830 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2831 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2835 if (GET_CODE (PATTERN (insn)) == USE
2836 || GET_CODE (PATTERN (insn)) == CLOBBER
2837 || (len = get_attr_length (insn)) == 0)
2840 spu_sched_length += len;
2842 /* Reset on inline asm */
2843 if (INSN_CODE (insn) == -1)
2845 spu_ls_first = INT_MAX;
2850 p = get_pipe (insn);
2852 pipe0_clock = clock_var;
2854 pipe1_clock = clock_var;
2858 if (clock_var - prev_ls_clock > 1
2859 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2860 spu_ls_first = INT_MAX;
2861 if (uses_ls_unit (insn))
2863 if (spu_ls_first == INT_MAX)
2864 spu_ls_first = spu_sched_length;
2865 prev_ls_clock = clock_var;
2868 /* The scheduler hasn't inserted the nop, but we will later on.
2869 Include those nops in spu_sched_length. */
2870 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2871 spu_sched_length += 4;
2872 prev_clock_var = clock_var;
2874 /* more is -1 when called from spu_sched_reorder for new insns
2875 that don't have INSN_PRIORITY */
2877 prev_priority = INSN_PRIORITY (insn);
2880 /* Always try issueing more insns. spu_sched_reorder will decide
2881 when the cycle should be advanced. */
2885 /* This function is called for both TARGET_SCHED_REORDER and
2886 TARGET_SCHED_REORDER2. */
2888 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2889 rtx *ready, int *nreadyp, int clock)
2891 int i, nready = *nreadyp;
2892 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2897 if (nready <= 0 || pipe1_clock >= clock)
2900 /* Find any rtl insns that don't generate assembly insns and schedule
2902 for (i = nready - 1; i >= 0; i--)
2905 if (INSN_CODE (insn) == -1
2906 || INSN_CODE (insn) == CODE_FOR_blockage
2907 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2909 ready[i] = ready[nready - 1];
2910 ready[nready - 1] = insn;
2915 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2916 for (i = 0; i < nready; i++)
2917 if (INSN_CODE (ready[i]) != -1)
2920 switch (get_attr_type (insn))
2945 case TYPE_IPREFETCH:
2951 /* In the first scheduling phase, schedule loads and stores together
2952 to increase the chance they will get merged during postreload CSE. */
2953 if (!reload_completed && pipe_ls >= 0)
2955 insn = ready[pipe_ls];
2956 ready[pipe_ls] = ready[nready - 1];
2957 ready[nready - 1] = insn;
2961 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2965 /* When we have loads/stores in every cycle of the last 15 insns and
2966 we are about to schedule another load/store, emit an hbrp insn
2969 && spu_sched_length - spu_ls_first >= 4 * 15
2970 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2972 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2973 recog_memoized (insn);
2974 if (pipe0_clock < clock)
2975 PUT_MODE (insn, TImode);
2976 spu_sched_variable_issue (file, verbose, insn, -1);
2980 /* In general, we want to emit nops to increase dual issue, but dual
2981 issue isn't faster when one of the insns could be scheduled later
2982 without effecting the critical path. We look at INSN_PRIORITY to
2983 make a good guess, but it isn't perfect so -mdual-nops=n can be
2984 used to effect it. */
2985 if (in_spu_reorg && spu_dual_nops < 10)
2987 /* When we are at an even address and we are not issueing nops to
2988 improve scheduling then we need to advance the cycle. */
2989 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2990 && (spu_dual_nops == 0
2993 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2996 /* When at an odd address, schedule the highest priority insn
2997 without considering pipeline. */
2998 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2999 && (spu_dual_nops == 0
3001 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3006 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3007 pipe0 insn in the ready list, schedule it. */
3008 if (pipe0_clock < clock && pipe_0 >= 0)
3009 schedule_i = pipe_0;
3011 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3012 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3014 schedule_i = pipe_1;
3016 if (schedule_i > -1)
3018 insn = ready[schedule_i];
3019 ready[schedule_i] = ready[nready - 1];
3020 ready[nready - 1] = insn;
3026 /* INSN is dependent on DEP_INSN. */
3028 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3032 /* The blockage pattern is used to prevent instructions from being
3033 moved across it and has no cost. */
3034 if (INSN_CODE (insn) == CODE_FOR_blockage
3035 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3038 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3039 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3042 /* Make sure hbrps are spread out. */
3043 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3044 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3047 /* Make sure hints and hbrps are 2 cycles apart. */
3048 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3049 || INSN_CODE (insn) == CODE_FOR_hbr)
3050 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3051 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3054 /* An hbrp has no real dependency on other insns. */
3055 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3056 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3059 /* Assuming that it is unlikely an argument register will be used in
3060 the first cycle of the called function, we reduce the cost for
3061 slightly better scheduling of dep_insn. When not hinted, the
3062 mispredicted branch would hide the cost as well. */
3065 rtx target = get_branch_target (insn);
3066 if (GET_CODE (target) != REG || !set_of (target, insn))
3071 /* And when returning from a function, let's assume the return values
3072 are completed sooner too. */
3073 if (CALL_P (dep_insn))
3076 /* Make sure an instruction that loads from the back chain is schedule
3077 away from the return instruction so a hint is more likely to get
3079 if (INSN_CODE (insn) == CODE_FOR__return
3080 && (set = single_set (dep_insn))
3081 && GET_CODE (SET_DEST (set)) == REG
3082 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3085 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3086 scheduler makes every insn in a block anti-dependent on the final
3087 jump_insn. We adjust here so higher cost insns will get scheduled
3089 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3090 return insn_cost (dep_insn) - 3;
3095 /* Create a CONST_DOUBLE from a string. */
3097 spu_float_const (const char *string, enum machine_mode mode)
3099 REAL_VALUE_TYPE value;
3100 value = REAL_VALUE_ATOF (string, mode);
3101 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3105 spu_constant_address_p (rtx x)
3107 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3108 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3109 || GET_CODE (x) == HIGH);
3112 static enum spu_immediate
3113 which_immediate_load (HOST_WIDE_INT val)
3115 gcc_assert (val == trunc_int_for_mode (val, SImode));
3117 if (val >= -0x8000 && val <= 0x7fff)
3119 if (val >= 0 && val <= 0x3ffff)
3121 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3123 if ((val & 0xffff) == 0)
3129 /* Return true when OP can be loaded by one of the il instructions, or
3130 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3132 immediate_load_p (rtx op, enum machine_mode mode)
3134 if (CONSTANT_P (op))
3136 enum immediate_class c = classify_immediate (op, mode);
3137 return c == IC_IL1 || c == IC_IL1s
3138 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3143 /* Return true if the first SIZE bytes of arr is a constant that can be
3144 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3145 represent the size and offset of the instruction to use. */
3147 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3149 int cpat, run, i, start;
3153 for (i = 0; i < size && cpat; i++)
3161 else if (arr[i] == 2 && arr[i+1] == 3)
3163 else if (arr[i] == 0)
3165 while (arr[i+run] == run && i+run < 16)
3167 if (run != 4 && run != 8)
3172 if ((i & (run-1)) != 0)
3179 if (cpat && (run || size < 16))
3186 *pstart = start == -1 ? 16-run : start;
3192 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3193 it into a register. MODE is only valid when OP is a CONST_INT. */
3194 static enum immediate_class
3195 classify_immediate (rtx op, enum machine_mode mode)
3198 unsigned char arr[16];
3199 int i, j, repeated, fsmbi, repeat;
3201 gcc_assert (CONSTANT_P (op));
3203 if (GET_MODE (op) != VOIDmode)
3204 mode = GET_MODE (op);
3206 /* A V4SI const_vector with all identical symbols is ok. */
3209 && GET_CODE (op) == CONST_VECTOR
3210 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3211 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3212 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3213 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3214 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3215 op = CONST_VECTOR_ELT (op, 0);
3217 switch (GET_CODE (op))
3221 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3224 /* We can never know if the resulting address fits in 18 bits and can be
3225 loaded with ila. For now, assume the address will not overflow if
3226 the displacement is "small" (fits 'K' constraint). */
3227 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3229 rtx sym = XEXP (XEXP (op, 0), 0);
3230 rtx cst = XEXP (XEXP (op, 0), 1);
3232 if (GET_CODE (sym) == SYMBOL_REF
3233 && GET_CODE (cst) == CONST_INT
3234 && satisfies_constraint_K (cst))
3243 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3244 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3245 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3251 constant_to_array (mode, op, arr);
3253 /* Check that each 4-byte slot is identical. */
3255 for (i = 4; i < 16; i += 4)
3256 for (j = 0; j < 4; j++)
3257 if (arr[j] != arr[i + j])
3262 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3263 val = trunc_int_for_mode (val, SImode);
3265 if (which_immediate_load (val) != SPU_NONE)
3269 /* Any mode of 2 bytes or smaller can be loaded with an il
3271 gcc_assert (GET_MODE_SIZE (mode) > 2);
3275 for (i = 0; i < 16 && fsmbi; i++)
3276 if (arr[i] != 0 && repeat == 0)
3278 else if (arr[i] != 0 && arr[i] != repeat)
3281 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3283 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3296 static enum spu_immediate
3297 which_logical_immediate (HOST_WIDE_INT val)
3299 gcc_assert (val == trunc_int_for_mode (val, SImode));
3301 if (val >= -0x200 && val <= 0x1ff)
3303 if (val >= 0 && val <= 0xffff)
3305 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3307 val = trunc_int_for_mode (val, HImode);
3308 if (val >= -0x200 && val <= 0x1ff)
3310 if ((val & 0xff) == ((val >> 8) & 0xff))
3312 val = trunc_int_for_mode (val, QImode);
3313 if (val >= -0x200 && val <= 0x1ff)
3320 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3323 const_vector_immediate_p (rtx x)
3326 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3327 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3328 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3329 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3335 logical_immediate_p (rtx op, enum machine_mode mode)
3338 unsigned char arr[16];
3341 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3342 || GET_CODE (op) == CONST_VECTOR);
3344 if (GET_CODE (op) == CONST_VECTOR
3345 && !const_vector_immediate_p (op))
3348 if (GET_MODE (op) != VOIDmode)
3349 mode = GET_MODE (op);
3351 constant_to_array (mode, op, arr);
3353 /* Check that bytes are repeated. */
3354 for (i = 4; i < 16; i += 4)
3355 for (j = 0; j < 4; j++)
3356 if (arr[j] != arr[i + j])
3359 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3360 val = trunc_int_for_mode (val, SImode);
3362 i = which_logical_immediate (val);
3363 return i != SPU_NONE && i != SPU_IOHL;
3367 iohl_immediate_p (rtx op, enum machine_mode mode)
3370 unsigned char arr[16];
3373 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3374 || GET_CODE (op) == CONST_VECTOR);
3376 if (GET_CODE (op) == CONST_VECTOR
3377 && !const_vector_immediate_p (op))
3380 if (GET_MODE (op) != VOIDmode)
3381 mode = GET_MODE (op);
3383 constant_to_array (mode, op, arr);
3385 /* Check that bytes are repeated. */
3386 for (i = 4; i < 16; i += 4)
3387 for (j = 0; j < 4; j++)
3388 if (arr[j] != arr[i + j])
3391 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3392 val = trunc_int_for_mode (val, SImode);
3394 return val >= 0 && val <= 0xffff;
3398 arith_immediate_p (rtx op, enum machine_mode mode,
3399 HOST_WIDE_INT low, HOST_WIDE_INT high)
3402 unsigned char arr[16];
3405 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3406 || GET_CODE (op) == CONST_VECTOR);
3408 if (GET_CODE (op) == CONST_VECTOR
3409 && !const_vector_immediate_p (op))
3412 if (GET_MODE (op) != VOIDmode)
3413 mode = GET_MODE (op);
3415 constant_to_array (mode, op, arr);
3417 if (VECTOR_MODE_P (mode))
3418 mode = GET_MODE_INNER (mode);
3420 bytes = GET_MODE_SIZE (mode);
3421 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3423 /* Check that bytes are repeated. */
3424 for (i = bytes; i < 16; i += bytes)
3425 for (j = 0; j < bytes; j++)
3426 if (arr[j] != arr[i + j])
3430 for (j = 1; j < bytes; j++)
3431 val = (val << 8) | arr[j];
3433 val = trunc_int_for_mode (val, mode);
3435 return val >= low && val <= high;
3439 - any 32-bit constant (SImode, SFmode)
3440 - any constant that can be generated with fsmbi (any mode)
3441 - a 64-bit constant where the high and low bits are identical
3443 - a 128-bit constant where the four 32-bit words match. */
3445 spu_legitimate_constant_p (rtx x)
3447 if (GET_CODE (x) == HIGH)
3449 /* V4SI with all identical symbols is valid. */
3451 && GET_MODE (x) == V4SImode
3452 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3453 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3454 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3455 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3456 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3457 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3459 if (GET_CODE (x) == CONST_VECTOR
3460 && !const_vector_immediate_p (x))
3465 /* Valid address are:
3466 - symbol_ref, label_ref, const
3468 - reg + const, where either reg or const is 16 byte aligned
3469 - reg + reg, alignment doesn't matter
3470 The alignment matters in the reg+const case because lqd and stqd
3471 ignore the 4 least significant bits of the const. (TODO: It might be
3472 preferable to allow any alignment and fix it up when splitting.) */
3474 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3475 rtx x, int reg_ok_strict)
3477 if (mode == TImode && GET_CODE (x) == AND
3478 && GET_CODE (XEXP (x, 1)) == CONST_INT
3479 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3481 switch (GET_CODE (x))
3485 return !TARGET_LARGE_MEM;
3488 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3490 rtx sym = XEXP (XEXP (x, 0), 0);
3491 rtx cst = XEXP (XEXP (x, 0), 1);
3493 /* Accept any symbol_ref + constant, assuming it does not
3494 wrap around the local store addressability limit. */
3495 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3501 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3505 gcc_assert (GET_CODE (x) == REG);
3508 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3513 rtx op0 = XEXP (x, 0);
3514 rtx op1 = XEXP (x, 1);
3515 if (GET_CODE (op0) == SUBREG)
3516 op0 = XEXP (op0, 0);
3517 if (GET_CODE (op1) == SUBREG)
3518 op1 = XEXP (op1, 0);
3519 /* We can't just accept any aligned register because CSE can
3520 change it to a register that is not marked aligned and then
3521 recog will fail. So we only accept frame registers because
3522 they will only be changed to other frame registers. */
3523 if (GET_CODE (op0) == REG
3524 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3525 && GET_CODE (op1) == CONST_INT
3526 && INTVAL (op1) >= -0x2000
3527 && INTVAL (op1) <= 0x1fff
3528 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
3530 if (GET_CODE (op0) == REG
3531 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3532 && GET_CODE (op1) == REG
3533 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3544 /* When the address is reg + const_int, force the const_int into a
3547 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3548 enum machine_mode mode)
3551 /* Make sure both operands are registers. */
3552 if (GET_CODE (x) == PLUS)
3556 if (ALIGNED_SYMBOL_REF_P (op0))
3558 op0 = force_reg (Pmode, op0);
3559 mark_reg_pointer (op0, 128);
3561 else if (GET_CODE (op0) != REG)
3562 op0 = force_reg (Pmode, op0);
3563 if (ALIGNED_SYMBOL_REF_P (op1))
3565 op1 = force_reg (Pmode, op1);
3566 mark_reg_pointer (op1, 128);
3568 else if (GET_CODE (op1) != REG)
3569 op1 = force_reg (Pmode, op1);
3570 x = gen_rtx_PLUS (Pmode, op0, op1);
3571 if (spu_legitimate_address (mode, x, 0))
3577 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3578 struct attribute_spec.handler. */
3580 spu_handle_fndecl_attribute (tree * node,
3582 tree args ATTRIBUTE_UNUSED,
3583 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3585 if (TREE_CODE (*node) != FUNCTION_DECL)
3587 warning (0, "`%s' attribute only applies to functions",
3588 IDENTIFIER_POINTER (name));
3589 *no_add_attrs = true;
3595 /* Handle the "vector" attribute. */
3597 spu_handle_vector_attribute (tree * node, tree name,
3598 tree args ATTRIBUTE_UNUSED,
3599 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3601 tree type = *node, result = NULL_TREE;
3602 enum machine_mode mode;
3605 while (POINTER_TYPE_P (type)
3606 || TREE_CODE (type) == FUNCTION_TYPE
3607 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3608 type = TREE_TYPE (type);
3610 mode = TYPE_MODE (type);
3612 unsigned_p = TYPE_UNSIGNED (type);
3616 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3619 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3622 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3625 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3628 result = V4SF_type_node;
3631 result = V2DF_type_node;
3637 /* Propagate qualifiers attached to the element type
3638 onto the vector type. */
3639 if (result && result != type && TYPE_QUALS (type))
3640 result = build_qualified_type (result, TYPE_QUALS (type));
3642 *no_add_attrs = true; /* No need to hang on to the attribute. */
3645 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3647 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3652 /* Return nonzero if FUNC is a naked function. */
3654 spu_naked_function_p (tree func)
3658 if (TREE_CODE (func) != FUNCTION_DECL)
3661 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3662 return a != NULL_TREE;
3666 spu_initial_elimination_offset (int from, int to)
3668 int saved_regs_size = spu_saved_regs_size ();
3670 if (!current_function_is_leaf || crtl->outgoing_args_size
3671 || get_frame_size () || saved_regs_size)
3672 sp_offset = STACK_POINTER_OFFSET;
3673 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3674 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3675 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3676 return get_frame_size ();
3677 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3678 return sp_offset + crtl->outgoing_args_size
3679 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3680 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3681 return get_frame_size () + saved_regs_size + sp_offset;
3687 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3689 enum machine_mode mode = TYPE_MODE (type);
3690 int byte_size = ((mode == BLKmode)
3691 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3693 /* Make sure small structs are left justified in a register. */
3694 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3695 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3697 enum machine_mode smode;
3700 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3701 int n = byte_size / UNITS_PER_WORD;
3702 v = rtvec_alloc (nregs);
3703 for (i = 0; i < n; i++)
3705 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3706 gen_rtx_REG (TImode,
3709 GEN_INT (UNITS_PER_WORD * i));
3710 byte_size -= UNITS_PER_WORD;
3718 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3720 gen_rtx_EXPR_LIST (VOIDmode,
3721 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3722 GEN_INT (UNITS_PER_WORD * n));
3724 return gen_rtx_PARALLEL (mode, v);
3726 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3730 spu_function_arg (CUMULATIVE_ARGS cum,
3731 enum machine_mode mode,
3732 tree type, int named ATTRIBUTE_UNUSED)
3736 if (cum >= MAX_REGISTER_ARGS)
3739 byte_size = ((mode == BLKmode)
3740 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3742 /* The ABI does not allow parameters to be passed partially in
3743 reg and partially in stack. */
3744 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3747 /* Make sure small structs are left justified in a register. */
3748 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3749 && byte_size < UNITS_PER_WORD && byte_size > 0)
3751 enum machine_mode smode;
3755 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3756 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3757 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3759 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3762 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3765 /* Variable sized types are passed by reference. */
3767 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3768 enum machine_mode mode ATTRIBUTE_UNUSED,
3769 const_tree type, bool named ATTRIBUTE_UNUSED)
3771 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3777 /* Create and return the va_list datatype.
3779 On SPU, va_list is an array type equivalent to
3781 typedef struct __va_list_tag
3783 void *__args __attribute__((__aligned(16)));
3784 void *__skip __attribute__((__aligned(16)));
3788 where __args points to the arg that will be returned by the next
3789 va_arg(), and __skip points to the previous stack frame such that
3790 when __args == __skip we should advance __args by 32 bytes. */
3792 spu_build_builtin_va_list (void)
3794 tree f_args, f_skip, record, type_decl;
3797 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3800 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3802 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3803 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3805 DECL_FIELD_CONTEXT (f_args) = record;
3806 DECL_ALIGN (f_args) = 128;
3807 DECL_USER_ALIGN (f_args) = 1;
3809 DECL_FIELD_CONTEXT (f_skip) = record;
3810 DECL_ALIGN (f_skip) = 128;
3811 DECL_USER_ALIGN (f_skip) = 1;
3813 TREE_CHAIN (record) = type_decl;
3814 TYPE_NAME (record) = type_decl;
3815 TYPE_FIELDS (record) = f_args;
3816 TREE_CHAIN (f_args) = f_skip;
3818 /* We know this is being padded and we want it too. It is an internal
3819 type so hide the warnings from the user. */
3821 warn_padded = false;
3823 layout_type (record);
3827 /* The correct type is an array type of one element. */
3828 return build_array_type (record, build_index_type (size_zero_node));
3831 /* Implement va_start by filling the va_list structure VALIST.
3832 NEXTARG points to the first anonymous stack argument.
3834 The following global variables are used to initialize
3835 the va_list structure:
3838 the CUMULATIVE_ARGS for this function
3840 crtl->args.arg_offset_rtx:
3841 holds the offset of the first anonymous stack argument
3842 (relative to the virtual arg pointer). */
3845 spu_va_start (tree valist, rtx nextarg)
3847 tree f_args, f_skip;
3850 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3851 f_skip = TREE_CHAIN (f_args);
3853 valist = build_va_arg_indirect_ref (valist);
3855 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3857 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3859 /* Find the __args area. */
3860 t = make_tree (TREE_TYPE (args), nextarg);
3861 if (crtl->args.pretend_args_size > 0)
3862 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3863 size_int (-STACK_POINTER_OFFSET));
3864 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3865 TREE_SIDE_EFFECTS (t) = 1;
3866 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3868 /* Find the __skip area. */
3869 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3870 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3871 size_int (crtl->args.pretend_args_size
3872 - STACK_POINTER_OFFSET));
3873 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3874 TREE_SIDE_EFFECTS (t) = 1;
3875 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3878 /* Gimplify va_arg by updating the va_list structure
3879 VALIST as required to retrieve an argument of type
3880 TYPE, and returning that argument.
3882 ret = va_arg(VALIST, TYPE);
3884 generates code equivalent to:
3886 paddedsize = (sizeof(TYPE) + 15) & -16;
3887 if (VALIST.__args + paddedsize > VALIST.__skip
3888 && VALIST.__args <= VALIST.__skip)
3889 addr = VALIST.__skip + 32;
3891 addr = VALIST.__args;
3892 VALIST.__args = addr + paddedsize;
3893 ret = *(TYPE *)addr;
3896 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3897 gimple_seq * post_p ATTRIBUTE_UNUSED)
3899 tree f_args, f_skip;
3901 HOST_WIDE_INT size, rsize;
3902 tree paddedsize, addr, tmp;
3903 bool pass_by_reference_p;
3905 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3906 f_skip = TREE_CHAIN (f_args);
3908 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3910 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3912 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3914 addr = create_tmp_var (ptr_type_node, "va_arg");
3915 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3917 /* if an object is dynamically sized, a pointer to it is passed
3918 instead of the object itself. */
3919 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3921 if (pass_by_reference_p)
3922 type = build_pointer_type (type);
3923 size = int_size_in_bytes (type);
3924 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3926 /* build conditional expression to calculate addr. The expression
3927 will be gimplified later. */
3928 paddedsize = size_int (rsize);
3929 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
3930 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3931 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3932 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3933 unshare_expr (skip)));
3935 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3936 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3937 size_int (32)), unshare_expr (args));
3939 gimplify_assign (addr, tmp, pre_p);
3941 /* update VALIST.__args */
3942 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3943 gimplify_assign (unshare_expr (args), tmp, pre_p);
3945 addr = fold_convert (build_pointer_type (type), addr);
3947 if (pass_by_reference_p)
3948 addr = build_va_arg_indirect_ref (addr);
3950 return build_va_arg_indirect_ref (addr);
3953 /* Save parameter registers starting with the register that corresponds
3954 to the first unnamed parameters. If the first unnamed parameter is
3955 in the stack then save no registers. Set pretend_args_size to the
3956 amount of space needed to save the registers. */
3958 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3959 tree type, int *pretend_size, int no_rtl)
3968 /* cum currently points to the last named argument, we want to
3969 start at the next argument. */
3970 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3972 offset = -STACK_POINTER_OFFSET;
3973 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3975 tmp = gen_frame_mem (V4SImode,
3976 plus_constant (virtual_incoming_args_rtx,
3978 emit_move_insn (tmp,
3979 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3982 *pretend_size = offset + STACK_POINTER_OFFSET;
3987 spu_conditional_register_usage (void)
3991 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3992 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3996 /* This is called to decide when we can simplify a load instruction. We
3997 must only return true for registers which we know will always be
3998 aligned. Taking into account that CSE might replace this reg with
3999 another one that has not been marked aligned.
4000 So this is really only true for frame, stack and virtual registers,
4001 which we know are always aligned and should not be adversely effected
4004 regno_aligned_for_load (int regno)
4006 return regno == FRAME_POINTER_REGNUM
4007 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
4008 || regno == ARG_POINTER_REGNUM
4009 || regno == STACK_POINTER_REGNUM
4010 || (regno >= FIRST_VIRTUAL_REGISTER
4011 && regno <= LAST_VIRTUAL_REGISTER);
4014 /* Return TRUE when mem is known to be 16-byte aligned. */
4016 aligned_mem_p (rtx mem)
4018 if (MEM_ALIGN (mem) >= 128)
4020 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4022 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4024 rtx p0 = XEXP (XEXP (mem, 0), 0);
4025 rtx p1 = XEXP (XEXP (mem, 0), 1);
4026 if (regno_aligned_for_load (REGNO (p0)))
4028 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4030 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4034 else if (GET_CODE (XEXP (mem, 0)) == REG)
4036 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4039 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4041 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4043 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4044 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4045 if (GET_CODE (p0) == SYMBOL_REF
4046 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4052 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4053 into its SYMBOL_REF_FLAGS. */
4055 spu_encode_section_info (tree decl, rtx rtl, int first)
4057 default_encode_section_info (decl, rtl, first);
4059 /* If a variable has a forced alignment to < 16 bytes, mark it with
4060 SYMBOL_FLAG_ALIGN1. */
4061 if (TREE_CODE (decl) == VAR_DECL
4062 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4063 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4066 /* Return TRUE if we are certain the mem refers to a complete object
4067 which is both 16-byte aligned and padded to a 16-byte boundary. This
4068 would make it safe to store with a single instruction.
4069 We guarantee the alignment and padding for static objects by aligning
4070 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4071 FIXME: We currently cannot guarantee this for objects on the stack
4072 because assign_parm_setup_stack calls assign_stack_local with the
4073 alignment of the parameter mode and in that case the alignment never
4074 gets adjusted by LOCAL_ALIGNMENT. */
4076 store_with_one_insn_p (rtx mem)
4078 rtx addr = XEXP (mem, 0);
4079 if (GET_MODE (mem) == BLKmode)
4081 /* Only static objects. */
4082 if (GET_CODE (addr) == SYMBOL_REF)
4084 /* We use the associated declaration to make sure the access is
4085 referring to the whole object.
4086 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
4087 if it is necessary. Will there be cases where one exists, and
4088 the other does not? Will there be cases where both exist, but
4089 have different types? */
4090 tree decl = MEM_EXPR (mem);
4092 && TREE_CODE (decl) == VAR_DECL
4093 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4095 decl = SYMBOL_REF_DECL (addr);
4097 && TREE_CODE (decl) == VAR_DECL
4098 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4105 spu_expand_mov (rtx * ops, enum machine_mode mode)
4107 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4110 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4112 rtx from = SUBREG_REG (ops[1]);
4113 enum machine_mode imode = GET_MODE (from);
4115 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4116 && GET_MODE_CLASS (imode) == MODE_INT
4117 && subreg_lowpart_p (ops[1]));
4119 if (GET_MODE_SIZE (imode) < 4)
4121 from = gen_rtx_SUBREG (SImode, from, 0);
4125 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4127 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4128 emit_insn (GEN_FCN (icode) (ops[0], from));
4131 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4135 /* At least one of the operands needs to be a register. */
4136 if ((reload_in_progress | reload_completed) == 0
4137 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4139 rtx temp = force_reg (mode, ops[1]);
4140 emit_move_insn (ops[0], temp);
4143 if (reload_in_progress || reload_completed)
4145 if (CONSTANT_P (ops[1]))
4146 return spu_split_immediate (ops);
4151 if (GET_CODE (ops[0]) == MEM)
4153 if (!spu_valid_move (ops))
4155 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4156 gen_reg_rtx (TImode)));
4160 else if (GET_CODE (ops[1]) == MEM)
4162 if (!spu_valid_move (ops))
4165 (ops[0], ops[1], gen_reg_rtx (TImode),
4166 gen_reg_rtx (SImode)));
4170 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4172 if (GET_CODE (ops[1]) == CONST_INT)
4174 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4175 if (val != INTVAL (ops[1]))
4177 emit_move_insn (ops[0], GEN_INT (val));
4186 spu_split_load (rtx * ops)
4188 enum machine_mode mode = GET_MODE (ops[0]);
4189 rtx addr, load, rot, mem, p0, p1;
4192 addr = XEXP (ops[1], 0);
4196 if (GET_CODE (addr) == PLUS)
4199 aligned reg + aligned reg => lqx
4200 aligned reg + unaligned reg => lqx, rotqby
4201 aligned reg + aligned const => lqd
4202 aligned reg + unaligned const => lqd, rotqbyi
4203 unaligned reg + aligned reg => lqx, rotqby
4204 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4205 unaligned reg + aligned const => lqd, rotqby
4206 unaligned reg + unaligned const -> not allowed by legitimate address
4208 p0 = XEXP (addr, 0);
4209 p1 = XEXP (addr, 1);
4210 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
4212 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4214 emit_insn (gen_addsi3 (ops[3], p0, p1));
4222 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4224 rot_amt = INTVAL (p1) & 15;
4225 p1 = GEN_INT (INTVAL (p1) & -16);
4226 addr = gen_rtx_PLUS (SImode, p0, p1);
4228 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4232 else if (GET_CODE (addr) == REG)
4234 if (!regno_aligned_for_load (REGNO (addr)))
4237 else if (GET_CODE (addr) == CONST)
4239 if (GET_CODE (XEXP (addr, 0)) == PLUS
4240 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4241 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4243 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4245 addr = gen_rtx_CONST (Pmode,
4246 gen_rtx_PLUS (Pmode,
4247 XEXP (XEXP (addr, 0), 0),
4248 GEN_INT (rot_amt & -16)));
4250 addr = XEXP (XEXP (addr, 0), 0);
4255 else if (GET_CODE (addr) == CONST_INT)
4257 rot_amt = INTVAL (addr);
4258 addr = GEN_INT (rot_amt & -16);
4260 else if (!ALIGNED_SYMBOL_REF_P (addr))
4263 if (GET_MODE_SIZE (mode) < 4)
4264 rot_amt += GET_MODE_SIZE (mode) - 4;
4270 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4277 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4278 mem = change_address (ops[1], TImode, addr);
4280 emit_insn (gen_movti (load, mem));
4283 emit_insn (gen_rotqby_ti (load, load, rot));
4285 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4287 if (reload_completed)
4288 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4290 emit_insn (gen_spu_convert (ops[0], load));
4294 spu_split_store (rtx * ops)
4296 enum machine_mode mode = GET_MODE (ops[0]);
4299 rtx addr, p0, p1, p1_lo, smem;
4303 addr = XEXP (ops[0], 0);
4305 if (GET_CODE (addr) == PLUS)
4308 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4309 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4310 aligned reg + aligned const => lqd, c?d, shuf, stqx
4311 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4312 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4313 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4314 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4315 unaligned reg + unaligned const -> not allowed by legitimate address
4318 p0 = XEXP (addr, 0);
4319 p1 = p1_lo = XEXP (addr, 1);
4320 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4322 p1_lo = GEN_INT (INTVAL (p1) & 15);
4323 p1 = GEN_INT (INTVAL (p1) & -16);
4324 addr = gen_rtx_PLUS (SImode, p0, p1);
4327 else if (GET_CODE (addr) == REG)
4331 p1 = p1_lo = const0_rtx;
4336 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4337 p1 = 0; /* aform doesn't use p1 */
4339 if (ALIGNED_SYMBOL_REF_P (addr))
4341 else if (GET_CODE (addr) == CONST)
4343 if (GET_CODE (XEXP (addr, 0)) == PLUS
4344 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4345 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4347 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4349 addr = gen_rtx_CONST (Pmode,
4350 gen_rtx_PLUS (Pmode,
4351 XEXP (XEXP (addr, 0), 0),
4352 GEN_INT (v & -16)));
4354 addr = XEXP (XEXP (addr, 0), 0);
4355 p1_lo = GEN_INT (v & 15);
4358 else if (GET_CODE (addr) == CONST_INT)
4360 p1_lo = GEN_INT (INTVAL (addr) & 15);
4361 addr = GEN_INT (INTVAL (addr) & -16);
4365 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4367 scalar = store_with_one_insn_p (ops[0]);
4370 /* We could copy the flags from the ops[0] MEM to mem here,
4371 We don't because we want this load to be optimized away if
4372 possible, and copying the flags will prevent that in certain
4373 cases, e.g. consider the volatile flag. */
4375 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4376 set_mem_alias_set (lmem, 0);
4377 emit_insn (gen_movti (reg, lmem));
4379 if (!p0 || regno_aligned_for_load (REGNO (p0)))
4380 p0 = stack_pointer_rtx;
4384 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4385 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4387 else if (reload_completed)
4389 if (GET_CODE (ops[1]) == REG)
4390 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4391 else if (GET_CODE (ops[1]) == SUBREG)
4392 emit_move_insn (reg,
4393 gen_rtx_REG (GET_MODE (reg),
4394 REGNO (SUBREG_REG (ops[1]))));
4400 if (GET_CODE (ops[1]) == REG)
4401 emit_insn (gen_spu_convert (reg, ops[1]));
4402 else if (GET_CODE (ops[1]) == SUBREG)
4403 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4408 if (GET_MODE_SIZE (mode) < 4 && scalar)
4409 emit_insn (gen_shlqby_ti
4410 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4412 smem = change_address (ops[0], TImode, addr);
4413 /* We can't use the previous alias set because the memory has changed
4414 size and can potentially overlap objects of other types. */
4415 set_mem_alias_set (smem, 0);
4417 emit_insn (gen_movti (smem, reg));
4420 /* Return TRUE if X is MEM which is a struct member reference
4421 and the member can safely be loaded and stored with a single
4422 instruction because it is padded. */
4424 mem_is_padded_component_ref (rtx x)
4426 tree t = MEM_EXPR (x);
4428 if (!t || TREE_CODE (t) != COMPONENT_REF)
4430 t = TREE_OPERAND (t, 1);
4431 if (!t || TREE_CODE (t) != FIELD_DECL
4432 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4434 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4435 r = DECL_FIELD_CONTEXT (t);
4436 if (!r || TREE_CODE (r) != RECORD_TYPE)
4438 /* Make sure they are the same mode */
4439 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4441 /* If there are no following fields then the field alignment assures
4442 the structure is padded to the alignment which means this field is
4444 if (TREE_CHAIN (t) == 0)
4446 /* If the following field is also aligned then this field will be
4449 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4454 /* Parse the -mfixed-range= option string. */
4456 fix_range (const char *const_str)
4459 char *str, *dash, *comma;
4461 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4462 REG2 are either register names or register numbers. The effect
4463 of this option is to mark the registers in the range from REG1 to
4464 REG2 as ``fixed'' so they won't be used by the compiler. */
4466 i = strlen (const_str);
4467 str = (char *) alloca (i + 1);
4468 memcpy (str, const_str, i + 1);
4472 dash = strchr (str, '-');
4475 warning (0, "value of -mfixed-range must have form REG1-REG2");
4479 comma = strchr (dash + 1, ',');
4483 first = decode_reg_name (str);
4486 warning (0, "unknown register name: %s", str);
4490 last = decode_reg_name (dash + 1);
4493 warning (0, "unknown register name: %s", dash + 1);
4501 warning (0, "%s-%s is an empty range", str, dash + 1);
4505 for (i = first; i <= last; ++i)
4506 fixed_regs[i] = call_used_regs[i] = 1;
4517 spu_valid_move (rtx * ops)
4519 enum machine_mode mode = GET_MODE (ops[0]);
4520 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4523 /* init_expr_once tries to recog against load and store insns to set
4524 the direct_load[] and direct_store[] arrays. We always want to
4525 consider those loads and stores valid. init_expr_once is called in
4526 the context of a dummy function which does not have a decl. */
4527 if (cfun->decl == 0)
4530 /* Don't allows loads/stores which would require more than 1 insn.
4531 During and after reload we assume loads and stores only take 1
4533 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4535 if (GET_CODE (ops[0]) == MEM
4536 && (GET_MODE_SIZE (mode) < 4
4537 || !(store_with_one_insn_p (ops[0])
4538 || mem_is_padded_component_ref (ops[0]))))
4540 if (GET_CODE (ops[1]) == MEM
4541 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4547 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4548 can be generated using the fsmbi instruction. */
4550 fsmbi_const_p (rtx x)
4554 /* We can always choose TImode for CONST_INT because the high bits
4555 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4556 enum immediate_class c = classify_immediate (x, TImode);
4557 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4562 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4563 can be generated using the cbd, chd, cwd or cdd instruction. */
4565 cpat_const_p (rtx x, enum machine_mode mode)
4569 enum immediate_class c = classify_immediate (x, mode);
4570 return c == IC_CPAT;
4576 gen_cpat_const (rtx * ops)
4578 unsigned char dst[16];
4579 int i, offset, shift, isize;
4580 if (GET_CODE (ops[3]) != CONST_INT
4581 || GET_CODE (ops[2]) != CONST_INT
4582 || (GET_CODE (ops[1]) != CONST_INT
4583 && GET_CODE (ops[1]) != REG))
4585 if (GET_CODE (ops[1]) == REG
4586 && (!REG_POINTER (ops[1])
4587 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4590 for (i = 0; i < 16; i++)
4592 isize = INTVAL (ops[3]);
4595 else if (isize == 2)
4599 offset = (INTVAL (ops[2]) +
4600 (GET_CODE (ops[1]) ==
4601 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4602 for (i = 0; i < isize; i++)
4603 dst[offset + i] = i + shift;
4604 return array_to_constant (TImode, dst);
4607 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4608 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4609 than 16 bytes, the value is repeated across the rest of the array. */
4611 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4616 memset (arr, 0, 16);
4617 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4618 if (GET_CODE (x) == CONST_INT
4619 || (GET_CODE (x) == CONST_DOUBLE
4620 && (mode == SFmode || mode == DFmode)))
4622 gcc_assert (mode != VOIDmode && mode != BLKmode);
4624 if (GET_CODE (x) == CONST_DOUBLE)
4625 val = const_double_to_hwint (x);
4628 first = GET_MODE_SIZE (mode) - 1;
4629 for (i = first; i >= 0; i--)
4631 arr[i] = val & 0xff;
4634 /* Splat the constant across the whole array. */
4635 for (j = 0, i = first + 1; i < 16; i++)
4638 j = (j == first) ? 0 : j + 1;
4641 else if (GET_CODE (x) == CONST_DOUBLE)
4643 val = CONST_DOUBLE_LOW (x);
4644 for (i = 15; i >= 8; i--)
4646 arr[i] = val & 0xff;
4649 val = CONST_DOUBLE_HIGH (x);
4650 for (i = 7; i >= 0; i--)
4652 arr[i] = val & 0xff;
4656 else if (GET_CODE (x) == CONST_VECTOR)
4660 mode = GET_MODE_INNER (mode);
4661 units = CONST_VECTOR_NUNITS (x);
4662 for (i = 0; i < units; i++)
4664 elt = CONST_VECTOR_ELT (x, i);
4665 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4667 if (GET_CODE (elt) == CONST_DOUBLE)
4668 val = const_double_to_hwint (elt);
4671 first = GET_MODE_SIZE (mode) - 1;
4672 if (first + i * GET_MODE_SIZE (mode) > 16)
4674 for (j = first; j >= 0; j--)
4676 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4686 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4687 smaller than 16 bytes, use the bytes that would represent that value
4688 in a register, e.g., for QImode return the value of arr[3]. */
4690 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4692 enum machine_mode inner_mode;
4694 int units, size, i, j, k;
4697 if (GET_MODE_CLASS (mode) == MODE_INT
4698 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4700 j = GET_MODE_SIZE (mode);
4701 i = j < 4 ? 4 - j : 0;
4702 for (val = 0; i < j; i++)
4703 val = (val << 8) | arr[i];
4704 val = trunc_int_for_mode (val, mode);
4705 return GEN_INT (val);
4711 for (i = high = 0; i < 8; i++)
4712 high = (high << 8) | arr[i];
4713 for (i = 8, val = 0; i < 16; i++)
4714 val = (val << 8) | arr[i];
4715 return immed_double_const (val, high, TImode);
4719 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4720 val = trunc_int_for_mode (val, SImode);
4721 return hwint_to_const_double (SFmode, val);
4725 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4727 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
4728 return hwint_to_const_double (DFmode, val);
4731 if (!VECTOR_MODE_P (mode))
4734 units = GET_MODE_NUNITS (mode);
4735 size = GET_MODE_UNIT_SIZE (mode);
4736 inner_mode = GET_MODE_INNER (mode);
4737 v = rtvec_alloc (units);
4739 for (k = i = 0; i < units; ++i)
4742 for (j = 0; j < size; j++, k++)
4743 val = (val << 8) | arr[k];
4745 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4746 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4748 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4753 return gen_rtx_CONST_VECTOR (mode, v);
4757 reloc_diagnostic (rtx x)
4759 tree loc_decl, decl = 0;
4761 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4764 if (GET_CODE (x) == SYMBOL_REF)
4765 decl = SYMBOL_REF_DECL (x);
4766 else if (GET_CODE (x) == CONST
4767 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4768 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4770 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4771 if (decl && !DECL_P (decl))
4774 /* We use last_assemble_variable_decl to get line information. It's
4775 not always going to be right and might not even be close, but will
4776 be right for the more common cases. */
4777 if (!last_assemble_variable_decl || in_section == ctors_section)
4780 loc_decl = last_assemble_variable_decl;
4782 /* The decl could be a string constant. */
4783 if (decl && DECL_P (decl))
4784 msg = "%Jcreating run-time relocation for %qD";
4786 msg = "creating run-time relocation";
4788 if (TARGET_WARN_RELOC)
4789 warning (0, msg, loc_decl, decl);
4791 error (msg, loc_decl, decl);

/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them.  */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't.  */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}
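
/* Illustrative trigger (an assumption, based on the hook above, not a
   statement from the original sources): compiling something like
       extern int x;
       int *p = &x;
   with -fpic makes assemble_integer emit a SYMBOL_REF for the
   initializer, so the warning or error above fires here.  */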

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}

static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
               bool speed ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return (LEGITIMATE_CONSTANT_P (x)) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough.  */
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
        *total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
        {
          *total = COSTS_N_INSNS (9);
          return true;
        }
      break;

    case MULT:
      cost =
        GET_CODE (XEXP (x, 0)) ==
        REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
              cost = COSTS_N_INSNS (14);
              if ((val & 0xffff) == 0)
                cost = COSTS_N_INSNS (9);
              else if (val > 0 && val < 0x10000)
                cost = COSTS_N_INSNS (11);
            }
        }
      *total = cost;
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
        *total = COSTS_N_INSNS (0);
      else
        *total = COSTS_N_INSNS (4);
      return true;
    }
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0).  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
  *total = cost;
  return true;
}
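
/* Worked example (illustrative): a DImode AND has no special case in
   the switch above, so it falls through with the default cost
   COSTS_N_INSNS (2) and (when cfun->decl is set) is scaled by
   (8/4) * (8/4) == 4 to COSTS_N_INSNS (8), reflecting that multi-word
   operations expand to roughly that many SImode insns.  */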

static enum machine_mode
spu_unwind_word_mode (void)
{
  return SImode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}

/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register).  */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* Copy the back chain so we can save it back again.  */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
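
/* Note on the shuffle pattern above (illustrative, assuming the shufb
   semantics of the SPU ISA): each control byte selects one byte of the
   32-byte concatenation of the two sources, 0x00-0x0f from the first
   and 0x10-0x1f from the second.  The pattern 00 01 02 03 repeated
   four times therefore splats the first word of op1 into all four
   slots of `splatted', so the requested size is subtracted from both
   the stack pointer (slot 0) and the Available Stack Size (slot 1) in
   a single vector subtract.  */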

void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain.  */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}

static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
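
/* Effect (illustrative): with these entries a 64-bit division `a / b'
   on DImode operands is not expanded inline but emitted as a call to
   the library routine __divdi3, and likewise for the other DImode and
   TImode operations listed.  */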

/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want.  */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
          && ((type) == 0
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
              || int_size_in_bytes (type) >
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}

/* Create the built-in types and functions.  */
struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};

static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
                        (void_type_node,
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared.  */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];            /* build_function will make a copy.  */
      int parm;

      if (d->name == 0)
        continue;

      /* Find last parm.  */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
        ;

      p = void_list_node;
      while (parm > 1)
        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      d->fndecl =
        add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
                              NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
        TREE_READONLY (d->fndecl) = 1;

      /* These builtins don't throw.  */
      TREE_NOTHROW (d->fndecl) = 1;
    }
}

void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}

int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}

void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
          && GET_CODE (ops[1]) != SUBREG)
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
        {
        case V2DImode:
        case V2DFmode:
          shuf =
            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
                                TImode);
          break;
        case V4SImode:
        case V4SFmode:
          shuf =
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
                                TImode);
          break;
        case V8HImode:
          shuf =
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
                                TImode);
          break;
        case V16QImode:
          shuf =
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
                                TImode);
          break;
        default:
          abort ();
        }
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
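
/* Note on the shuffle controls above (illustrative): with ops[1] used
   as both shufb sources, control bytes 0x00-0x0f select from the first
   copy and 0x10-0x1f from the second.  So 00..07 10..17 duplicates the
   doubleword, 00 01 02 03 repeated splats the word in the preferred
   slot, 02 03 repeated splats the halfword at bytes 2-3, and 03
   repeated splats the byte at the preferred byte position 3.  */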

void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
          break;
        case V4SFmode:
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
          break;
        case V4SImode:
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
          break;
        case V2DImode:
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
          break;
        case V2DFmode:
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
          break;
        default:
          abort ();
        }
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }

  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
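
/* Worked example (illustrative): extracting a variable element i from
   a V16QImode vector computes tmp = i - 3 and rotates the quadword
   left by that many bytes, which moves byte i to byte 3, the preferred
   scalar slot for QImode; spu_convert then reads it out.  For word and
   doubleword elements the byte rotate count is i << 2 and i << 3.  */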

void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
             (mask, stack_pointer_rtx, offset,
              GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}

void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
        pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
          emit_insn (gen_addsi3 (offset, offset, offset));
          break;
        case V4SFmode:
        case V4SImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
          break;
        case V2DImode:
        case V2DFmode:
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
          break;
        default:
          abort ();
        }
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}

void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
        2, 3, 0, 1, 18, 19, 16, 17,
        0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
        0x41, 0, 0, 79,
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
        0x60, 0x80, 0, 79,
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
        0x30, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
         fits 18 bits and the last 4 are zeros.  This will be true if
         the stack pointer is initialized to 0x3fff0 at program start,
         otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
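
/* Encoding note (illustrative, assuming SPU instruction formats; the
   opcode names are not spelled out in this file): 0x42 is the high
   byte of an `ila' whose RT field occupies the low 7 bits, which is
   why cxt is shifted left by 7 before being or'd into the 18-bit
   immediate field starting at bit 7.  fnaddr is shifted by 5 because a
   16-bit branch field at bit 7 encodes the word address, addr >> 2.
   In the large-memory variant, 0x41 and 0x60/0x80 appear to be
   `ilhu'/`iohl' pairs loading 32-bit values, and 0x35000000 + (79 << 7)
   an indirect branch through register 79.  */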

void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
        arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
        arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
        {
        case HImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_extendhisi2 (sign, ops[1]));
          arr[last] = 0x03;
          arr[last - 1] = 0x02;
          break;
        case SImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
          for (i = 0; i < 4; i++)
            arr[last - i] = 3 - i;
          break;
        case DImode:
          sign = gen_reg_rtx (SImode);
          c = gen_reg_rtx (SImode);
          emit_insn (gen_spu_convert (c, ops[1]));
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
          for (i = 0; i < 8; i++)
            arr[last - i] = 7 - i;
          break;
        default:
          abort ();
        }
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
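
/* Worked example (illustrative): sign-extending SImode to DImode.
   last == 7, so the pattern becomes
       10 10 10 10 00 01 02 03 10 10 ... 10
   Controls 0x10-0x13 select the sign register (all sign bits) for
   bytes 0-3 and 0x00-0x03 copy the source word into bytes 4-7, leaving
   a sign-extended doubleword in the preferred slot.  */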

/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        ++n_var;
      else
        {
          if (first_constant == NULL_RTX)
            first_constant = x;
        }
      if (i > 0 && !rtx_equal_p (x, first))
        all_same = false;
    }

  /* If all elements are the same, use splats to repeat elements.  */
  if (all_same)
    {
      if (!CONSTANT_P (first)
          && !register_operand (first, GET_MODE (x)))
        first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* Load constant parts.  */
  if (n_var != n_elts)
    {
      if (n_var == 0)
        {
          emit_move_insn (target,
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
        }
      else
        {
          rtx constant_parts_rtx = copy_rtx (vals);

          gcc_assert (first_constant != NULL_RTX);
          /* Fill empty slots with the first constant; this increases
             our chance of using splats in the recursive call below.  */
          for (i = 0; i < n_elts; ++i)
            {
              x = XVECEXP (constant_parts_rtx, 0, i);
              if (!(CONST_INT_P (x)
                    || GET_CODE (x) == CONST_DOUBLE
                    || GET_CODE (x) == CONST_FIXED))
                XVECEXP (constant_parts_rtx, 0, i) = first_constant;
            }

          spu_expand_vector_init (target, constant_parts_rtx);
        }
    }

  /* Load variable parts.  */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
        {
          x = XVECEXP (vals, 0, i);
          if (!(CONST_INT_P (x)
                || GET_CODE (x) == CONST_DOUBLE
                || GET_CODE (x) == CONST_FIXED))
            {
              if (!register_operand (x, GET_MODE (x)))
                x = force_reg (GET_MODE (x), x);
              insert_operands[1] = x;
              insert_operands[3] = GEN_INT (i);
              spu_builtin_insert (insert_operands);
            }
        }
    }
}
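
/* Worked example (illustrative): initializing a V4SI vector from
   { x, 1, 2, 3 } with x in a register.  n_var == 1, so the constant
   pass first loads the vector { 1, 1, 2, 3 } (slot 0 padded with the
   first constant, making a plain constant-vector move possible), and
   the variable pass then uses spu_builtin_insert to shuffle x into
   element 0.  */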

/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if a valid insn is not available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
                  enum machine_mode dest_mode,
                  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}

/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is the expected destination mode.  This is a recursive
   function.  */
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
                         rtx op0, rtx op1,
                         enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions use destination
     V4SImode; double floating point vector compares use destination
     V2DImode.  Move the destination to the appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
        {
        case LT:
          rcode = GT;
          swap_operands = true;
          try_again = true;
          break;
        case LTU:
          rcode = GTU;
          swap_operands = true;
          try_again = true;
          break;
        case NE:
          /* Treat A != B as ~(A==B).  */
          {
            enum insn_code nor_code;
            rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
            nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
            gcc_assert (nor_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        case GE:
        case GEU:
        case LE:
        case LEU:
          /* Try GT/GTU/LT/LTU OR EQ.  */
          {
            rtx c_rtx, eq_rtx;
            enum insn_code ior_code;
            enum rtx_code new_code;

            switch (rcode)
              {
              case GE:  new_code = GT;  break;
              case GEU: new_code = GTU; break;
              case LE:  new_code = LT;  break;
              case LEU: new_code = LTU; break;
              default:
                gcc_unreachable ();
              }

            c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
            eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

            ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
            gcc_assert (ior_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        default:
          gcc_unreachable ();
        }

      /* You only get two chances.  */
      if (try_again)
        vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
        {
          rtx tmp;
          tmp = op0;
          op0 = op1;
          op1 = tmp;
        }
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}

/* Emit a vector conditional expression.
   DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR
   operands.  CC_OP0 and CC_OP1 are the two operands for the relation
   operation COND.  */
static rtx
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
                           rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return dest;
}
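
/* Note (illustrative, assuming selb's bit-select semantics,
   rt = (ra & ~rc) | (rb & rc)): mask bytes are all-ones where the
   comparison held, and selb takes bits from its second source where
   the mask is 1, so op1 -- the VEC_COND_EXPR "true" value -- is chosen
   for elements where COND holds and op2 otherwise.  */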

static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
          || GET_MODE (op) == BLKmode)
        return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
        return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}

static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
        error ("%s expects an integer literal in the range [%d, %d].",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
          && (GET_CODE (XEXP (op, 0)) == PLUS
              || GET_CODE (XEXP (op, 0)) == MINUS))
        {
          v = INTVAL (XEXP (XEXP (op, 0), 1));
          op = XEXP (XEXP (op, 0), 0);
        }
      else if (GET_CODE (op) == CONST_INT)
        v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
        v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range.  */
      if (v < spu_builtin_range[range].low
          || v > spu_builtin_range[range].high)
        error ("%s expects an integer literal in the range [%d, %d]. ("
               HOST_WIDE_INT_PRINT_DEC ")",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high,
               v);

      switch (p)
        {
        case SPU_BTI_S10_4:
          lsbits = 4;
          break;
        case SPU_BTI_U16_2:
          /* This is only used in lqa and stqa.  Even though the insns
             encode 16 bits of the address (all but the 2 least
             significant), only 14 bits are used because it is masked to
             be 16 byte aligned.  */
          lsbits = 4;
          break;
        case SPU_BTI_S16_2:
          /* This is used for lqr and stqr.  */
          lsbits = 2;
          break;
        default:
          lsbits = 0;
        }

      if (GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == SYMBOL_REF
              && SYMBOL_REF_FUNCTION_P (op))
          || (v & ((1 << lsbits) - 1)) != 0)
        warning (0, "%d least significant bits of %s are ignored.", lsbits,
                 d->name);
    }
}

static void
expand_builtin_args (struct spu_builtin_description *d, tree exp,
                     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl.  */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; i < insn_data[icode].n_operands; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      if (arg == 0)
        abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
    }
}

static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
                      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  tree return_type;

  /* Set up ops[] with values from the arglist.  */
  expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
        ops[0] = target;
      else
        target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
        abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
                              gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < insn_data[icode].n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
        mode = insn_data[d->icode].operand[i].mode;
      else
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
          && (GET_CODE (ops[i]) == CONST_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
        {
          if (GET_CODE (ops[i]) == CONST_INT)
            ops[i] = spu_const (mode, INTVAL (ops[i]));
          else
            {
              rtx reg = gen_reg_rtx (mode);
              enum machine_mode imode = GET_MODE_INNER (mode);
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
              if (imode != GET_MODE (ops[i]))
                ops[i] = convert_to_mode (imode, ops[i],
                                          TYPE_UNSIGNED (spu_builtin_types
                                                         [d->parm[i]]));
              emit_insn (gen_spu_splats (reg, ops[i]));
              ops[i] = reg;
            }
        }

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
        ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}

rtx
spu_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}

/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULE_0].fndecl;
      else
        return spu_builtins[SPU_MULE_1].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULO_1].fndecl;
      else
        return spu_builtins[SPU_MULO_0].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we
     subtract the latency of a mispredicted branch from the costs that
     are incurred when the vectorized version is executed.  */
  if (runtime_test)
    return -19;
  else
    return 0;
}

/* Return true iff a data reference of TYPE can reach vector alignment
   (16) after applying N iterations.  This routine does not determine
   how many iterations are required to reach the desired alignment.  */
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}

/* Implement targetm.vectorize.builtin_vec_perm.  */
static tree
spu_builtin_vec_perm (tree type, tree *mask_element_type)
{
  struct spu_builtin_description *d;

  *mask_element_type = unsigned_char_type_node;

  switch (TYPE_MODE (type))
    {
    case V16QImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_0];
      else
        d = &spu_builtins[SPU_SHUFFLE_1];
      break;

    case V8HImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_2];
      else
        d = &spu_builtins[SPU_SHUFFLE_3];
      break;

    case V4SImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_4];
      else
        d = &spu_builtins[SPU_SHUFFLE_5];
      break;

    case V2DImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_6];
      else
        d = &spu_builtins[SPU_SHUFFLE_7];
      break;

    case V4SFmode:
      d = &spu_builtins[SPU_SHUFFLE_8];
      break;

    case V2DFmode:
      d = &spu_builtins[SPU_SHUFFLE_9];
      break;

    default:
      return NULL_TREE;
    }

  gcc_assert (d);
  return d->fndecl;
}

/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
   -2 means the instruction can go in either pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
        fprintf (dump_file, "i%d %s %d %d\n",
                 INSN_UID (insn),
                 insn_data[INSN_CODE (insn)].name,
                 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
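
/* Worked example (illustrative): with t[0] == 3 either-pipe, t[2] == 2
   pipe0-only and t[3] == 4 pipe1-only instructions, the MII is
   MAX ((3 + 2 + 4 + 1) / 2, MAX (2, 4)) == 5: the flexible
   instructions are assumed to spread evenly over both pipes, but an
   iteration can never finish faster than its busiest single pipe.  */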

void
spu_init_expanders (void)
{
  /* HARD_FRAME_REGISTER is only 128 bit aligned when
     frame_pointer_needed is true.  We don't know that until we're
     expanding the prologue.  */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}

static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for shift counts.  */
  return SImode;
}

/* An early place to adjust some flags after GCC has finished processing
   command line options.  */
static void
asm_file_start (void)
{
  /* Variable tracking should be run after all optimizations which
     change order of insns.  It also needs a valid CFG.  */
  spu_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
}