1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
61 /* Builtin types, data and prototypes. */
62 struct spu_builtin_range
67 static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
83 /* Target specific attribute specifications. */
84 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
86 /* Prototypes and external defs. */
87 static void spu_init_builtins (void);
88 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91 static rtx get_pic_reg (void);
92 static int need_to_save_reg (int regno, int saving);
93 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
97 static void emit_nop_for_insn (rtx insn);
98 static bool insn_clobbers_hbr (rtx insn);
99 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
100 int distance, sbitmap blocks);
101 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
103 static rtx get_branch_target (rtx branch);
104 static void spu_machine_dependent_reorg (void);
105 static int spu_sched_issue_rate (void);
106 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
108 static int get_pipe (rtx insn);
109 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110 static void spu_sched_init_global (FILE *, int, int);
111 static void spu_sched_init (FILE *, int, int);
112 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
113 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
115 unsigned char *no_add_attrs);
116 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
118 unsigned char *no_add_attrs);
119 static int spu_naked_function_p (tree func);
120 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
122 static tree spu_build_builtin_va_list (void);
123 static void spu_va_start (tree, rtx);
124 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
126 static int regno_aligned_for_load (int regno);
127 static int store_with_one_insn_p (rtx mem);
128 static int mem_is_padded_component_ref (rtx x);
129 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130 static void spu_asm_globalize_label (FILE * file, const char *name);
131 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
132 int *total, bool speed);
133 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134 static void spu_init_libfuncs (void);
135 static bool spu_return_in_memory (const_tree type, const_tree fntype);
136 static void fix_range (const char *);
137 static void spu_encode_section_info (tree, rtx, int);
138 static tree spu_builtin_mul_widen_even (tree);
139 static tree spu_builtin_mul_widen_odd (tree);
140 static tree spu_builtin_mask_for_load (void);
141 static int spu_builtin_vectorization_cost (bool);
142 static bool spu_vector_alignment_reachable (const_tree, bool);
143 static tree spu_builtin_vec_perm (tree, tree *);
144 static int spu_sms_res_mii (struct ddg *g);
145 static void asm_file_start (void);
146 static unsigned int spu_section_type_flags (tree, const char *, int);
148 extern const char *reg_names[];
149 rtx spu_compare_op0, spu_compare_op1;
151 /* Which instruction set architecture to use. */
153 /* Which cpu are we tuning for. */
156 /* The hardware requires 8 insns between a hint and the branch it
157 affects. This variable describes how many rtl instructions the
158 compiler needs to see before inserting a hint, and then the compiler
159 will insert enough nops to make it at least 8 insns. The default is
160 for the compiler to allow up to 2 nops to be emitted. The nops are
161 inserted in pairs, so we round down. */
162 int spu_hint_dist = (8*4) - (2*4);
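/* Worked example: 8 insns * 4 bytes = 32 bytes must separate the hint
   from its branch; with the default budget of 2 nops (2 * 4 = 8 bytes)
   the compiler only needs to see 32 - 8 = 24 bytes of existing insns,
   which is the value computed above.  */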
164 /* Determines whether we run variable tracking in machine dependent reorg. */
166 static int spu_flag_var_tracking;
181 IC_POOL, /* constant pool */
182 IC_IL1, /* one il* instruction */
183 IC_IL2, /* both ilhu and iohl instructions */
184 IC_IL1s, /* one il* instruction */
185 IC_IL2s, /* both ilhu and iohl instructions */
186 IC_FSMBI, /* the fsmbi instruction */
187 IC_CPAT, /* one of the c*d instructions */
188 IC_FSMBI2 /* fsmbi plus 1 other instruction */
191 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
192 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
193 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
194 static enum immediate_class classify_immediate (rtx op,
195 enum machine_mode mode);
197 static enum machine_mode spu_unwind_word_mode (void);
199 static enum machine_mode
200 spu_libgcc_cmp_return_mode (void);
202 static enum machine_mode
203 spu_libgcc_shift_count_mode (void);
205 /* Built in types. */
206 tree spu_builtin_types[SPU_BTI_MAX];
208 /* TARGET overrides. */
210 #undef TARGET_INIT_BUILTINS
211 #define TARGET_INIT_BUILTINS spu_init_builtins
213 #undef TARGET_EXPAND_BUILTIN
214 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
216 #undef TARGET_UNWIND_WORD_MODE
217 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
219 /* The .8byte directive doesn't seem to work well for a 32 bit architecture. */
221 #undef TARGET_ASM_UNALIGNED_DI_OP
222 #define TARGET_ASM_UNALIGNED_DI_OP NULL
224 #undef TARGET_RTX_COSTS
225 #define TARGET_RTX_COSTS spu_rtx_costs
227 #undef TARGET_ADDRESS_COST
228 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
230 #undef TARGET_SCHED_ISSUE_RATE
231 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
233 #undef TARGET_SCHED_INIT_GLOBAL
234 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
236 #undef TARGET_SCHED_INIT
237 #define TARGET_SCHED_INIT spu_sched_init
239 #undef TARGET_SCHED_VARIABLE_ISSUE
240 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
242 #undef TARGET_SCHED_REORDER
243 #define TARGET_SCHED_REORDER spu_sched_reorder
245 #undef TARGET_SCHED_REORDER2
246 #define TARGET_SCHED_REORDER2 spu_sched_reorder
248 #undef TARGET_SCHED_ADJUST_COST
249 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
251 const struct attribute_spec spu_attribute_table[];
252 #undef TARGET_ATTRIBUTE_TABLE
253 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
255 #undef TARGET_ASM_INTEGER
256 #define TARGET_ASM_INTEGER spu_assemble_integer
258 #undef TARGET_SCALAR_MODE_SUPPORTED_P
259 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
261 #undef TARGET_VECTOR_MODE_SUPPORTED_P
262 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
264 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
265 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
267 #undef TARGET_ASM_GLOBALIZE_LABEL
268 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
270 #undef TARGET_PASS_BY_REFERENCE
271 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
273 #undef TARGET_MUST_PASS_IN_STACK
274 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
276 #undef TARGET_BUILD_BUILTIN_VA_LIST
277 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
279 #undef TARGET_EXPAND_BUILTIN_VA_START
280 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
282 #undef TARGET_SETUP_INCOMING_VARARGS
283 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
285 #undef TARGET_MACHINE_DEPENDENT_REORG
286 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
288 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
289 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
291 #undef TARGET_DEFAULT_TARGET_FLAGS
292 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
294 #undef TARGET_INIT_LIBFUNCS
295 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
297 #undef TARGET_RETURN_IN_MEMORY
298 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
300 #undef TARGET_ENCODE_SECTION_INFO
301 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
303 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
304 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
306 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
307 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
309 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
310 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
312 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
313 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
315 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
316 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
318 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
319 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
321 #undef TARGET_LIBGCC_CMP_RETURN_MODE
322 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
324 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
325 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
327 #undef TARGET_SCHED_SMS_RES_MII
328 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
330 #undef TARGET_ASM_FILE_START
331 #define TARGET_ASM_FILE_START asm_file_start
333 #undef TARGET_SECTION_TYPE_FLAGS
334 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
336 struct gcc_target targetm = TARGET_INITIALIZER;
339 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
341 /* Override some of the default param values. With so many registers
342 larger values are better for these params. */
343 MAX_PENDING_LIST_LENGTH = 128;
345 /* With so many registers this is better enabled by default. */
346 flag_rename_registers = 1;
349 /* Sometimes certain combinations of command options do not make sense
350 on a particular target machine. You can define a macro
351 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
352 executed once just after all the command options have been parsed. */
354 spu_override_options (void)
356 /* Small loops will be completely peeled at -O3. For SPU it is more important
357 to keep code small by default. */
358 if (!flag_unroll_loops && !flag_peel_loops
359 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
360 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
362 flag_omit_frame_pointer = 1;
364 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
365 if (align_functions < 8)
368 spu_hint_dist = 8*4 - spu_max_nops*4;
369 if (spu_hint_dist < 0)
372 if (spu_fixed_range_string)
373 fix_range (spu_fixed_range_string);
375 /* Determine processor architectural level. */
378 if (strcmp (&spu_arch_string[0], "cell") == 0)
379 spu_arch = PROCESSOR_CELL;
380 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
381 spu_arch = PROCESSOR_CELLEDP;
383 error ("Unknown architecture '%s'", &spu_arch_string[0]);
386 /* Determine processor to tune for. */
389 if (strcmp (&spu_tune_string[0], "cell") == 0)
390 spu_tune = PROCESSOR_CELL;
391 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
392 spu_tune = PROCESSOR_CELLEDP;
394 error ("Unknown architecture '%s'", &spu_tune_string[0]);
397 /* Change defaults according to the processor architecture. */
398 if (spu_arch == PROCESSOR_CELLEDP)
400 /* If no command line option has been otherwise specified, change
401 the default to -mno-safe-hints on celledp -- only the original
402 Cell/B.E. processors require this workaround. */
403 if (!(target_flags_explicit & MASK_SAFE_HINTS))
404 target_flags &= ~MASK_SAFE_HINTS;
407 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
410 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
411 struct attribute_spec.handler. */
413 /* Table of machine attributes. */
414 const struct attribute_spec spu_attribute_table[] =
416 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
417 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
418 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
419 { NULL, 0, 0, false, false, false, NULL }
422 /* True if MODE is valid for the target. By "valid", we mean able to
423 be manipulated in non-trivial ways. In particular, this means all
424 the arithmetic is supported. */
426 spu_scalar_mode_supported_p (enum machine_mode mode)
444 /* Similarly for vector modes. "Supported" here is less strict. At
445 least some operations are supported; need to check optabs or builtins
446 for further details. */
448 spu_vector_mode_supported_p (enum machine_mode mode)
465 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
466 least significant bytes of the outer mode. This function returns
467 TRUE for the SUBREGs where this is correct. */
469 valid_subreg (rtx op)
471 enum machine_mode om = GET_MODE (op);
472 enum machine_mode im = GET_MODE (SUBREG_REG (op));
473 return om != VOIDmode && im != VOIDmode
474 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
475 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
476 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
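/* Illustrative cases (not from the original code): (subreg:SI (reg:QI))
   is accepted because both modes fit in 4 bytes, and (subreg:V4SI (reg:TI))
   because both are 16 bytes; (subreg:DI (reg:SI)) is rejected since the
   sizes differ and neither size class above applies.  */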
479 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
480 and adjust the start offset. */
482 adjust_operand (rtx op, HOST_WIDE_INT * start)
484 enum machine_mode mode;
486 /* Strip any paradoxical SUBREG. */
487 if (GET_CODE (op) == SUBREG
488 && (GET_MODE_BITSIZE (GET_MODE (op))
489 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
493 GET_MODE_BITSIZE (GET_MODE (op)) -
494 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
495 op = SUBREG_REG (op);
497 /* If it is smaller than SI, it will be widened to an SImode SUBREG below; adjust *START accordingly. */
498 op_size = GET_MODE_BITSIZE (GET_MODE (op));
502 *start += 32 - op_size;
505 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
506 mode = mode_for_size (op_size, MODE_INT, 0);
507 if (mode != GET_MODE (op))
508 op = gen_rtx_SUBREG (mode, op, 0);
513 spu_expand_extv (rtx ops[], int unsignedp)
515 HOST_WIDE_INT width = INTVAL (ops[2]);
516 HOST_WIDE_INT start = INTVAL (ops[3]);
517 HOST_WIDE_INT src_size, dst_size;
518 enum machine_mode src_mode, dst_mode;
519 rtx dst = ops[0], src = ops[1];
522 dst = adjust_operand (ops[0], 0);
523 dst_mode = GET_MODE (dst);
524 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
526 src = adjust_operand (src, &start);
527 src_mode = GET_MODE (src);
528 src_size = GET_MODE_BITSIZE (GET_MODE (src));
532 s = gen_reg_rtx (src_mode);
536 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
539 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
542 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
550 if (width < src_size)
557 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
560 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
563 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
568 s = gen_reg_rtx (src_mode);
569 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
574 convert_move (dst, src, unsignedp);
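/* In summary: the field is moved to the top of the register with a left
   shift by START, shifted back down by SRC_SIZE - WIDTH with a logical or
   arithmetic right shift depending on UNSIGNEDP, and finally converted to
   the destination mode.  */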
578 spu_expand_insv (rtx ops[])
580 HOST_WIDE_INT width = INTVAL (ops[1]);
581 HOST_WIDE_INT start = INTVAL (ops[2]);
582 HOST_WIDE_INT maskbits;
583 enum machine_mode dst_mode, src_mode;
584 rtx dst = ops[0], src = ops[3];
585 int dst_size, src_size;
591 if (GET_CODE (ops[0]) == MEM)
592 dst = gen_reg_rtx (TImode);
594 dst = adjust_operand (dst, &start);
595 dst_mode = GET_MODE (dst);
596 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
598 if (CONSTANT_P (src))
600 enum machine_mode m =
601 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
602 src = force_reg (m, convert_to_mode (m, src, 0));
604 src = adjust_operand (src, 0);
605 src_mode = GET_MODE (src);
606 src_size = GET_MODE_BITSIZE (GET_MODE (src));
608 mask = gen_reg_rtx (dst_mode);
609 shift_reg = gen_reg_rtx (dst_mode);
610 shift = dst_size - start - width;
612 /* It's not safe to use subreg here because the compiler assumes
613 that the SUBREG_REG is right justified in the SUBREG. */
614 convert_move (shift_reg, src, 1);
621 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
624 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
627 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
639 maskbits = (-1ll << (32 - width - start));
641 maskbits += (1ll << (32 - start));
642 emit_move_insn (mask, GEN_INT (maskbits));
645 maskbits = (-1ll << (64 - width - start));
647 maskbits += (1ll << (64 - start));
648 emit_move_insn (mask, GEN_INT (maskbits));
652 unsigned char arr[16];
654 memset (arr, 0, sizeof (arr));
655 arr[i] = 0xff >> (start & 7);
656 for (i++; i <= (start + width - 1) / 8; i++)
658 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
659 emit_move_insn (mask, array_to_constant (TImode, arr));
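/* At this point MASK has 1s over exactly the WIDTH-bit field that starts
   START bits from the most significant end of the destination; e.g. an
   SImode destination with start 4 and width 8 gives 0x0ff00000
   (illustrative values).  */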
665 if (GET_CODE (ops[0]) == MEM)
667 rtx aligned = gen_reg_rtx (SImode);
668 rtx low = gen_reg_rtx (SImode);
669 rtx addr = gen_reg_rtx (SImode);
670 rtx rotl = gen_reg_rtx (SImode);
671 rtx mask0 = gen_reg_rtx (TImode);
674 emit_move_insn (addr, XEXP (ops[0], 0));
675 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
676 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
677 emit_insn (gen_negsi2 (rotl, low));
678 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
679 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
680 mem = change_address (ops[0], TImode, aligned);
681 set_mem_alias_set (mem, 0);
682 emit_move_insn (dst, mem);
683 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
684 emit_move_insn (mem, dst);
685 if (start + width > MEM_ALIGN (ops[0]))
687 rtx shl = gen_reg_rtx (SImode);
688 rtx mask1 = gen_reg_rtx (TImode);
689 rtx dst1 = gen_reg_rtx (TImode);
691 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
692 emit_insn (gen_shlqby_ti (mask1, mask, shl));
693 mem1 = adjust_address (mem, TImode, 16);
694 set_mem_alias_set (mem1, 0);
695 emit_move_insn (dst1, mem1);
696 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
697 emit_move_insn (mem1, dst1);
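/* In other words, an unaligned store is done as a read-modify-write of
   the aligned quadword: the value and the mask are rotated to the byte
   offset of the address, the qword is loaded, merged with selb and
   stored back.  When the field can spill into the next quadword, that
   qword is patched the same way with the mask shifted by the remaining
   bytes.  */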
701 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
706 spu_expand_block_move (rtx ops[])
708 HOST_WIDE_INT bytes, align, offset;
709 rtx src, dst, sreg, dreg, target;
711 if (GET_CODE (ops[2]) != CONST_INT
712 || GET_CODE (ops[3]) != CONST_INT
713 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
716 bytes = INTVAL (ops[2]);
717 align = INTVAL (ops[3]);
727 for (offset = 0; offset + 16 <= bytes; offset += 16)
729 dst = adjust_address (ops[0], V16QImode, offset);
730 src = adjust_address (ops[1], V16QImode, offset);
731 emit_move_insn (dst, src);
736 unsigned char arr[16] = { 0 };
737 for (i = 0; i < bytes - offset; i++)
739 dst = adjust_address (ops[0], V16QImode, offset);
740 src = adjust_address (ops[1], V16QImode, offset);
741 mask = gen_reg_rtx (V16QImode);
742 sreg = gen_reg_rtx (V16QImode);
743 dreg = gen_reg_rtx (V16QImode);
744 target = gen_reg_rtx (V16QImode);
745 emit_move_insn (mask, array_to_constant (V16QImode, arr));
746 emit_move_insn (dreg, dst);
747 emit_move_insn (sreg, src);
748 emit_insn (gen_selb (target, dreg, sreg, mask));
749 emit_move_insn (dst, target);
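/* The mask built above presumably selects only the first BYTES - OFFSET
   bytes from the source, so the selb merges just the tail of the copy
   and the rest of the destination quadword is written back unchanged.  */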
757 { SPU_EQ, SPU_GT, SPU_GTU };
759 int spu_comp_icode[12][3] = {
760 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
761 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
762 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
763 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
764 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
765 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
766 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
767 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
768 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
769 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
770 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
771 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
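/* Rows follow the mode order above (QI, HI, SI, DI, TI, SF, DF, V16QI,
   V8HI, V4SI, V4SF, V2DF); columns are indexed by SPU_EQ, SPU_GT and
   SPU_GTU.  A zero entry means no instruction exists for that
   combination, e.g. no unsigned (clgt) compare for the float modes.  */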
774 /* Generate a compare for CODE. Return a brand-new rtx that represents
775 the result of the compare. GCC can figure this out too if we don't
776 provide all variations of compares, but GCC always wants to use
777 WORD_MODE; we can generate better code in most cases if we do it ourselves. */
780 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
782 int reverse_compare = 0;
783 int reverse_test = 0;
784 rtx compare_result, eq_result;
785 rtx comp_rtx, eq_rtx;
786 rtx target = operands[0];
787 enum machine_mode comp_mode;
788 enum machine_mode op_mode;
789 enum spu_comp_code scode, eq_code, ior_code;
793 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
794 and so on, to keep the constant in operand 1. */
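/* For example (illustrative constant), (X >= 16) is rewritten as
   (X > 15); the analogous rewrites are applied to the other >= and <
   comparisons.  */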
795 if (GET_CODE (spu_compare_op1) == CONST_INT)
797 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
798 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
802 spu_compare_op1 = GEN_INT (val);
806 spu_compare_op1 = GEN_INT (val);
810 spu_compare_op1 = GEN_INT (val);
814 spu_compare_op1 = GEN_INT (val);
823 op_mode = GET_MODE (spu_compare_op0);
829 if (HONOR_NANS (op_mode))
844 if (HONOR_NANS (op_mode))
936 comp_mode = V4SImode;
940 comp_mode = V2DImode;
947 if (GET_MODE (spu_compare_op1) == DFmode
948 && (scode != SPU_GT && scode != SPU_EQ))
951 if (is_set == 0 && spu_compare_op1 == const0_rtx
952 && (GET_MODE (spu_compare_op0) == SImode
953 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
955 /* Don't need to set a register with the result when we are
956 comparing against zero and branching. */
957 reverse_test = !reverse_test;
958 compare_result = spu_compare_op0;
962 compare_result = gen_reg_rtx (comp_mode);
966 rtx t = spu_compare_op1;
967 spu_compare_op1 = spu_compare_op0;
971 if (spu_comp_icode[index][scode] == 0)
974 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
975 (spu_compare_op0, op_mode))
976 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
977 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
978 (spu_compare_op1, op_mode))
979 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
980 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
985 emit_insn (comp_rtx);
989 eq_result = gen_reg_rtx (comp_mode);
990 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
996 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
997 gcc_assert (ior_code != CODE_FOR_nothing);
998 emit_insn (GEN_FCN (ior_code)
999 (compare_result, compare_result, eq_result));
1008 /* We don't have branch on QI compare insns, so we convert the
1009 QI compare result to a HI result. */
1010 if (comp_mode == QImode)
1012 rtx old_res = compare_result;
1013 compare_result = gen_reg_rtx (HImode);
1015 emit_insn (gen_extendqihi2 (compare_result, old_res));
1019 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1021 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1023 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1024 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1025 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1028 else if (is_set == 2)
1030 int compare_size = GET_MODE_BITSIZE (comp_mode);
1031 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1032 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1034 rtx op_t = operands[2];
1035 rtx op_f = operands[3];
1037 /* The result of the comparison can be SI, HI or QI mode. Create a
1038 mask based on that result. */
1039 if (target_size > compare_size)
1041 select_mask = gen_reg_rtx (mode);
1042 emit_insn (gen_extend_compare (select_mask, compare_result));
1044 else if (target_size < compare_size)
1046 gen_rtx_SUBREG (mode, compare_result,
1047 (compare_size - target_size) / BITS_PER_UNIT);
1048 else if (comp_mode != mode)
1049 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1051 select_mask = compare_result;
1053 if (GET_MODE (target) != GET_MODE (op_t)
1054 || GET_MODE (target) != GET_MODE (op_f))
1058 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1060 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1065 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1066 gen_rtx_NOT (comp_mode, compare_result)));
1067 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1068 emit_insn (gen_extendhisi2 (target, compare_result));
1069 else if (GET_MODE (target) == SImode
1070 && GET_MODE (compare_result) == QImode)
1071 emit_insn (gen_extend_compare (target, compare_result));
1073 emit_move_insn (target, compare_result);
1078 const_double_to_hwint (rtx x)
1082 if (GET_MODE (x) == SFmode)
1084 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1085 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1087 else if (GET_MODE (x) == DFmode)
1090 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1091 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1093 val = (val << 32) | (l[1] & 0xffffffff);
1101 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1105 gcc_assert (mode == SFmode || mode == DFmode);
1108 tv[0] = (v << 32) >> 32;
1109 else if (mode == DFmode)
1111 tv[1] = (v << 32) >> 32;
1114 real_from_target (&rv, tv, mode);
1115 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1119 print_operand_address (FILE * file, register rtx addr)
1124 if (GET_CODE (addr) == AND
1125 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1126 && INTVAL (XEXP (addr, 1)) == -16)
1127 addr = XEXP (addr, 0);
1129 switch (GET_CODE (addr))
1132 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1136 reg = XEXP (addr, 0);
1137 offset = XEXP (addr, 1);
1138 if (GET_CODE (offset) == REG)
1140 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1141 reg_names[REGNO (offset)]);
1143 else if (GET_CODE (offset) == CONST_INT)
1145 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1146 INTVAL (offset), reg_names[REGNO (reg)]);
1156 output_addr_const (file, addr);
1166 print_operand (FILE * file, rtx x, int code)
1168 enum machine_mode mode = GET_MODE (x);
1170 unsigned char arr[16];
1171 int xcode = GET_CODE (x);
1173 if (GET_MODE (x) == VOIDmode)
1176 case 'L': /* 128 bits, signed */
1177 case 'm': /* 128 bits, signed */
1178 case 'T': /* 128 bits, signed */
1179 case 't': /* 128 bits, signed */
1182 case 'K': /* 64 bits, signed */
1183 case 'k': /* 64 bits, signed */
1184 case 'D': /* 64 bits, signed */
1185 case 'd': /* 64 bits, signed */
1188 case 'J': /* 32 bits, signed */
1189 case 'j': /* 32 bits, signed */
1190 case 's': /* 32 bits, signed */
1191 case 'S': /* 32 bits, signed */
1198 case 'j': /* 32 bits, signed */
1199 case 'k': /* 64 bits, signed */
1200 case 'm': /* 128 bits, signed */
1201 if (xcode == CONST_INT
1202 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1204 gcc_assert (logical_immediate_p (x, mode));
1205 constant_to_array (mode, x, arr);
1206 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1207 val = trunc_int_for_mode (val, SImode);
1208 switch (which_logical_immediate (val))
1213 fprintf (file, "h");
1216 fprintf (file, "b");
1226 case 'J': /* 32 bits, signed */
1227 case 'K': /* 64 bits, signed */
1228 case 'L': /* 128 bits, signed */
1229 if (xcode == CONST_INT
1230 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1232 gcc_assert (logical_immediate_p (x, mode)
1233 || iohl_immediate_p (x, mode));
1234 constant_to_array (mode, x, arr);
1235 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1236 val = trunc_int_for_mode (val, SImode);
1237 switch (which_logical_immediate (val))
1243 val = trunc_int_for_mode (val, HImode);
1246 val = trunc_int_for_mode (val, QImode);
1251 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1257 case 't': /* 128 bits, signed */
1258 case 'd': /* 64 bits, signed */
1259 case 's': /* 32 bits, signed */
1262 enum immediate_class c = classify_immediate (x, mode);
1266 constant_to_array (mode, x, arr);
1267 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1268 val = trunc_int_for_mode (val, SImode);
1269 switch (which_immediate_load (val))
1274 fprintf (file, "a");
1277 fprintf (file, "h");
1280 fprintf (file, "hu");
1287 constant_to_array (mode, x, arr);
1288 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1290 fprintf (file, "b");
1292 fprintf (file, "h");
1294 fprintf (file, "w");
1296 fprintf (file, "d");
1299 if (xcode == CONST_VECTOR)
1301 x = CONST_VECTOR_ELT (x, 0);
1302 xcode = GET_CODE (x);
1304 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1305 fprintf (file, "a");
1306 else if (xcode == HIGH)
1307 fprintf (file, "hu");
1321 case 'T': /* 128 bits, signed */
1322 case 'D': /* 64 bits, signed */
1323 case 'S': /* 32 bits, signed */
1326 enum immediate_class c = classify_immediate (x, mode);
1330 constant_to_array (mode, x, arr);
1331 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1332 val = trunc_int_for_mode (val, SImode);
1333 switch (which_immediate_load (val))
1340 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1348 constant_to_array (mode, x, arr);
1350 for (i = 0; i < 16; i++)
1355 print_operand (file, GEN_INT (val), 0);
1358 constant_to_array (mode, x, arr);
1359 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1365 if (GET_CODE (x) == CONST_VECTOR)
1366 x = CONST_VECTOR_ELT (x, 0);
1367 output_addr_const (file, x);
1369 fprintf (file, "@h");
1383 if (xcode == CONST_INT)
1385 /* Only the 4 least significant bits are relevant for generating
1386 control word instructions. */
1387 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1392 case 'M': /* print code for c*d */
1393 if (GET_CODE (x) == CONST_INT)
1397 fprintf (file, "b");
1400 fprintf (file, "h");
1403 fprintf (file, "w");
1406 fprintf (file, "d");
1415 case 'N': /* Negate the operand */
1416 if (xcode == CONST_INT)
1417 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1418 else if (xcode == CONST_VECTOR)
1419 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1420 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1423 case 'I': /* enable/disable interrupts */
1424 if (xcode == CONST_INT)
1425 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1428 case 'b': /* branch modifiers */
1430 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1431 else if (COMPARISON_P (x))
1432 fprintf (file, "%s", xcode == NE ? "n" : "");
1435 case 'i': /* indirect call */
1438 if (GET_CODE (XEXP (x, 0)) == REG)
1439 /* Used in indirect function calls. */
1440 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1442 output_address (XEXP (x, 0));
1446 case 'p': /* load/store */
1450 xcode = GET_CODE (x);
1455 xcode = GET_CODE (x);
1458 fprintf (file, "d");
1459 else if (xcode == CONST_INT)
1460 fprintf (file, "a");
1461 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1462 fprintf (file, "r");
1463 else if (xcode == PLUS || xcode == LO_SUM)
1465 if (GET_CODE (XEXP (x, 1)) == REG)
1466 fprintf (file, "x");
1468 fprintf (file, "d");
1473 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1475 output_addr_const (file, GEN_INT (val));
1479 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1481 output_addr_const (file, GEN_INT (val));
1485 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1487 output_addr_const (file, GEN_INT (val));
1491 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1492 val = (val >> 3) & 0x1f;
1493 output_addr_const (file, GEN_INT (val));
1497 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1500 output_addr_const (file, GEN_INT (val));
1504 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1507 output_addr_const (file, GEN_INT (val));
1511 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1514 output_addr_const (file, GEN_INT (val));
1518 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1519 val = -(val & -8ll);
1520 val = (val >> 3) & 0x1f;
1521 output_addr_const (file, GEN_INT (val));
1526 fprintf (file, "%s", reg_names[REGNO (x)]);
1527 else if (xcode == MEM)
1528 output_address (XEXP (x, 0));
1529 else if (xcode == CONST_VECTOR)
1530 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1532 output_addr_const (file, x);
1539 output_operand_lossage ("invalid %%xn code");
1544 extern char call_used_regs[];
1546 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1547 caller saved register. For leaf functions it is more efficient to
1548 use a volatile register because we won't need to save and restore the
1549 pic register. This routine is only valid after register allocation
1550 is completed, so we can pick an unused register. */
1554 rtx pic_reg = pic_offset_table_rtx;
1555 if (!reload_completed && !reload_in_progress)
1560 /* Split constant addresses to handle cases that are too large.
1561 Add in the pic register when in PIC mode.
1562 Split immediates that require more than 1 instruction. */
1564 spu_split_immediate (rtx * ops)
1566 enum machine_mode mode = GET_MODE (ops[0]);
1567 enum immediate_class c = classify_immediate (ops[1], mode);
1573 unsigned char arrhi[16];
1574 unsigned char arrlo[16];
1575 rtx to, temp, hi, lo;
1577 enum machine_mode imode = mode;
1578 /* We need to do reals as ints because the constant used in the
1579 IOR might not be a legitimate real constant. */
1580 imode = int_mode_for_mode (mode);
1581 constant_to_array (mode, ops[1], arrhi);
1583 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1586 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1587 for (i = 0; i < 16; i += 4)
1589 arrlo[i + 2] = arrhi[i + 2];
1590 arrlo[i + 3] = arrhi[i + 3];
1591 arrlo[i + 0] = arrlo[i + 1] = 0;
1592 arrhi[i + 2] = arrhi[i + 3] = 0;
1594 hi = array_to_constant (imode, arrhi);
1595 lo = array_to_constant (imode, arrlo);
1596 emit_move_insn (temp, hi);
1597 emit_insn (gen_rtx_SET
1598 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
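/* This is the ilhu/iohl pair described for IC_IL2 above: load the high
   halfword of each word first, then IOR in the low halfwords.  */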
1603 unsigned char arr_fsmbi[16];
1604 unsigned char arr_andbi[16];
1605 rtx to, reg_fsmbi, reg_and;
1607 enum machine_mode imode = mode;
1608 /* We need to do reals as ints because the constant used in the
1609 * AND might not be a legitimate real constant. */
1610 imode = int_mode_for_mode (mode);
1611 constant_to_array (mode, ops[1], arr_fsmbi);
1613 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1616 for (i = 0; i < 16; i++)
1617 if (arr_fsmbi[i] != 0)
1619 arr_andbi[0] = arr_fsmbi[i];
1620 arr_fsmbi[i] = 0xff;
1622 for (i = 1; i < 16; i++)
1623 arr_andbi[i] = arr_andbi[0];
1624 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1625 reg_and = array_to_constant (imode, arr_andbi);
1626 emit_move_insn (to, reg_fsmbi);
1627 emit_insn (gen_rtx_SET
1628 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
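/* For IC_FSMBI2 the fsmbi constant has 0xff in every byte that should be
   nonzero, and the following AND replaces those 0xff bytes with the
   single, repeated nonzero byte value.  */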
1632 if (reload_in_progress || reload_completed)
1634 rtx mem = force_const_mem (mode, ops[1]);
1635 if (TARGET_LARGE_MEM)
1637 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1638 emit_move_insn (addr, XEXP (mem, 0));
1639 mem = replace_equiv_address (mem, addr);
1641 emit_move_insn (ops[0], mem);
1647 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1651 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1652 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1655 emit_insn (gen_pic (ops[0], ops[1]));
1658 rtx pic_reg = get_pic_reg ();
1659 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1660 crtl->uses_pic_offset_table = 1;
1662 return flag_pic || c == IC_IL2s;
1673 /* SAVING is TRUE when we are generating the actual load and store
1674 instructions for REGNO. When determining the size of the stack
1675 needed for saving registers we must allocate enough space for the
1676 worst case, because we don't always have the information early enough
1677 to not allocate it. But we can at least eliminate the actual loads
1678 and stores during the prologue/epilogue. */
1680 need_to_save_reg (int regno, int saving)
1682 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1685 && regno == PIC_OFFSET_TABLE_REGNUM
1686 && (!saving || crtl->uses_pic_offset_table)
1688 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1693 /* This function is only correct starting with local register allocation. */
1696 spu_saved_regs_size (void)
1698 int reg_save_size = 0;
1701 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1702 if (need_to_save_reg (regno, 0))
1703 reg_save_size += 0x10;
1704 return reg_save_size;
1708 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1710 rtx reg = gen_rtx_REG (V4SImode, regno);
1712 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1713 return emit_insn (gen_movv4si (mem, reg));
1717 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1719 rtx reg = gen_rtx_REG (V4SImode, regno);
1721 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1722 return emit_insn (gen_movv4si (reg, mem));
1725 /* This happens after reload, so we need to expand it. */
1727 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1730 if (satisfies_constraint_K (GEN_INT (imm)))
1732 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1736 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1737 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1738 if (REGNO (src) == REGNO (scratch))
1744 /* Return nonzero if this function is known to have a null epilogue. */
1747 direct_return (void)
1749 if (reload_completed)
1751 if (cfun->static_chain_decl == 0
1752 && (spu_saved_regs_size ()
1754 + crtl->outgoing_args_size
1755 + crtl->args.pretend_args_size == 0)
1756 && current_function_is_leaf)
1763 The stack frame looks like this:
1767 AP -> +-------------+
1770 prev SP | back chain |
1773 | reg save | crtl->args.pretend_args_size bytes
1776 | saved regs | spu_saved_regs_size() bytes
1777 FP -> +-------------+
1779 | vars | get_frame_size() bytes
1780 HFP -> +-------------+
1783 | args | crtl->outgoing_args_size bytes
1789 SP -> +-------------+
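The total frame size used below is get_frame_size() + spu_saved_regs_size()
+ crtl->outgoing_args_size + crtl->args.pretend_args_size, plus
STACK_POINTER_OFFSET whenever the function is not a leaf, calls alloca, or
has a nonzero frame (see spu_expand_prologue).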
1793 spu_expand_prologue (void)
1795 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1796 HOST_WIDE_INT total_size;
1797 HOST_WIDE_INT saved_regs_size;
1798 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1799 rtx scratch_reg_0, scratch_reg_1;
1802 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1803 the "toplevel" insn chain. */
1804 emit_note (NOTE_INSN_DELETED);
1806 if (flag_pic && optimize == 0)
1807 crtl->uses_pic_offset_table = 1;
1809 if (spu_naked_function_p (current_function_decl))
1812 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1813 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1815 saved_regs_size = spu_saved_regs_size ();
1816 total_size = size + saved_regs_size
1817 + crtl->outgoing_args_size
1818 + crtl->args.pretend_args_size;
1820 if (!current_function_is_leaf
1821 || cfun->calls_alloca || total_size > 0)
1822 total_size += STACK_POINTER_OFFSET;
1824 /* Save this first because code after this might use the link
1825 register as a scratch register. */
1826 if (!current_function_is_leaf)
1828 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1829 RTX_FRAME_RELATED_P (insn) = 1;
1834 offset = -crtl->args.pretend_args_size;
1835 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1836 if (need_to_save_reg (regno, 1))
1839 insn = frame_emit_store (regno, sp_reg, offset);
1840 RTX_FRAME_RELATED_P (insn) = 1;
1844 if (flag_pic && crtl->uses_pic_offset_table)
1846 rtx pic_reg = get_pic_reg ();
1847 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1848 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1853 if (flag_stack_check)
1855 /* We compare against total_size-1 because
1856 ($sp >= total_size) <=> ($sp > total_size-1) */
1857 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1858 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1859 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1860 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1862 emit_move_insn (scratch_v4si, size_v4si);
1863 size_v4si = scratch_v4si;
1865 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1866 emit_insn (gen_vec_extractv4si
1867 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1868 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1871 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1872 the value of the previous $sp because we save it as the back chain. */
1874 if (total_size <= 2000)
1876 /* In this case we save the back chain first. */
1877 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1879 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1883 insn = emit_move_insn (scratch_reg_0, sp_reg);
1885 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1887 RTX_FRAME_RELATED_P (insn) = 1;
1888 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1890 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1892 if (total_size > 2000)
1894 /* Save the back chain ptr */
1895 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1898 if (frame_pointer_needed)
1900 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1901 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1902 + crtl->outgoing_args_size;
1903 /* Set the new frame_pointer */
1904 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1905 RTX_FRAME_RELATED_P (insn) = 1;
1906 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1908 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1909 real, REG_NOTES (insn));
1910 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1914 emit_note (NOTE_INSN_DELETED);
1918 spu_expand_epilogue (bool sibcall_p)
1920 int size = get_frame_size (), offset, regno;
1921 HOST_WIDE_INT saved_regs_size, total_size;
1922 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1923 rtx jump, scratch_reg_0;
1925 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1926 the "toplevel" insn chain. */
1927 emit_note (NOTE_INSN_DELETED);
1929 if (spu_naked_function_p (current_function_decl))
1932 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1934 saved_regs_size = spu_saved_regs_size ();
1935 total_size = size + saved_regs_size
1936 + crtl->outgoing_args_size
1937 + crtl->args.pretend_args_size;
1939 if (!current_function_is_leaf
1940 || cfun->calls_alloca || total_size > 0)
1941 total_size += STACK_POINTER_OFFSET;
1945 if (cfun->calls_alloca)
1946 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1948 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1951 if (saved_regs_size > 0)
1953 offset = -crtl->args.pretend_args_size;
1954 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1955 if (need_to_save_reg (regno, 1))
1958 frame_emit_load (regno, sp_reg, offset);
1963 if (!current_function_is_leaf)
1964 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1968 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1969 jump = emit_jump_insn (gen__return ());
1970 emit_barrier_after (jump);
1973 emit_note (NOTE_INSN_DELETED);
1977 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1981 /* This is inefficient because it ends up copying to a save-register
1982 which then gets saved even though $lr has already been saved. But
1983 it does generate better code for leaf functions and we don't need
1984 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1985 used for __builtin_return_address anyway, so maybe we don't care if
1986 it's inefficient. */
1987 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1991 /* Given VAL, generate a constant appropriate for MODE.
1992 If MODE is a vector mode, every element will be VAL.
1993 For TImode, VAL will be zero extended to 128 bits. */
1995 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2001 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2002 || GET_MODE_CLASS (mode) == MODE_FLOAT
2003 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2004 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2006 if (GET_MODE_CLASS (mode) == MODE_INT)
2007 return immed_double_const (val, 0, mode);
2009 /* val is the bit representation of the float */
2010 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2011 return hwint_to_const_double (mode, val);
2013 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2014 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2016 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2018 units = GET_MODE_NUNITS (mode);
2020 v = rtvec_alloc (units);
2022 for (i = 0; i < units; ++i)
2023 RTVEC_ELT (v, i) = inner;
2025 return gen_rtx_CONST_VECTOR (mode, v);
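/* E.g. (illustrative) spu_const (V4SImode, 1) is the CONST_VECTOR
   {1, 1, 1, 1}, and spu_const (SFmode, 0x3f800000) is 1.0f, VAL being
   taken as the float's bit pattern.  */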
2028 /* Create a MODE vector constant from 4 ints. */
2030 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2032 unsigned char arr[16];
2033 arr[0] = (a >> 24) & 0xff;
2034 arr[1] = (a >> 16) & 0xff;
2035 arr[2] = (a >> 8) & 0xff;
2036 arr[3] = (a >> 0) & 0xff;
2037 arr[4] = (b >> 24) & 0xff;
2038 arr[5] = (b >> 16) & 0xff;
2039 arr[6] = (b >> 8) & 0xff;
2040 arr[7] = (b >> 0) & 0xff;
2041 arr[8] = (c >> 24) & 0xff;
2042 arr[9] = (c >> 16) & 0xff;
2043 arr[10] = (c >> 8) & 0xff;
2044 arr[11] = (c >> 0) & 0xff;
2045 arr[12] = (d >> 24) & 0xff;
2046 arr[13] = (d >> 16) & 0xff;
2047 arr[14] = (d >> 8) & 0xff;
2048 arr[15] = (d >> 0) & 0xff;
2049 return array_to_constant(mode, arr);
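/* E.g. (illustrative) spu_const_from_ints (V4SImode, 1, 2, 3, 4) yields
   the vector constant {1, 2, 3, 4}; each argument is stored big-endian
   in its 4-byte slot of the 16-byte array.  */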
2052 /* branch hint stuff */
2054 /* An array of these is used to propagate hints to predecessor blocks. */
2057 rtx prop_jump; /* propagated from another block */
2058 int bb_index; /* the original block. */
2060 static struct spu_bb_info *spu_bb_info;
2062 #define STOP_HINT_P(INSN) \
2063 (GET_CODE(INSN) == CALL_INSN \
2064 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2065 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
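/* Calls and the inlined divmod sequences; the same cases appear in the
   hint-invalidation list in spu_machine_dependent_reorg below.  */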
2067 /* 1 when RTX is a hinted branch or its target. We keep track of
2068 what has been hinted so the safe-hint code can test it easily. */
2069 #define HINTED_P(RTX) \
2070 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2072 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2073 #define SCHED_ON_EVEN_P(RTX) \
2074 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2076 /* Emit a nop for INSN such that the two will dual issue. This assumes
2077 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2078 We check for TImode to handle a MULTI1 insn which has dual issued its
2079 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2082 emit_nop_for_insn (rtx insn)
2086 p = get_pipe (insn);
2087 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2088 new_insn = emit_insn_after (gen_lnop (), insn);
2089 else if (p == 1 && GET_MODE (insn) == TImode)
2091 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2092 PUT_MODE (new_insn, TImode);
2093 PUT_MODE (insn, VOIDmode);
2096 new_insn = emit_insn_after (gen_lnop (), insn);
2097 recog_memoized (new_insn);
2100 /* Insert nops in basic blocks to meet dual issue alignment
2101 requirements. Also make sure hbrp and hint instructions are at least
2102 one cycle apart, possibly inserting a nop. */
2106 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2110 /* This sets up INSN_ADDRESSES. */
2111 shorten_branches (get_insns ());
2113 /* Keep track of length added by nops. */
2117 insn = get_insns ();
2118 if (!active_insn_p (insn))
2119 insn = next_active_insn (insn);
2120 for (; insn; insn = next_insn)
2122 next_insn = next_active_insn (insn);
2123 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2124 || INSN_CODE (insn) == CODE_FOR_hbr)
2128 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2129 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2130 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2133 prev_insn = emit_insn_before (gen_lnop (), insn);
2134 PUT_MODE (prev_insn, GET_MODE (insn));
2135 PUT_MODE (insn, TImode);
2141 if (INSN_CODE (insn) == CODE_FOR_blockage)
2143 if (GET_MODE (insn) == TImode)
2144 PUT_MODE (next_insn, TImode);
2146 next_insn = next_active_insn (insn);
2148 addr = INSN_ADDRESSES (INSN_UID (insn));
2149 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2151 if (((addr + length) & 7) != 0)
2153 emit_nop_for_insn (prev_insn);
2157 else if (GET_MODE (insn) == TImode
2158 && ((next_insn && GET_MODE (next_insn) != TImode)
2159 || get_attr_type (insn) == TYPE_MULTI0)
2160 && ((addr + length) & 7) != 0)
2162 /* prev_insn will always be set because the first insn is
2163 always 8-byte aligned. */
2164 emit_nop_for_insn (prev_insn);
2172 /* Routines for branch hints. */
2175 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2176 int distance, sbitmap blocks)
2178 rtx branch_label = 0;
2183 if (before == 0 || branch == 0 || target == 0)
2186 /* While scheduling we require hints to be no further than 600, so
2187 we need to enforce that here too */
2191 /* If BEFORE is a basic block note, emit the hint after the note. */
2192 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2193 before = NEXT_INSN (before);
2195 branch_label = gen_label_rtx ();
2196 LABEL_NUSES (branch_label)++;
2197 LABEL_PRESERVE_P (branch_label) = 1;
2198 insn = emit_label_before (branch_label, branch);
2199 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2200 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2202 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2203 recog_memoized (hint);
2204 HINTED_P (branch) = 1;
2206 if (GET_CODE (target) == LABEL_REF)
2207 HINTED_P (XEXP (target, 0)) = 1;
2208 else if (tablejump_p (branch, 0, &table))
2212 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2213 vec = XVEC (PATTERN (table), 0);
2215 vec = XVEC (PATTERN (table), 1);
2216 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2217 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2220 if (distance >= 588)
2222 /* Make sure the hint isn't scheduled any earlier than this point,
2223 which could make it too far for the branch offset to fit
2224 recog_memoized (emit_insn_before (gen_blockage (), hint));
2226 else if (distance <= 8 * 4)
2228 /* To guarantee at least 8 insns between the hint and branch we insert nops. */
2231 for (d = distance; d < 8 * 4; d += 4)
2234 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2235 recog_memoized (insn);
2238 /* Make sure any nops inserted aren't scheduled before the hint. */
2239 recog_memoized (emit_insn_after (gen_blockage (), hint));
2241 /* Make sure any nops inserted aren't scheduled after the call. */
2242 if (CALL_P (branch) && distance < 8 * 4)
2243 recog_memoized (emit_insn_before (gen_blockage (), branch));
2247 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2248 the rtx for the branch target. */
2250 get_branch_target (rtx branch)
2252 if (GET_CODE (branch) == JUMP_INSN)
2256 /* Return statements */
2257 if (GET_CODE (PATTERN (branch)) == RETURN)
2258 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2261 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2262 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2265 set = single_set (branch);
2266 src = SET_SRC (set);
2267 if (GET_CODE (SET_DEST (set)) != PC)
2270 if (GET_CODE (src) == IF_THEN_ELSE)
2273 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2276 /* If the more probable case is not a fall through, then
2277 try a branch hint. */
2278 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2279 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2280 && GET_CODE (XEXP (src, 1)) != PC)
2281 lab = XEXP (src, 1);
2282 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2283 && GET_CODE (XEXP (src, 2)) != PC)
2284 lab = XEXP (src, 2);
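/* I.e. hint XEXP (src, 1) when its probability exceeds 60%, hint
   XEXP (src, 2) when it is below 40%, and leave branches in the middle
   band unhinted (REG_BR_PROB_BASE is 10000).  */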
2288 if (GET_CODE (lab) == RETURN)
2289 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2297 else if (GET_CODE (branch) == CALL_INSN)
2300 /* All of our call patterns are in a PARALLEL and the CALL is
2301 the first pattern in the PARALLEL. */
2302 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2304 call = XVECEXP (PATTERN (branch), 0, 0);
2305 if (GET_CODE (call) == SET)
2306 call = SET_SRC (call);
2307 if (GET_CODE (call) != CALL)
2309 return XEXP (XEXP (call, 0), 0);
2314 /* The special $hbr register is used to prevent the insn scheduler from
2315 moving hbr insns across instructions which invalidate them. It
2316 should only be used in a clobber, and this function searches for
2317 insns which clobber it. */
2319 insn_clobbers_hbr (rtx insn)
2322 && GET_CODE (PATTERN (insn)) == PARALLEL)
2324 rtx parallel = PATTERN (insn);
2327 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2329 clobber = XVECEXP (parallel, 0, j);
2330 if (GET_CODE (clobber) == CLOBBER
2331 && GET_CODE (XEXP (clobber, 0)) == REG
2332 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2339 /* Search up to 32 insns starting at FIRST:
2340 - at any kind of hinted branch, just return
2341 - at any unconditional branch in the first 15 insns, just return
2342 - at a call or indirect branch, after the first 15 insns, force it to
2343 an even address and return
2344 - at any unconditional branch, after the first 15 insns, force it to an even address.
2346 At the end of the search, insert an hbrp within 4 insns of FIRST,
2347 and an hbrp within 16 instructions of FIRST.
2350 insert_hbrp_for_ilb_runout (rtx first)
2352 rtx insn, before_4 = 0, before_16 = 0;
2353 int addr = 0, length, first_addr = -1;
2354 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
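/* 128 * 4 bytes is farther than anything in the 32-insn window scanned
   below, so these initial values mean "no hbrp seen yet".  */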
2355 int insert_lnop_after = 0;
2356 for (insn = first; insn; insn = NEXT_INSN (insn))
2359 if (first_addr == -1)
2360 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2361 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2362 length = get_attr_length (insn);
2364 if (before_4 == 0 && addr + length >= 4 * 4)
2366 /* We test for 14 instructions because the first hbrp will add
2367 up to 2 instructions. */
2368 if (before_16 == 0 && addr + length >= 14 * 4)
2371 if (INSN_CODE (insn) == CODE_FOR_hbr)
2373 /* Make sure an hbrp is at least 2 cycles away from a hint.
2374 Insert an lnop after the hbrp when necessary. */
2375 if (before_4 == 0 && addr > 0)
2378 insert_lnop_after |= 1;
2380 else if (before_4 && addr <= 4 * 4)
2381 insert_lnop_after |= 1;
2382 if (before_16 == 0 && addr > 10 * 4)
2385 insert_lnop_after |= 2;
2387 else if (before_16 && addr <= 14 * 4)
2388 insert_lnop_after |= 2;
2391 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2393 if (addr < hbrp_addr0)
2395 else if (addr < hbrp_addr1)
2399 if (CALL_P (insn) || JUMP_P (insn))
2401 if (HINTED_P (insn))
2404 /* Any branch after the first 15 insns should be on an even
2405 address to avoid a special case branch. There might be
2406 some nops and/or hbrps inserted, so we test after 10 insns. */
2409 SCHED_ON_EVEN_P (insn) = 1;
2412 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2416 if (addr + length >= 32 * 4)
2418 gcc_assert (before_4 && before_16);
2419 if (hbrp_addr0 > 4 * 4)
2422 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2423 recog_memoized (insn);
2424 INSN_ADDRESSES_NEW (insn,
2425 INSN_ADDRESSES (INSN_UID (before_4)));
2426 PUT_MODE (insn, GET_MODE (before_4));
2427 PUT_MODE (before_4, TImode);
2428 if (insert_lnop_after & 1)
2430 insn = emit_insn_before (gen_lnop (), before_4);
2431 recog_memoized (insn);
2432 INSN_ADDRESSES_NEW (insn,
2433 INSN_ADDRESSES (INSN_UID (before_4)));
2434 PUT_MODE (insn, TImode);
2437 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2438 && hbrp_addr1 > 16 * 4)
2441 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2442 recog_memoized (insn);
2443 INSN_ADDRESSES_NEW (insn,
2444 INSN_ADDRESSES (INSN_UID (before_16)));
2445 PUT_MODE (insn, GET_MODE (before_16));
2446 PUT_MODE (before_16, TImode);
2447 if (insert_lnop_after & 2)
2449 insn = emit_insn_before (gen_lnop (), before_16);
2450 recog_memoized (insn);
2451 INSN_ADDRESSES_NEW (insn,
2452 INSN_ADDRESSES (INSN_UID
2454 PUT_MODE (insn, TImode);
2460 else if (BARRIER_P (insn))
2465 /* The SPU might hang when it executes 48 inline instructions after a
2466 hinted branch jumps to its hinted target. The beginning of a
2467 function and the return from a call might have been hinted, and must
2468 be handled as well. To prevent a hang we insert 2 hbrps. The first
2469 should be within 6 insns of the branch target. The second should be
2470 within 22 insns of the branch target. When determining if hbrps are
2471 necessary, we look for only 32 inline instructions, because up to
2472 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2473 new hbrps, we insert them within 4 and 16 insns of the target. */
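/* Editorial note, not part of the original sources: the numbers above
   are consistent because 32 inline insns plus up to 12 nops plus 4
   hbrps equals the 48-insn run that can hang the SPU, and inserting
   within 4 and 16 insns instead of the required 6 and 22 leaves slack
   for insns added later by this pass. */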
2478 if (TARGET_SAFE_HINTS)
2480 shorten_branches (get_insns ());
2481 /* Insert hbrp at beginning of function */
2482 insn = next_active_insn (get_insns ());
2484 insert_hbrp_for_ilb_runout (insn);
2485 /* Insert hbrp after hinted targets. */
2486 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2487 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2488 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2492 static int in_spu_reorg;
2494 /* Insert branch hints. There are no branch optimizations after this
2495 pass, so it's safe to set our branch hints now. */
2497 spu_machine_dependent_reorg (void)
2502 rtx branch_target = 0;
2503 int branch_addr = 0, insn_addr, required_dist = 0;
2507 if (!TARGET_BRANCH_HINTS || optimize == 0)
2509 /* We still do it for unoptimized code because an external
2510 function might have hinted a call or return. */
2516 blocks = sbitmap_alloc (last_basic_block);
2517 sbitmap_zero (blocks);
2520 compute_bb_for_insn ();
2525 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2526 sizeof (struct spu_bb_info));
2528 /* We need exact insn addresses and lengths. */
2529 shorten_branches (get_insns ());
2531 for (i = n_basic_blocks - 1; i >= 0; i--)
2533 bb = BASIC_BLOCK (i);
2535 if (spu_bb_info[i].prop_jump)
2537 branch = spu_bb_info[i].prop_jump;
2538 branch_target = get_branch_target (branch);
2539 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2540 required_dist = spu_hint_dist;
2542 /* Search from end of a block to beginning. In this loop, find
2543 jumps which need a branch hint and emit the hint only when:
2544 - it's an indirect branch and we're at the insn which sets
2546 - we're at an insn that will invalidate the hint. e.g., a
2547 call, another hint insn, inline asm that clobbers $hbr, and
2548 some inlined operations (divmodsi4). Don't consider jumps
2549 because they are only at the end of a block and are
2550 considered when we are deciding whether to propagate
2551 - we're getting too far away from the branch. The hbr insns
2552 only have a signed 10 bit offset
2553 We go back as far as possible so the branch will be considered
2554 for propagation when we get to the beginning of the block. */
2555 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2559 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2561 && ((GET_CODE (branch_target) == REG
2562 && set_of (branch_target, insn) != NULL_RTX)
2563 || insn_clobbers_hbr (insn)
2564 || branch_addr - insn_addr > 600))
2566 rtx next = NEXT_INSN (insn);
2567 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2568 if (insn != BB_END (bb)
2569 && branch_addr - next_addr >= required_dist)
2573 "hint for %i in block %i before %i\n",
2574 INSN_UID (branch), bb->index,
2576 spu_emit_branch_hint (next, branch, branch_target,
2577 branch_addr - next_addr, blocks);
2582 /* JUMP_P will only be true at the end of a block. When
2583 branch is already set it means we've previously decided
2584 to propagate a hint for that branch into this block. */
2585 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2588 if ((branch_target = get_branch_target (insn)))
2591 branch_addr = insn_addr;
2592 required_dist = spu_hint_dist;
2596 if (insn == BB_HEAD (bb))
2602 /* If we haven't emitted a hint for this branch yet, it might
2603 be profitable to emit it in one of the predecessor blocks,
2604 especially for loops. */
2606 basic_block prev = 0, prop = 0, prev2 = 0;
2607 int loop_exit = 0, simple_loop = 0;
2608 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2610 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2611 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2612 prev = EDGE_PRED (bb, j)->src;
2614 prev2 = EDGE_PRED (bb, j)->src;
2616 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2617 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2619 else if (EDGE_SUCC (bb, j)->dest == bb)
2622 /* If this branch is a loop exit then propagate to previous
2623 fallthru block. This catches the cases when it is a simple
2624 loop or when there is an initial branch into the loop. */
2625 if (prev && (loop_exit || simple_loop)
2626 && prev->loop_depth <= bb->loop_depth)
2629 /* If there is only one adjacent predecessor, don't propagate
2630 outside this loop. This loop_depth test isn't perfect, but
2631 I'm not sure the loop_father member is valid at this point. */
2632 else if (prev && single_pred_p (bb)
2633 && prev->loop_depth == bb->loop_depth)
2636 /* If this is the JOIN block of a simple IF-THEN then
2637 propagate the hint to the HEADER block. */
2638 else if (prev && prev2
2639 && EDGE_COUNT (bb->preds) == 2
2640 && EDGE_COUNT (prev->preds) == 1
2641 && EDGE_PRED (prev, 0)->src == prev2
2642 && prev2->loop_depth == bb->loop_depth
2643 && GET_CODE (branch_target) != REG)
2646 /* Don't propagate when:
2647 - this is a simple loop and the hint would be too far
2648 - this is not a simple loop and there are 16 insns in
2650 - the predecessor block ends in a branch that will be
2652 - the predecessor block ends in an insn that invalidates
2656 && (bbend = BB_END (prop))
2657 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2658 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2659 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2662 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2663 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2664 bb->index, prop->index, bb->loop_depth,
2665 INSN_UID (branch), loop_exit, simple_loop,
2666 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2668 spu_bb_info[prop->index].prop_jump = branch;
2669 spu_bb_info[prop->index].bb_index = i;
2671 else if (branch_addr - next_addr >= required_dist)
2674 fprintf (dump_file, "hint for %i in block %i before %i\n",
2675 INSN_UID (branch), bb->index,
2676 INSN_UID (NEXT_INSN (insn)));
2677 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2678 branch_addr - next_addr, blocks);
2685 if (!sbitmap_empty_p (blocks))
2686 find_many_sub_basic_blocks (blocks);
2688 /* We have to schedule to make sure alignment is ok. */
2689 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2691 /* The hints need to be scheduled, so run the scheduler again. */
2699 if (spu_flag_var_tracking)
2702 timevar_push (TV_VAR_TRACKING);
2703 variable_tracking_main ();
2704 timevar_pop (TV_VAR_TRACKING);
2705 df_finish_pass (false);
2708 free_bb_for_insn ();
2714 /* Insn scheduling routines, primarily for dual issue. */
2716 spu_sched_issue_rate (void)
2722 uses_ls_unit(rtx insn)
2724 rtx set = single_set (insn);
2726 && (GET_CODE (SET_DEST (set)) == MEM
2727 || GET_CODE (SET_SRC (set)) == MEM))
2736 /* Handle inline asm */
2737 if (INSN_CODE (insn) == -1)
2739 t = get_attr_type (insn);
2764 case TYPE_IPREFETCH:
2772 /* haifa-sched.c has a static variable that keeps track of the current
2773 cycle. It is passed to spu_sched_reorder, and we record it here for
2774 use by spu_sched_variable_issue. It won't be accurate if the
2775 scheduler updates its clock_var between the two calls. */
2776 static int clock_var;
2778 /* This is used to keep track of insn alignment. Set to 0 at the
2779 beginning of each block and increased by the "length" attr of each
2781 static int spu_sched_length;
2783 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2784 ready list appropriately in spu_sched_reorder(). */
2785 static int pipe0_clock;
2786 static int pipe1_clock;
2788 static int prev_clock_var;
2790 static int prev_priority;
2792 /* The SPU needs to load the next ilb sometime during the execution of
2793 the previous ilb. There is a potential conflict if every cycle has a
2794 load or store. To avoid the conflict we make sure the load/store
2795 unit is free for at least one cycle during the execution of insns in
2796 the previous ilb. */
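/* For example (editorial note, not part of the original sources):
   spu_sched_reorder below emits an iprefetch once loads/stores have
   filled every slot for 15 insns (spu_sched_length - spu_ls_first >=
   4 * 15), which gives the load/store unit the free cycle described
   above. */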
2797 static int spu_ls_first;
2798 static int prev_ls_clock;
2801 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2802 int max_ready ATTRIBUTE_UNUSED)
2804 spu_sched_length = 0;
2808 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2809 int max_ready ATTRIBUTE_UNUSED)
2811 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2813 /* When any block might be at least 8-byte aligned, assume all
2814 blocks are at least 8-byte aligned to make sure dual issue
2815 works out correctly. */
2816 spu_sched_length = 0;
2818 spu_ls_first = INT_MAX;
2823 prev_clock_var = -1;
2828 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2829 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2833 if (GET_CODE (PATTERN (insn)) == USE
2834 || GET_CODE (PATTERN (insn)) == CLOBBER
2835 || (len = get_attr_length (insn)) == 0)
2838 spu_sched_length += len;
2840 /* Reset on inline asm */
2841 if (INSN_CODE (insn) == -1)
2843 spu_ls_first = INT_MAX;
2848 p = get_pipe (insn);
2850 pipe0_clock = clock_var;
2852 pipe1_clock = clock_var;
2856 if (clock_var - prev_ls_clock > 1
2857 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2858 spu_ls_first = INT_MAX;
2859 if (uses_ls_unit (insn))
2861 if (spu_ls_first == INT_MAX)
2862 spu_ls_first = spu_sched_length;
2863 prev_ls_clock = clock_var;
2866 /* The scheduler hasn't inserted the nop, but we will later on.
2867 Include those nops in spu_sched_length. */
2868 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2869 spu_sched_length += 4;
2870 prev_clock_var = clock_var;
2872 /* more is -1 when called from spu_sched_reorder for new insns
2873 that don't have INSN_PRIORITY */
2875 prev_priority = INSN_PRIORITY (insn);
2878 /* Always try issuing more insns. spu_sched_reorder will decide
2879 when the cycle should be advanced. */
2883 /* This function is called for both TARGET_SCHED_REORDER and
2884 TARGET_SCHED_REORDER2. */
2886 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2887 rtx *ready, int *nreadyp, int clock)
2889 int i, nready = *nreadyp;
2890 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2895 if (nready <= 0 || pipe1_clock >= clock)
2898 /* Find any rtl insns that don't generate assembly insns and schedule
2900 for (i = nready - 1; i >= 0; i--)
2903 if (INSN_CODE (insn) == -1
2904 || INSN_CODE (insn) == CODE_FOR_blockage
2905 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2907 ready[i] = ready[nready - 1];
2908 ready[nready - 1] = insn;
2913 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2914 for (i = 0; i < nready; i++)
2915 if (INSN_CODE (ready[i]) != -1)
2918 switch (get_attr_type (insn))
2943 case TYPE_IPREFETCH:
2949 /* In the first scheduling phase, schedule loads and stores together
2950 to increase the chance they will get merged during postreload CSE. */
2951 if (!reload_completed && pipe_ls >= 0)
2953 insn = ready[pipe_ls];
2954 ready[pipe_ls] = ready[nready - 1];
2955 ready[nready - 1] = insn;
2959 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2963 /* When we have loads/stores in every cycle of the last 15 insns and
2964 we are about to schedule another load/store, emit an hbrp insn
2967 && spu_sched_length - spu_ls_first >= 4 * 15
2968 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2970 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2971 recog_memoized (insn);
2972 if (pipe0_clock < clock)
2973 PUT_MODE (insn, TImode);
2974 spu_sched_variable_issue (file, verbose, insn, -1);
2978 /* In general, we want to emit nops to increase dual issue, but dual
2979 issue isn't faster when one of the insns could be scheduled later
2980 without affecting the critical path. We look at INSN_PRIORITY to
2981 make a good guess, but it isn't perfect so -mdual-nops=n can be
2982 used to affect it. */
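/* Illustrative note, not part of the original sources: spu_sched_length
   counts bytes, so (spu_sched_length & 7) == 0 below means the next insn
   would start a new 8-byte pair (the even, pipe0 slot), and == 4 means
   it would fill the odd, pipe1 slot. */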
2983 if (in_spu_reorg && spu_dual_nops < 10)
2985 /* When we are at an even address and we are not issuing nops to
2986 improve scheduling then we need to advance the cycle. */
2987 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2988 && (spu_dual_nops == 0
2991 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2994 /* When at an odd address, schedule the highest priority insn
2995 without considering pipeline. */
2996 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2997 && (spu_dual_nops == 0
2999 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3004 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3005 pipe0 insn in the ready list, schedule it. */
3006 if (pipe0_clock < clock && pipe_0 >= 0)
3007 schedule_i = pipe_0;
3009 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3010 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3012 schedule_i = pipe_1;
3014 if (schedule_i > -1)
3016 insn = ready[schedule_i];
3017 ready[schedule_i] = ready[nready - 1];
3018 ready[nready - 1] = insn;
3024 /* INSN is dependent on DEP_INSN. */
3026 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3030 /* The blockage pattern is used to prevent instructions from being
3031 moved across it and has no cost. */
3032 if (INSN_CODE (insn) == CODE_FOR_blockage
3033 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3036 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3037 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3040 /* Make sure hbrps are spread out. */
3041 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3042 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3045 /* Make sure hints and hbrps are 2 cycles apart. */
3046 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3047 || INSN_CODE (insn) == CODE_FOR_hbr)
3048 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3049 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3052 /* An hbrp has no real dependency on other insns. */
3053 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3054 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3057 /* Assuming that it is unlikely an argument register will be used in
3058 the first cycle of the called function, we reduce the cost for
3059 slightly better scheduling of dep_insn. When not hinted, the
3060 mispredicted branch would hide the cost as well. */
3063 rtx target = get_branch_target (insn);
3064 if (GET_CODE (target) != REG || !set_of (target, insn))
3069 /* And when returning from a function, let's assume the return values
3070 are completed sooner too. */
3071 if (CALL_P (dep_insn))
3074 /* Make sure an instruction that loads from the back chain is scheduled
3075 away from the return instruction so a hint is more likely to get
3077 if (INSN_CODE (insn) == CODE_FOR__return
3078 && (set = single_set (dep_insn))
3079 && GET_CODE (SET_DEST (set)) == REG
3080 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3083 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3084 scheduler makes every insn in a block anti-dependent on the final
3085 jump_insn. We adjust here so higher cost insns will get scheduled
3087 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3088 return insn_cost (dep_insn) - 3;
3093 /* Create a CONST_DOUBLE from a string. */
3095 spu_float_const (const char *string, enum machine_mode mode)
3097 REAL_VALUE_TYPE value;
3098 value = REAL_VALUE_ATOF (string, mode);
3099 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
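/* Minimal usage sketch (editorial, not part of the original sources);
   "target" is a hypothetical SFmode register. */
#if 0
  rtx half = spu_float_const ("0.5", SFmode);
  emit_move_insn (target, half);
#endif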
3103 spu_constant_address_p (rtx x)
3105 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3106 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3107 || GET_CODE (x) == HIGH);
3110 static enum spu_immediate
3111 which_immediate_load (HOST_WIDE_INT val)
3113 gcc_assert (val == trunc_int_for_mode (val, SImode));
3115 if (val >= -0x8000 && val <= 0x7fff)
3117 if (val >= 0 && val <= 0x3ffff)
3119 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3121 if ((val & 0xffff) == 0)
3127 /* Return true when OP can be loaded by one of the il instructions, or
3128 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3130 immediate_load_p (rtx op, enum machine_mode mode)
3132 if (CONSTANT_P (op))
3134 enum immediate_class c = classify_immediate (op, mode);
3135 return c == IC_IL1 || c == IC_IL1s
3136 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
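/* Worked examples (editorial, not part of the original sources), based
   on which_immediate_load above: 0x7fff fits il, 0x20000 fits ila,
   0x12340000 fits ilhu and 0x00050005 fits ilh, so all of them classify
   as IC_IL1; a general 32-bit value such as 0x12345678 needs the
   two-insn ilhu/iohl sequence (IC_IL2) and is only accepted here until
   flow2 completes. */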
3141 /* Return true if the first SIZE bytes of arr form a constant that can be
3142 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3143 represent the size and offset of the instruction to use. */
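/* Illustrative note, not part of the original sources: the "run"
   recognized below is the inserted part of the shuffle control, e.g.
   the single byte 0x03 for cbd, the pair 0x02 0x03 for chd, and the
   ascending runs 0x00..0x03 / 0x00..0x07 for cwd / cdd; the remaining
   bytes are assumed to hold the default pattern. */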
3145 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3147 int cpat, run, i, start;
3151 for (i = 0; i < size && cpat; i++)
3159 else if (arr[i] == 2 && arr[i+1] == 3)
3161 else if (arr[i] == 0)
3163 while (arr[i+run] == run && i+run < 16)
3165 if (run != 4 && run != 8)
3170 if ((i & (run-1)) != 0)
3177 if (cpat && (run || size < 16))
3184 *pstart = start == -1 ? 16-run : start;
3190 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3191 it into a register. MODE is only valid when OP is a CONST_INT. */
3192 static enum immediate_class
3193 classify_immediate (rtx op, enum machine_mode mode)
3196 unsigned char arr[16];
3197 int i, j, repeated, fsmbi, repeat;
3199 gcc_assert (CONSTANT_P (op));
3201 if (GET_MODE (op) != VOIDmode)
3202 mode = GET_MODE (op);
3204 /* A V4SI const_vector with all identical symbols is ok. */
3207 && GET_CODE (op) == CONST_VECTOR
3208 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3209 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3210 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3211 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3212 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3213 op = CONST_VECTOR_ELT (op, 0);
3215 switch (GET_CODE (op))
3219 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3222 /* We can never know if the resulting address fits in 18 bits and can be
3223 loaded with ila. For now, assume the address will not overflow if
3224 the displacement is "small" (fits 'K' constraint). */
3225 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3227 rtx sym = XEXP (XEXP (op, 0), 0);
3228 rtx cst = XEXP (XEXP (op, 0), 1);
3230 if (GET_CODE (sym) == SYMBOL_REF
3231 && GET_CODE (cst) == CONST_INT
3232 && satisfies_constraint_K (cst))
3241 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3242 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3243 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3249 constant_to_array (mode, op, arr);
3251 /* Check that each 4-byte slot is identical. */
3253 for (i = 4; i < 16; i += 4)
3254 for (j = 0; j < 4; j++)
3255 if (arr[j] != arr[i + j])
3260 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3261 val = trunc_int_for_mode (val, SImode);
3263 if (which_immediate_load (val) != SPU_NONE)
3267 /* Any mode of 2 bytes or smaller can be loaded with an il
3269 gcc_assert (GET_MODE_SIZE (mode) > 2);
3273 for (i = 0; i < 16 && fsmbi; i++)
3274 if (arr[i] != 0 && repeat == 0)
3276 else if (arr[i] != 0 && arr[i] != repeat)
3279 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
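/* For example (editorial, not part of the original sources): a constant
   whose bytes are all 0x00 or 0xff can be built with a single fsmbi
   (IC_FSMBI); if the repeated nonzero byte is some other value, say
   0x7f, it is classed IC_FSMBI2 and needs one extra insn to fix up the
   byte value. */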
3281 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3294 static enum spu_immediate
3295 which_logical_immediate (HOST_WIDE_INT val)
3297 gcc_assert (val == trunc_int_for_mode (val, SImode));
3299 if (val >= -0x200 && val <= 0x1ff)
3301 if (val >= 0 && val <= 0xffff)
3303 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3305 val = trunc_int_for_mode (val, HImode);
3306 if (val >= -0x200 && val <= 0x1ff)
3308 if ((val & 0xff) == ((val >> 8) & 0xff))
3310 val = trunc_int_for_mode (val, QImode);
3311 if (val >= -0x200 && val <= 0x1ff)
3318 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3321 const_vector_immediate_p (rtx x)
3324 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3325 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3326 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3327 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3333 logical_immediate_p (rtx op, enum machine_mode mode)
3336 unsigned char arr[16];
3339 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3340 || GET_CODE (op) == CONST_VECTOR);
3342 if (GET_CODE (op) == CONST_VECTOR
3343 && !const_vector_immediate_p (op))
3346 if (GET_MODE (op) != VOIDmode)
3347 mode = GET_MODE (op);
3349 constant_to_array (mode, op, arr);
3351 /* Check that bytes are repeated. */
3352 for (i = 4; i < 16; i += 4)
3353 for (j = 0; j < 4; j++)
3354 if (arr[j] != arr[i + j])
3357 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3358 val = trunc_int_for_mode (val, SImode);
3360 i = which_logical_immediate (val);
3361 return i != SPU_NONE && i != SPU_IOHL;
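/* Worked examples (editorial, not part of the original sources):
   0x1ff is accepted through the 10-bit signed range (ori), 0x00400040
   through the repeated-halfword case (orhi) and 0x05050505 through the
   repeated-byte case (orbi); 0x1234 classifies as SPU_IOHL, which this
   predicate rejects. */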
3365 iohl_immediate_p (rtx op, enum machine_mode mode)
3368 unsigned char arr[16];
3371 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3372 || GET_CODE (op) == CONST_VECTOR);
3374 if (GET_CODE (op) == CONST_VECTOR
3375 && !const_vector_immediate_p (op))
3378 if (GET_MODE (op) != VOIDmode)
3379 mode = GET_MODE (op);
3381 constant_to_array (mode, op, arr);
3383 /* Check that bytes are repeated. */
3384 for (i = 4; i < 16; i += 4)
3385 for (j = 0; j < 4; j++)
3386 if (arr[j] != arr[i + j])
3389 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3390 val = trunc_int_for_mode (val, SImode);
3392 return val >= 0 && val <= 0xffff;
3396 arith_immediate_p (rtx op, enum machine_mode mode,
3397 HOST_WIDE_INT low, HOST_WIDE_INT high)
3400 unsigned char arr[16];
3403 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3404 || GET_CODE (op) == CONST_VECTOR);
3406 if (GET_CODE (op) == CONST_VECTOR
3407 && !const_vector_immediate_p (op))
3410 if (GET_MODE (op) != VOIDmode)
3411 mode = GET_MODE (op);
3413 constant_to_array (mode, op, arr);
3415 if (VECTOR_MODE_P (mode))
3416 mode = GET_MODE_INNER (mode);
3418 bytes = GET_MODE_SIZE (mode);
3419 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3421 /* Check that bytes are repeated. */
3422 for (i = bytes; i < 16; i += bytes)
3423 for (j = 0; j < bytes; j++)
3424 if (arr[j] != arr[i + j])
3428 for (j = 1; j < bytes; j++)
3429 val = (val << 8) | arr[j];
3431 val = trunc_int_for_mode (val, mode);
3433 return val >= low && val <= high;
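/* Minimal usage sketch (editorial, not part of the original sources);
   the wrapper name is hypothetical. */
#if 0
static int
example_ai_immediate_p (rtx op, enum machine_mode mode)
{
  /* ai, ahi and friends take a 10-bit signed immediate. */
  return arith_immediate_p (op, mode, -0x200, 0x1ff);
}
#endif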
3437 - any 32-bit constant (SImode, SFmode)
3438 - any constant that can be generated with fsmbi (any mode)
3439 - a 64-bit constant where the high and low bits are identical
3441 - a 128-bit constant where the four 32-bit words match. */
3443 spu_legitimate_constant_p (rtx x)
3445 if (GET_CODE (x) == HIGH)
3447 /* V4SI with all identical symbols is valid. */
3449 && GET_MODE (x) == V4SImode
3450 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3451 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3452 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3453 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3454 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3455 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3457 if (GET_CODE (x) == CONST_VECTOR
3458 && !const_vector_immediate_p (x))
3463 /* Valid addresses are:
3464 - symbol_ref, label_ref, const
3466 - reg + const, where either reg or const is 16 byte aligned
3467 - reg + reg, alignment doesn't matter
3468 The alignment matters in the reg+const case because lqd and stqd
3469 ignore the 4 least significant bits of the const. (TODO: It might be
3470 preferable to allow any alignment and fix it up when splitting.) */
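/* For illustration (editorial, not part of the original sources):
   (reg $3), (plus (reg $3) (reg $4)), (symbol_ref "x") and
   (plus (reg $sp) (const_int 32)) all satisfy the rules above, since a
   multiple-of-16 offset is always safe; an offset such as (const_int 4)
   is only acceptable when the base register is known to be 16-byte
   aligned. */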
3472 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3473 rtx x, int reg_ok_strict)
3475 if (mode == TImode && GET_CODE (x) == AND
3476 && GET_CODE (XEXP (x, 1)) == CONST_INT
3477 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3479 switch (GET_CODE (x))