OSDN Git Service

i915g: Fix bug in shader optimizer.
[android-x86/external-mesa.git] / src / gallium / drivers / i915 / i915_fpc_optimize.c
1 /**************************************************************************
2  * 
3  * Copyright 2011 The Chromium OS authors.
4  * All Rights Reserved.
5  * 
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  * 
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  * 
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  * 
26  **************************************************************************/
27
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38
39 static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
40 {
41    return (d1->Register.File == d2->Register.File &&
42            d1->Register.Indirect == d2->Register.Indirect &&
43            d1->Register.Dimension == d2->Register.Dimension &&
44            d1->Register.Index == d2->Register.Index);
45 }
46
47 static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
48 {
49    return (d1->Register.File == d2->Register.File &&
50            d1->Register.Indirect == d2->Register.Indirect &&
51            d1->Register.Dimension == d2->Register.Dimension &&
52            d1->Register.Index == d2->Register.Index &&
53            d1->Register.Absolute == d2->Register.Absolute &&
54            d1->Register.Negate == d2->Register.Negate);
55 }
56
57 static boolean is_unswizzled(struct i915_full_src_register* r,
58                              unsigned write_mask)
59 {
60    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
61       return FALSE;
62    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
63       return FALSE;
64    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
65       return FALSE;
66    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
67       return FALSE;
68    return TRUE;
69 }
70
71 static boolean op_commutes(unsigned opcode)
72 {
73    if (opcode == TGSI_OPCODE_ADD) return TRUE;
74    if (opcode == TGSI_OPCODE_MUL) return TRUE;
75    return FALSE;
76 }
77
78 static unsigned op_neutral_element(unsigned opcode)
79 {
80    if (opcode == TGSI_OPCODE_ADD)
81       return TGSI_SWIZZLE_ZERO;
82    if (opcode == TGSI_OPCODE_MUL)
83       return TGSI_SWIZZLE_ONE;
84
85    debug_printf("Unknown opcode %d\n",opcode);
86    return TGSI_SWIZZLE_ZERO;
87 }
88
89 /*
90  * Sets the swizzle to the neutral element for the operation for the bits
91  * of writemask which are set, swizzle to identity otherwise.
92  */
93 static void set_neutral_element_swizzle(struct i915_full_src_register* r,
94                                         unsigned write_mask,
95                                         unsigned neutral)
96 {
97    if ( write_mask & TGSI_WRITEMASK_X )
98       r->Register.SwizzleX = neutral;
99    else
100       r->Register.SwizzleX = TGSI_SWIZZLE_X;
101
102    if ( write_mask & TGSI_WRITEMASK_Y )
103       r->Register.SwizzleY = neutral;
104    else
105       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
106
107    if ( write_mask & TGSI_WRITEMASK_Z )
108       r->Register.SwizzleZ = neutral;
109    else
110       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
111
112    if ( write_mask & TGSI_WRITEMASK_W )
113       r->Register.SwizzleW = neutral;
114    else
115       r->Register.SwizzleW = TGSI_SWIZZLE_W;
116 }
117
118 /*
119  * Optimize away things like:
120  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
121  *    MOV OUT[0].w, TEMP[2]
122  * into: 
123  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
124  * This is useful for optimizing texenv.
125  */
126 static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
127 {
128    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
129         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
130         op_commutes(current->FullInstruction.Instruction.Opcode) &&
131         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
132         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
133         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
134         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
135         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
136         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
137         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
138    {
139       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
140
141       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
142       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
143                                   next->FullInstruction.Dst[0].Register.WriteMask,
144                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
145
146       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
147                                                            next->FullInstruction.Dst[0].Register.WriteMask;
148       return;
149    }
150
151    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
152         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
153         op_commutes(current->FullInstruction.Instruction.Opcode) &&
154         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
155         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
156         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
157         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
158         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
159         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
160         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
161    {
162       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
163
164       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
165       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
166                                   next->FullInstruction.Dst[0].Register.WriteMask,
167                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
168
169       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
170                                                            next->FullInstruction.Dst[0].Register.WriteMask;
171       return;
172    }
173 }
174
175 static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
176 {
177    o->File      = i->File;
178    o->Indirect  = i->Indirect;
179    o->Dimension = i->Dimension;
180    o->Index     = i->Index;
181    o->SwizzleX  = i->SwizzleX;
182    o->SwizzleY  = i->SwizzleY;
183    o->SwizzleZ  = i->SwizzleZ;
184    o->SwizzleW  = i->SwizzleW;
185    o->Absolute  = i->Absolute;
186    o->Negate    = i->Negate;
187 }
188
189 static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
190 {
191    o->File      = i->File;
192    o->WriteMask = i->WriteMask;
193    o->Indirect  = i->Indirect;
194    o->Dimension = i->Dimension;
195    o->Index     = i->Index;
196 }
197
198 static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
199 {
200    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
201    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
202
203    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
204
205    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
206    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
207    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
208 }
209
210 static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
211 {
212    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
213       memcpy(o, i, sizeof(*o));
214    else
215       copy_instruction(&o->FullInstruction, &i->FullInstruction);
216
217 }
218
219 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
220 {
221    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
222    struct tgsi_parse_context parse;
223    int i = 0;
224
225    out_tokens->NumTokens = 0;
226
227    /* Count the tokens */
228    tgsi_parse_init( &parse, tokens );
229    while( !tgsi_parse_end_of_tokens( &parse ) ) {
230       tgsi_parse_token( &parse );
231       out_tokens->NumTokens++;
232    }
233    tgsi_parse_free (&parse);
234
235    /* Allocate our tokens */
236    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
237
238    tgsi_parse_init( &parse, tokens );
239    while( !tgsi_parse_end_of_tokens( &parse ) ) {
240       tgsi_parse_token( &parse );
241       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
242
243       if (i > 0)
244          i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
245
246       i++;
247    }
248    tgsi_parse_free (&parse);
249
250    return out_tokens;
251 }
252
253 void i915_optimize_free(struct i915_token_list* tokens)
254 {
255    free(tokens->Tokens);
256    free(tokens);
257 }
258
259