/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_constant_expressions.h"
#include "nir_deref.h"
#include <math.h>

/*
 * Implements SSA-based constant folding.
 */

struct constant_fold_state {
   bool has_load_constant;
   bool has_indirect_load_const;
};

static bool
try_fold_alu(nir_builder *b, nir_alu_instr *alu)
{
   nir_const_value src[NIR_MAX_VEC_COMPONENTS][NIR_MAX_VEC_COMPONENTS];

   if (!alu->dest.dest.is_ssa)
      return false;

   /* If any of the inputs or outputs has an unsized type, we have to guess
    * the bit-size.  In that case, the validator ensures that all bit-sizes
    * match, so we can take the bit-size from the first input or output with
    * an unsized type.  If all the inputs and outputs are sized, we don't
    * need to guess the bit-size at all: the code generated for constant
    * opcodes in that case already knows the sizes of the types involved and
    * does not use the provided bit-size for anything (although it still
    * requires a valid bit-size to be passed in).
    */
   unsigned bit_size = 0;
   if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type))
      bit_size = alu->dest.dest.ssa.bit_size;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (!alu->src[i].src.is_ssa)
         return false;

      if (bit_size == 0 &&
          !nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i]))
         bit_size = alu->src[i].src.ssa->bit_size;

      nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;

      if (src_instr->type != nir_instr_type_load_const)
         return false;

      nir_load_const_instr *load_const = nir_instr_as_load_const(src_instr);

      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++) {
         src[i][j] = load_const->value[alu->src[i].swizzle[j]];
      }

      /* We shouldn't have any source modifiers in the optimization loop. */
      assert(!alu->src[i].abs && !alu->src[i].negate);
   }

   if (bit_size == 0)
      bit_size = 32;

   /* We shouldn't have any saturate modifiers in the optimization loop. */
   assert(!alu->dest.saturate);

   nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
   nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS];
   memset(dest, 0, sizeof(dest));
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i)
      srcs[i] = src[i];

   nir_eval_const_opcode(alu->op, dest, alu->dest.dest.ssa.num_components,
                         bit_size, srcs,
                         b->shader->info.float_controls_execution_mode);

   b->cursor = nir_before_instr(&alu->instr);
   nir_ssa_def *imm = nir_build_imm(b, alu->dest.dest.ssa.num_components,
                                    alu->dest.dest.ssa.bit_size,
                                    dest);
   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, imm);
   nir_instr_remove(&alu->instr);
   nir_instr_free(&alu->instr);

   return true;
}
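/* For illustration, a sketch of what try_fold_alu does to an ALU instruction
 * whose sources are all load_const (NIR shown in rough textual form; exact
 * printing varies between NIR versions):
 *
 *    ssa_1 = load_const (0x40000000 = 2.000000)
 *    ssa_2 = load_const (0x40400000 = 3.000000)
 *    ssa_3 = fmul ssa_1, ssa_2
 *
 * becomes
 *
 *    ssa_1 = load_const (0x40000000 = 2.000000)
 *    ssa_2 = load_const (0x40400000 = 3.000000)
 *    ssa_3 = load_const (0x40c00000 = 6.000000)
 *
 * with every use of the fmul rewritten to the new load_const.  The now-dead
 * load_consts are left for a later DCE pass to clean up.
 */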
static nir_const_value *
const_value_for_deref(nir_deref_instr *deref)
{
   if (!nir_deref_mode_is(deref, nir_var_mem_constant))
      return NULL;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   if (path.path[0]->deref_type != nir_deref_type_var)
      goto fail;

   nir_variable *var = path.path[0]->var;
   assert(var->data.mode == nir_var_mem_constant);
   if (var->constant_initializer == NULL)
      goto fail;

   nir_constant *c = var->constant_initializer;
   nir_const_value *v = NULL; /* Vector value for array-deref-of-vec */

   for (unsigned i = 1; path.path[i] != NULL; i++) {
      nir_deref_instr *p = path.path[i];
      switch (p->deref_type) {
      case nir_deref_type_var:
         unreachable("Deref paths can only start with a var deref");

      case nir_deref_type_array: {
         assert(v == NULL);
         if (!nir_src_is_const(p->arr.index))
            goto fail;

         uint64_t idx = nir_src_as_uint(p->arr.index);
         if (c->num_elements > 0) {
            assert(glsl_type_is_array(path.path[i-1]->type));
            if (idx >= c->num_elements)
               goto fail;
            c = c->elements[idx];
         } else {
            assert(glsl_type_is_vector(path.path[i-1]->type));
            assert(glsl_type_is_scalar(p->type));
            if (idx >= NIR_MAX_VEC_COMPONENTS)
               goto fail;
            v = &c->values[idx];
         }
         break;
      }

      case nir_deref_type_struct:
         assert(glsl_type_is_struct(path.path[i-1]->type));
         assert(v == NULL && c->num_elements > 0);
         if (p->strct.index >= c->num_elements)
            goto fail;
         c = c->elements[p->strct.index];
         break;

      default:
         goto fail;
      }
   }

   /* We have to have ended at a vector */
   assert(c->num_elements == 0);
   nir_deref_path_finish(&path);
   return v ? v : c->values;

fail:
   nir_deref_path_finish(&path);
   return NULL;
}
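/* For illustration, a deref chain that const_value_for_deref can resolve,
 * assuming a hypothetical nir_var_mem_constant variable "arr" of type
 * vec4[8] with a constant initializer and constant indices:
 *
 *    deref_var arr            -> c = arr's nir_constant (array of vec4)
 *    deref_array (*arr)[2]    -> c = c->elements[2]     (one vec4)
 *    deref_array (*prev)[1]   -> v = &c->values[1]      (one component)
 *
 * Any non-constant index along the way makes the walk bail out through the
 * fail label, and the caller then leaves the load_deref alone.
 */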
static bool
try_fold_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                   struct constant_fold_state *state)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_demote_if:
   case nir_intrinsic_discard_if:
   case nir_intrinsic_terminate_if:
      if (nir_src_is_const(intrin->src[0])) {
         if (nir_src_as_bool(intrin->src[0])) {
            b->cursor = nir_before_instr(&intrin->instr);

            nir_intrinsic_op op;
            switch (intrin->intrinsic) {
            case nir_intrinsic_discard_if:
               op = nir_intrinsic_discard;
               break;
            case nir_intrinsic_demote_if:
               op = nir_intrinsic_demote;
               break;
            case nir_intrinsic_terminate_if:
               op = nir_intrinsic_terminate;
               break;
            default:
               unreachable("invalid intrinsic");
            }

            nir_intrinsic_instr *new_instr =
               nir_intrinsic_instr_create(b->shader, op);
            nir_builder_instr_insert(b, &new_instr->instr);
         }

         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      nir_const_value *v = const_value_for_deref(deref);
      if (v) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_ssa_def *val = nir_build_imm(b, intrin->dest.ssa.num_components,
                                          intrin->dest.ssa.bit_size, v);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;
   }

   case nir_intrinsic_load_constant: {
      state->has_load_constant = true;

      if (!nir_src_is_const(intrin->src[0])) {
         state->has_indirect_load_const = true;
         return false;
      }

      unsigned offset = nir_src_as_uint(intrin->src[0]);
      unsigned base = nir_intrinsic_base(intrin);
      unsigned range = nir_intrinsic_range(intrin);
      assert(base + range <= b->shader->constant_data_size);

      b->cursor = nir_before_instr(&intrin->instr);
      nir_ssa_def *val;
      if (offset >= range) {
         val = nir_ssa_undef(b, intrin->dest.ssa.num_components,
                             intrin->dest.ssa.bit_size);
      } else {
         nir_const_value imm[NIR_MAX_VEC_COMPONENTS];
         memset(imm, 0, sizeof(imm));
         uint8_t *data = (uint8_t *)b->shader->constant_data + base;
         for (unsigned i = 0; i < intrin->num_components; i++) {
            unsigned bytes = intrin->dest.ssa.bit_size / 8;
            bytes = MIN2(bytes, range - offset);

            memcpy(&imm[i].u64, data + offset, bytes);
            offset += bytes;
         }
         val = nir_build_imm(b, intrin->dest.ssa.num_components,
                             intrin->dest.ssa.bit_size, imm);
      }
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val);
      nir_instr_remove(&intrin->instr);
      return true;
   }

   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
      /* All of these have the data payload in the first source.  They may
       * have a second source with a shuffle index, but that doesn't matter
       * if the data is constant.
       */
      if (nir_src_is_const(intrin->src[0])) {
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, intrin->src[0].ssa);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      if (nir_src_is_const(intrin->src[0])) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_imm_true(b));
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   default:
      return false;
   }
}
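/* For illustration, the discard_if folding above in both directions:
 *
 *    discard_if (load_const true)   ->  discard    (made unconditional)
 *    discard_if (load_const false)  ->  removed entirely
 *
 * demote_if and terminate_if fold the same way to demote and terminate.
 */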
static bool
try_fold_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return try_fold_alu(b, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return try_fold_intrinsic(b, nir_instr_as_intrinsic(instr), _state);
   default:
      /* Don't know how to constant fold */
      return false;
   }
}

bool
nir_opt_constant_folding(nir_shader *shader)
{
   struct constant_fold_state state;
   state.has_load_constant = false;
   state.has_indirect_load_const = false;

   bool progress = nir_shader_instructions_pass(shader, try_fold_instr,
                                                nir_metadata_block_index |
                                                nir_metadata_dominance,
                                                &state);

   /* This only frees the constant data if the shader actually had
    * load_constant intrinsics and all of them were folded.  If there were
    * no constant loads at all, the data might still be in use: the loads
    * may have been lowered to load_ubo.
    */
   if (state.has_load_constant &&
       !state.has_indirect_load_const &&
       shader->constant_data_size) {
      ralloc_free(shader->constant_data);
      shader->constant_data = NULL;
      shader->constant_data_size = 0;
   }

   return progress;
}
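/* Usage sketch: this pass is typically run in a driver's optimization loop
 * together with other cleanups until a fixed point is reached (hypothetical
 * loop; the exact set and order of passes is up to the driver):
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_constant_folding(shader);
 *       progress |= nir_copy_prop(shader);
 *       progress |= nir_opt_dce(shader);
 *    } while (progress);
 *
 * Folding exposes new copy-propagation and DCE opportunities, which in turn
 * can make more ALU sources constant on the next iteration.
 */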