For constant folding, first switch on the op type, handling each operation using a function defined in hlsl_constant_ops.c. Each of these operations switches on the data type, if needed.
Since constant value casting had to be rewritten, support was extended to all numeric types.
Signed-off-by: Francisco Casas fcasas@codeweavers.com
---
I suggest moving the definitions of the operations to this new file, since they may end up being rather large and similar to one another.
I am not sure if I should be on the copyright notice of the new files.
Signed-off-by: Francisco Casas fcasas@codeweavers.com
---
 Makefile.am                           |   2 +-
 libs/vkd3d-shader/hlsl_codegen.c      | 104 ++++---------
 libs/vkd3d-shader/hlsl_constant_ops.c | 209 ++++++++++++++++++++++++++
 libs/vkd3d-shader/hlsl_constant_ops.h |  30 ++++
 4 files changed, 268 insertions(+), 77 deletions(-)
 create mode 100644 libs/vkd3d-shader/hlsl_constant_ops.c
 create mode 100644 libs/vkd3d-shader/hlsl_constant_ops.h
diff --git a/Makefile.am b/Makefile.am
index 20fee06d..6bf8e1e6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -194,6 +194,7 @@ libvkd3d_shader_la_SOURCES = \
 	libs/vkd3d-shader/hlsl.c \
 	libs/vkd3d-shader/hlsl.h \
 	libs/vkd3d-shader/hlsl_codegen.c \
+	libs/vkd3d-shader/hlsl_constant_ops.c \
 	libs/vkd3d-shader/hlsl_sm1.c \
 	libs/vkd3d-shader/hlsl_sm4.c \
 	libs/vkd3d-shader/preproc.h \
@@ -304,7 +305,6 @@ XFAIL_TESTS = \
 	tests/hlsl-storage-qualifiers.shader_test \
 	tests/hlsl-vector-indexing.shader_test \
 	tests/hlsl-vector-indexing-uniform.shader_test \
-	tests/max.shader_test \
 	tests/sampler.shader_test \
 	tests/texture-load.shader_test \
 	tests/texture-load-typed.shader_test \
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 75716bdf..9cc09738 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -19,6 +19,7 @@
  */
 
 #include "hlsl.h"
+#include "hlsl_constant_ops.h"
 #include <stdio.h>
 
 /* Split uniforms into two variables representing the constant and temp
@@ -652,11 +653,13 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
     return false;
 }
 
+
 static bool fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 {
     struct hlsl_ir_constant *arg1, *arg2 = NULL, *res;
     struct hlsl_ir_expr *expr;
-    unsigned int i, dimx;
+    unsigned int i;
+    bool success;
 
     if (instr->type != HLSL_IR_EXPR)
         return false;
@@ -670,92 +673,41 @@ static bool fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi
     arg1 = hlsl_ir_constant(expr->operands[0].node);
     if (expr->operands[1].node)
         arg2 = hlsl_ir_constant(expr->operands[1].node);
-    dimx = instr->data_type->dimx;
 
     if (!(res = hlsl_alloc(ctx, sizeof(*res))))
         return false;
     init_node(&res->node, HLSL_IR_CONSTANT, instr->data_type, instr->loc);
 
-    switch (instr->data_type->base_type)
+    switch (expr->op)
     {
-        case HLSL_TYPE_FLOAT:
-        {
-            switch (expr->op)
-            {
-                case HLSL_OP1_CAST:
-                    if (instr->data_type->dimx != arg1->node.data_type->dimx
-                            || instr->data_type->dimy != arg1->node.data_type->dimy)
-                    {
-                        FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, arg1->node.data_type),
-                                debug_hlsl_type(ctx, instr->data_type));
-                        vkd3d_free(res);
-                        return false;
-                    }
-
-                    switch (arg1->node.data_type->base_type)
-                    {
-                        case HLSL_TYPE_INT:
-                            for (i = 0; i < dimx; ++i)
-                                res->value[i].f = arg1->value[i].i;
-                            break;
-
-                        case HLSL_TYPE_UINT:
-                            for (i = 0; i < dimx; ++i)
-                                res->value[i].f = arg1->value[i].u;
-                            break;
-
-                        default:
-                            FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, arg1->node.data_type),
-                                    debug_hlsl_type(ctx, instr->data_type));
-                            vkd3d_free(res);
-                            return false;
-                    }
-                    break;
-
-                default:
-                    FIXME("Fold float op %#x.\n", expr->op);
-                    vkd3d_free(res);
-                    return false;
-            }
+        case HLSL_OP1_CAST:
+            success = constant_op1_cast(ctx, res, arg1);
             break;
-        }
-
-        case HLSL_TYPE_UINT:
-        {
-            switch (expr->op)
-            {
-                case HLSL_OP1_NEG:
-                    for (i = 0; i < instr->data_type->dimx; ++i)
-                        res->value[i].u = -arg1->value[i].u;
-                    break;
-
-                case HLSL_OP2_ADD:
-                    for (i = 0; i < instr->data_type->dimx; ++i)
-                        res->value[i].u = arg1->value[i].u + arg2->value[i].u;
-                    break;
-
-                case HLSL_OP2_MUL:
-                    for (i = 0; i < instr->data_type->dimx; ++i)
-                        res->value[i].u = arg1->value[i].u * arg2->value[i].u;
-                    break;
-
-                default:
-                    FIXME("Fold uint op %#x.\n", expr->op);
-                    vkd3d_free(res);
-                    return false;
-            }
+        case HLSL_OP1_NEG:
+            success = constant_op1_neg(ctx, res, arg1);
+            break;
+        case HLSL_OP2_ADD:
+            success = constant_op2_add(ctx, res, arg1, arg2);
+            break;
+        case HLSL_OP2_MUL:
+            success = constant_op2_mul(ctx, res, arg1, arg2);
            break;
-        }
-
         default:
-            FIXME("Fold type %#x op %#x.\n", instr->data_type->base_type, expr->op);
-            vkd3d_free(res);
-            return false;
+            FIXME("Fold op %#x.\n", expr->op);
+            success = 0;
     }
 
-    list_add_before(&expr->node.entry, &res->node.entry);
-    replace_node(&expr->node, &res->node);
-    return true;
+    if (success)
+    {
+        list_add_before(&expr->node.entry, &res->node.entry);
+        replace_node(&expr->node, &res->node);
+        return true;
+    }
+    else
+    {
+        vkd3d_free(res);
+        return false;
+    }
 }
 
 static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d-shader/hlsl_constant_ops.c
new file mode 100644
index 00000000..406d1058
--- /dev/null
+++ b/libs/vkd3d-shader/hlsl_constant_ops.c
@@ -0,0 +1,209 @@
+/*
+ * HLSL constant value operations for constant folding
+ *
+ * Copyright 2022 Francisco Casas for CodeWeavers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "hlsl_constant_ops.h"
+
+int constant_op1_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src)
+{
+    uint32_t u;
+    int32_t i;
+    double d;
+    float f;
+
+    if (tgt->node.data_type->dimx != src->node.data_type->dimx ||
+            tgt->node.data_type->dimy != src->node.data_type->dimy)
+    {
+        FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type),
+                debug_hlsl_type(ctx, tgt->node.data_type));
+        return 0;
+    }
+
+    for (int k = 0; k < 4; k++)
+    {
+        switch (src->node.data_type->base_type)
+        {
+            case HLSL_TYPE_FLOAT:
+            case HLSL_TYPE_HALF:
+                u = src->value[k].f;
+                i = src->value[k].f;
+                f = src->value[k].f;
+                d = src->value[k].f;
+                break;
+            case HLSL_TYPE_DOUBLE:
+                u = src->value[k].d;
+                i = src->value[k].d;
+                f = src->value[k].d;
+                d = src->value[k].d;
+                break;
+            case HLSL_TYPE_INT:
+                u = src->value[k].i;
+                i = src->value[k].i;
+                f = src->value[k].i;
+                d = src->value[k].i;
+                break;
+            case HLSL_TYPE_UINT:
+                u = src->value[k].u;
+                i = src->value[k].u;
+                f = src->value[k].u;
+                d = src->value[k].u;
+                break;
+            case HLSL_TYPE_BOOL:
+                u = !!src->value[k].u;
+                i = !!src->value[k].u;
+                f = !!src->value[k].u;
+                d = !!src->value[k].u;
+                break;
+            default:
+                FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type),
+                        debug_hlsl_type(ctx, tgt->node.data_type));
+                return 0;
+        }
+        switch (tgt->node.data_type->base_type)
+        {
+            case HLSL_TYPE_FLOAT:
+            case HLSL_TYPE_HALF:
+                tgt->value[k].f = f;
+                break;
+            case HLSL_TYPE_DOUBLE:
+                tgt->value[k].d = d;
+                break;
+            case HLSL_TYPE_INT:
+                tgt->value[k].i = i;
+                break;
+            case HLSL_TYPE_UINT:
+                tgt->value[k].u = u;
+                break;
+            case HLSL_TYPE_BOOL:
+                tgt->value[k].u = (!!u) * 0xffffffff;
+                break;
+            default:
+                FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type),
+                        debug_hlsl_type(ctx, tgt->node.data_type));
+                return 0;
+        }
+    }
+    return 1;
+}
+
+int constant_op1_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src)
+{
+    enum hlsl_base_type type = tgt->node.data_type->base_type;
+
+    assert(type == src->node.data_type->base_type);
+
+    for (int k = 0; k < 4; k++)
+    {
+        switch (type)
+        {
+            case HLSL_TYPE_FLOAT:
+            case HLSL_TYPE_HALF:
+                tgt->value[k].f = -src->value[k].f;
+                break;
+            case HLSL_TYPE_DOUBLE:
+                tgt->value[k].d = -src->value[k].d;
+                break;
+            case HLSL_TYPE_INT:
+                tgt->value[k].i = -src->value[k].i;
+                break;
+            case HLSL_TYPE_UINT:
+                tgt->value[k].u = -src->value[k].u;
+                break;
+            default:
+                FIXME("Fold \"%s\" for type %s.", debug_hlsl_expr_op(HLSL_OP1_NEG),
+                        debug_hlsl_type(ctx, tgt->node.data_type));
+                return 0;
+        }
+    }
+    return 1;
+}
+
+int constant_op2_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
+{
+    enum hlsl_base_type type = tgt->node.data_type->base_type;
+
+    assert(type == src1->node.data_type->base_type);
+    assert(type == src2->node.data_type->base_type);
+
+    for (int k = 0; k < 4; k++)
+    {
+        switch (type)
+        {
+            case HLSL_TYPE_FLOAT:
+            case HLSL_TYPE_HALF:
+                tgt->value[k].f = src1->value[k].f + src2->value[k].f;
+                break;
+            case HLSL_TYPE_DOUBLE:
+                tgt->value[k].d = src1->value[k].d + src2->value[k].d;
+                break;
+            case HLSL_TYPE_INT:
+                tgt->value[k].i = src1->value[k].i + src2->value[k].i;
+                break;
+            case HLSL_TYPE_UINT:
+                tgt->value[k].u = src1->value[k].u + src2->value[k].u;
+                break;
+            default:
+                FIXME("Fold \"%s\" for type %s.", debug_hlsl_expr_op(HLSL_OP2_ADD),
+                        debug_hlsl_type(ctx, tgt->node.data_type));
+                return 0;
+        }
+    }
+    return 1;
+}
+
+int constant_op2_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
+{
+    enum hlsl_base_type type = tgt->node.data_type->base_type;
+
+    assert(type == src1->node.data_type->base_type);
+    assert(type == src2->node.data_type->base_type);
+
+    for (int k = 0; k < 4; k++)
+    {
+        switch (type)
+        {
+            case HLSL_TYPE_FLOAT:
+            case HLSL_TYPE_HALF:
+                tgt->value[k].f = src1->value[k].f * src2->value[k].f;
+                break;
+            case HLSL_TYPE_DOUBLE:
+                tgt->value[k].d = src1->value[k].d * src2->value[k].d;
+                break;
+            case HLSL_TYPE_INT:
+                tgt->value[k].i = src1->value[k].i * src2->value[k].i;
+                break;
+            case HLSL_TYPE_UINT:
+                tgt->value[k].u = src1->value[k].u * src2->value[k].u;
+                break;
+            default:
+                FIXME("Fold \"%s\" for type %s.", debug_hlsl_expr_op(HLSL_OP2_MUL),
+                        debug_hlsl_type(ctx, tgt->node.data_type));
+                return 0;
+        }
+    }
+    return 1;
+}
+
+
+
diff --git a/libs/vkd3d-shader/hlsl_constant_ops.h b/libs/vkd3d-shader/hlsl_constant_ops.h
new file mode 100644
index 00000000..17a8a2e8
--- /dev/null
+++ b/libs/vkd3d-shader/hlsl_constant_ops.h
@@ -0,0 +1,30 @@
+/*
+ * HLSL constant value operations for constant folding
+ *
+ * Copyright 2022 Francisco Casas for CodeWeavers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "hlsl.h"
+
+int constant_op1_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src);
+int constant_op1_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src);
+int constant_op2_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2);
+int constant_op2_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *tgt,
+        struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2);
Hi,
On 03/01/22 15:57, Francisco Casas wrote:
For constant folding, first switch on the op type, handling each operation using a function defined in hlsl_constant_ops.c.
Creating a new file makes sense to me, but while you're doing it, it also makes sense to copy the whole implementation of fold_constants() there, doesn't it? It is logically related to hlsl_constant_ops.c, and it also spares you from adding a lot of functions to the header file (also, if you do that, you probably only have one function to add to the header, fold_constants() itself, and you can put it in hlsl.h instead of creating a new file).
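Roughly, what I have in mind is something like this (just a sketch, with illustrative names; the exact signatures are of course up to you):

/* hlsl.h: the only declaration the rest of the compiler would need. */
bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);

/* hlsl_constant_ops.c: the per-operation helpers can then stay private to the file. */
static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src)
{
    /* ... */
    return true;
}

bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
    /* The current body of fold_constants(), switching on expr->op and
     * dispatching to the static helpers above. */
    return false;
}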
While I don't have any specific objection against it, is there a particular reason for inverting the switch on the type and on the operation?
Each of these operations switches on the data type, if needed.
As I just said on the other thread (where I replied before noticing this one), watch out for undefined behavior that some operations entail.
I am not sure if I should be on the copyright notice of the new files.
What you did looks fine to me. It doesn't look like we're too strict about code attribution anyway: it seems that the first developer to create a file leaves their name on it, but nobody ever adds their name to a file they change. At least, I am not aware of that.
Giovanni.
January 4, 2022 11:36 AM, "Giovanni Mascellani" gmascellani@codeweavers.com wrote:
Hi,
On 03/01/22 15:57, Francisco Casas wrote:
For constant folding, first switch on the op type, handling each operation using a function defined in hlsl_constant_ops.c.
Creating a new file makes sense to me, but while you're doing it, it also makes sense to copy the whole implementation of fold_constants() there, doesn't it? It is logically related to hlsl_constant_ops.c, and it also spares you from adding a lot of functions to the header file (also, if you do that, you probably only have one function to add to the header, fold_constants() itself, and you can put it in hlsl.h instead of creating a new file).
That sounds like a very good idea to me!
While I don't have any specific objection against it, is there a particular reason for inverting the switch on the type and on the operation?
Zebediah mentioned that in the future we may want to add operations that aren't typed. Also, I think that splitting the problem op-wise allows for better organization, since there are considerably more ops than types.
Each of these operations switches on the data type, if needed.
As I just said on the other thread (where I replied before noticing this one), watch out for undefined behavior that some operations entail.
Noted. The only problem with (+), (*), and (-) that I can think of is signed integer overflow. I am not sure yet whether implicit casts in C may result in undefined behavior; I must check.
Hi,
On 04/01/22 17:59, Francisco Casas wrote:
Noted. The only problem with (+), (*), and (-), I can think of is signed integer overflow.
I think so. Notice that for these three operations (and unary negation) you can just treat signed numbers as unsigned and everything will be fine, at least on the platforms we target.
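For instance, something along these lines is well defined (just a sketch with a made-up helper name, not something from the patch), and on the two's-complement targets we care about it produces the same bits as the signed operation would:

#include <stdint.h>

static int32_t fold_add_i32(int32_t a, int32_t b)
{
    /* Unsigned arithmetic wraps around instead of overflowing; converting the
     * result back to int32_t is implementation-defined rather than undefined,
     * and does what we want on the platforms we target. */
    return (int32_t)((uint32_t)a + (uint32_t)b);
}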
For floating point operations the result can unfortunately depend on the currently set floating point environment (so in theory we'd have to check what the native compiler does and set the environment accordingly), but I guess we can ignore this problem for the moment. I don't think those operations should cause floating point exceptions, with the possible exception of signaling NaNs, though I don't really know how those work in practice. This is something that can be deferred until later too, I think.
I am not sure yet whether implicit casts in C may result in undefined behavior; I must check.
I don't think so. Implicit casting is not different from explicit casting in what it does, only in when it is allowed, I think.
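For example (a trivial standalone snippet, not vkd3d code), these two conversions go through exactly the same rules and produce the same value:

#include <stdio.h>

int main(void)
{
    float f = 3.7f;
    int a = f;      /* implicit conversion: truncates toward zero, a == 3 */
    int b = (int)f; /* explicit cast: same conversion, b == 3 */

    printf("%d %d\n", a, b);
    return 0;
}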
Giovanni.
On Tue, Jan 4, 2022 at 5:59 PM Francisco Casas fcasas@codeweavers.com wrote:
January 4, 2022 11:36 AM, "Giovanni Mascellani" gmascellani@codeweavers.com wrote:
Hi,
On 03/01/22 15:57, Francisco Casas wrote:
For constant folding, first switch on the op type, handling each operation using a function defined in hlsl_constant_ops.c.
Creating a new file makes sense to me, but while you're doing it, it also makes sense to copy the whole implementation of fold_constants() there, doesn't it? It is logically related to hlsl_constant_ops.c, and it also spares you from adding a lot of functions to the header file (also, if you do that, you probably only have one function to add to the header, fold_constants() itself, and you can put it in hlsl.h instead of creating a new file).
That sounds like a very good idea to me!
While I don't have any specific objection against it, is there a particular reason for inverting the switch on the type and on the operation?
Zebediah mentioned that in the future we may want to add operations that aren't typed. Also, I think that splitting the problem op-wise allows for better organization, since there are considerably more ops than types.
I agree, it looks much better to me this way.
Each of these operations switches on the data type, if needed.
As I just said on the other thread (where I replied before noticing this one), watch out for undefined behavior that some operations entail.
Noted. The only problem with (+), (*), and (-) that I can think of is signed integer overflow. I am not sure yet whether implicit casts in C may result in undefined behavior; I must check.
Another one that potentially needs some care is folding float expressions. E.g. even transforming "(x + a) + b" into "x + (a + b)" (with x float, a and b constants) can technically affect the final result, depending on the specific values. We need to replicate MS's compiler in this regard, or at least support compatible behavior as one of the options. I've been looking into this a bit and it appears that the native compiler always allows constant folding transformations, regardless of the presence of the precise keyword or the D3DCOMPILE_IEEE_STRICTNESS flag to D3DCompile(). Other transformations / optimizations are affected, but I guess we'll think about those when we get there.
So, as far as constant folding is concerned, always optimizing float constants should be okay. Or we could have our own compilation flag to control that, in case we feel that behavior might be useful to some non-Wine user.
Hi,
On 19/01/22 13:10, Matteo Bruni wrote:
Another one that potentially needs some care is folding float expressions. E.g. even transforming "(x + a) + b" into "x + (a + b)" (with x float, a and b constants) can technically affect the final result, depending on the specific values. We need to replicate MS's compiler in this regard, or at least support compatible behavior as one of the options. I've been looking into this a bit and it appears that the native compiler always allows constant folding transformations, regardless of the presence of the precise keyword or the D3DCOMPILE_IEEE_STRICTNESS flag to D3DCompile(). Other transformations / optimizations are affected, but I guess we'll think about those when we get there.
So, as far as constant folding is concerned, always optimizing float constants should be okay. Or we could have our own compilation flag to control that, in case we feel that behavior might be useful to some non-Wine user.
I am not completely sure what you mean here. Are you saying that you fear that, if we have "(x + y) + z" with x, y and z constants, the native compiler could transform this to "x + (y + z)" before doing constant folding?
Giovanni.
On Wed, Jan 19, 2022 at 1:42 PM Giovanni Mascellani gmascellani@codeweavers.com wrote:
Hi,
On 19/01/22 13:10, Matteo Bruni wrote:
Another one that potentially needs some care is folding float expressions. E.g. even transforming "(x + a) + b" into "x + (a + b)" (with x float, a and b constants) can technically affect the final result, depending on the specific values. We need to replicate MS's compiler in this regard, or at least support compatible behavior as one of the options. I've been looking into this a bit and it appears that the native compiler always allows constant folding transformations, regardless of the presence of the precise keyword or the D3DCOMPILE_IEEE_STRICTNESS flag to D3DCompile(). Other transformations / optimizations are affected, but I guess we'll think about those when we get there.
So, as far as constant folding is concerned, always optimizing float constants should be okay. Or we could have our own compilation flag to control that, in case we feel that behavior might be useful to some non-Wine user.
I am not completely sure what you mean here. Are you saying that you fear that, if we have "(x + y) + z" with x, y and z constants, the native compiler could transform this to "x + (y + z)" before doing constant folding?
My example would be something like:
result = x + 4 + 2;
which the compiler then transforms into:
result = x + 6;
by means of constant folding. Assuming x is float, technically those two might not be the same because of rounding errors (although probably there is no danger with these particular values).
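To make it concrete, here is a tiny standalone C snippet (with made-up values chosen so that the difference actually shows up, nothing from a real shader):

#include <stdio.h>

int main(void)
{
    float x = 100000000.0f;    /* exactly representable as a float */
    float t = x + 3.0f;        /* rounds back to 100000000.0f */
    float unfolded = t + 3.0f; /* still 100000000.0f */
    float folded = x + 6.0f;   /* constants folded to 6.0f first; rounds to 100000008.0f */

    printf("unfolded %.1f, folded %.1f\n", unfolded, folded);
    return 0;
}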
But that's moot for d3dcompiler. My question is whether any other vkd3d-shader user would care. My guess is probably not, and even in that case we can wait to add the feature until said user comes up.
On 1/3/22 08:57, Francisco Casas wrote:
For constant folding, first switch on the op type, handling each operation using a function defined in hlsl_constant_ops.c. Each of these operations switches on the data type, if needed.
Since constant value casting had to be rewritten, support was extended to all numeric types.
Signed-off-by: Francisco Casas fcasas@codeweavers.com
I suggest moving the definitions of the operations to this new file, since they may end up being rather large and similar to one another.
I am not sure if I should be on the copyright notice of the new files.
Signed-off-by: Francisco Casas fcasas@codeweavers.com
 Makefile.am                           |   2 +-
 libs/vkd3d-shader/hlsl_codegen.c      | 104 ++++---------
 libs/vkd3d-shader/hlsl_constant_ops.c | 209 ++++++++++++++++++++++++++
 libs/vkd3d-shader/hlsl_constant_ops.h |  30 ++++
 4 files changed, 268 insertions(+), 77 deletions(-)
 create mode 100644 libs/vkd3d-shader/hlsl_constant_ops.c
 create mode 100644 libs/vkd3d-shader/hlsl_constant_ops.h
Giovanni has already said most of what I would say. I would add, though, that this patch does at least three things at once. In general, please try to avoid moving code and changing it at the same time. I.e. move hlsl_fold_constants() to a separate file, *then* change the switch order, *then* make cast folding more generic.