New subject: [PATCH vkd3d 2/2] vkd3d-shader: Reserve spirv words upfront instead of checking each time.

3 Oct 2019

Signed-off-by: Rémi Bernon <rbernon@codeweavers.com>
---
The shader_sm4_read_instruction function shows up in perf report when
running SOTTR on Intel because of the linear search loop over
opcode_table in get_opcode_info.
The commented enumeration entries are mostly here to help the reader
validate that the opcode_table matches the enumeration.
libs/vkd3d-shader/dxbc.c | 54 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/libs/vkd3d-shader/dxbc.c b/libs/vkd3d-shader/dxbc.c
index 98c51e42..794c6c66 100644
--- a/libs/vkd3d-shader/dxbc.c
+++ b/libs/vkd3d-shader/dxbc.c
@@ -117,10 +117,13 @@
enum vkd3d_sm4_opcode
 {
+    VKD3D_SM4_OP_INVALID                          = -1,
     VKD3D_SM4_OP_ADD                              = 0x00,
     VKD3D_SM4_OP_AND                              = 0x01,
     VKD3D_SM4_OP_BREAK                            = 0x02,
     VKD3D_SM4_OP_BREAKC                           = 0x03,
+    /* VKD3D_SM4_OP_?                             = 0x04, */
+    /* VKD3D_SM4_OP_?                             = 0x05, */
     VKD3D_SM4_OP_CASE                             = 0x06,
     VKD3D_SM4_OP_CONTINUE                         = 0x07,
     VKD3D_SM4_OP_CONTINUEC                        = 0x08,
@@ -135,6 +138,7 @@ enum vkd3d_sm4_opcode
     VKD3D_SM4_OP_DP4                              = 0x11,
     VKD3D_SM4_OP_ELSE                             = 0x12,
     VKD3D_SM4_OP_EMIT                             = 0x13,
+    /* VKD3D_SM4_OP_?                             = 0x14, */
     VKD3D_SM4_OP_ENDIF                            = 0x15,
     VKD3D_SM4_OP_ENDLOOP                          = 0x16,
     VKD3D_SM4_OP_ENDSWITCH                        = 0x17,
@@ -196,6 +200,7 @@ enum vkd3d_sm4_opcode
     VKD3D_SM4_OP_ULT                              = 0x4f,
     VKD3D_SM4_OP_UGE                              = 0x50,
     VKD3D_SM4_OP_UMUL                             = 0x51,
+    /* VKD3D_SM4_OP_?                             = 0x52, */
     VKD3D_SM4_OP_UMAX                             = 0x53,
     VKD3D_SM4_OP_UMIN                             = 0x54,
     VKD3D_SM4_OP_USHR                             = 0x55,
@@ -215,20 +220,24 @@ enum vkd3d_sm4_opcode
     VKD3D_SM4_OP_DCL_INPUT_PS_SGV                 = 0x63,
     VKD3D_SM4_OP_DCL_INPUT_PS_SIV                 = 0x64,
     VKD3D_SM4_OP_DCL_OUTPUT                       = 0x65,
+    /* VKD3D_SM4_OP_?                             = 0x66, */
     VKD3D_SM4_OP_DCL_OUTPUT_SIV                   = 0x67,
     VKD3D_SM4_OP_DCL_TEMPS                        = 0x68,
     VKD3D_SM4_OP_DCL_INDEXABLE_TEMP               = 0x69,
     VKD3D_SM4_OP_DCL_GLOBAL_FLAGS                 = 0x6a,
+    /* VKD3D_SM4_OP_?                             = 0x6b, */
     VKD3D_SM4_OP_LOD                              = 0x6c,
     VKD3D_SM4_OP_GATHER4                          = 0x6d,
     VKD3D_SM4_OP_SAMPLE_POS                       = 0x6e,
     VKD3D_SM4_OP_SAMPLE_INFO                      = 0x6f,
+    /* VKD3D_SM5_OP_?                             = 0x70, */
     VKD3D_SM5_OP_HS_DECLS                         = 0x71,
     VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE           = 0x72,
     VKD3D_SM5_OP_HS_FORK_PHASE                    = 0x73,
     VKD3D_SM5_OP_HS_JOIN_PHASE                    = 0x74,
     VKD3D_SM5_OP_EMIT_STREAM                      = 0x75,
     VKD3D_SM5_OP_CUT_STREAM                       = 0x76,
+    /* VKD3D_SM5_OP_?                             = 0x77, */
     VKD3D_SM5_OP_FCALL                            = 0x78,
     VKD3D_SM5_OP_BUFINFO                          = 0x79,
     VKD3D_SM5_OP_DERIV_RTX_COARSE                 = 0x7a,
@@ -241,6 +250,8 @@ enum vkd3d_sm4_opcode
     VKD3D_SM5_OP_RCP                              = 0x81,
     VKD3D_SM5_OP_F32TOF16                         = 0x82,
     VKD3D_SM5_OP_F16TOF32                         = 0x83,
+    /* VKD3D_SM5_OP_?                             = 0x84, */
+    /* VKD3D_SM5_OP_?                             = 0x85, */
     VKD3D_SM5_OP_COUNTBITS                        = 0x86,
     VKD3D_SM5_OP_FIRSTBIT_HI                      = 0x87,
     VKD3D_SM5_OP_FIRSTBIT_LO                      = 0x88,
@@ -298,6 +309,19 @@ enum vkd3d_sm4_opcode
     VKD3D_SM5_OP_IMM_ATOMIC_UMAX                  = 0xbc,
     VKD3D_SM5_OP_IMM_ATOMIC_UMIN                  = 0xbd,
     VKD3D_SM5_OP_SYNC                             = 0xbe,
+    /* VKD3D_SM5_OP_?                             = 0xbf, */
+    /* VKD3D_SM5_OP_?                             = 0xc0, */
+    /* VKD3D_SM5_OP_?                             = 0xc1, */
+    /* VKD3D_SM5_OP_?                             = 0xc2, */
+    /* VKD3D_SM5_OP_?                             = 0xc3, */
+    /* VKD3D_SM5_OP_?                             = 0xc4, */
+    /* VKD3D_SM5_OP_?                             = 0xc5, */
+    /* VKD3D_SM5_OP_?                             = 0xc6, */
+    /* VKD3D_SM5_OP_?                             = 0xc7, */
+    /* VKD3D_SM5_OP_?                             = 0xc8, */
+    /* VKD3D_SM5_OP_?                             = 0xc9, */
+    /* VKD3D_SM5_OP_?                             = 0xca, */
+    /* VKD3D_SM5_OP_?                             = 0xcb, */
     VKD3D_SM5_OP_EVAL_SAMPLE_INDEX                = 0xcc,
     VKD3D_SM5_OP_EVAL_CENTROID                    = 0xcd,
     VKD3D_SM5_OP_DCL_GS_INSTANCES                 = 0xce,
@@ -947,6 +971,8 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
     {VKD3D_SM4_OP_BREAK,                            VKD3DSIH_BREAK,                            "",     ""},
     {VKD3D_SM4_OP_BREAKC,                           VKD3DSIH_BREAKP,                           "",     "u",
             shader_sm4_read_conditional_op},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM4_OP_CASE,                             VKD3DSIH_CASE,                             "",     "u"},
     {VKD3D_SM4_OP_CONTINUE,                         VKD3DSIH_CONTINUE,                         "",     ""},
     {VKD3D_SM4_OP_CONTINUEC,                        VKD3DSIH_CONTINUEP,                        "",     "u",
@@ -963,6 +989,7 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
     {VKD3D_SM4_OP_DP4,                              VKD3DSIH_DP4,                              "f",    "ff"},
     {VKD3D_SM4_OP_ELSE,                             VKD3DSIH_ELSE,                             "",     ""},
     {VKD3D_SM4_OP_EMIT,                             VKD3DSIH_EMIT,                             "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM4_OP_ENDIF,                            VKD3DSIH_ENDIF,                            "",     ""},
     {VKD3D_SM4_OP_ENDLOOP,                          VKD3DSIH_ENDLOOP,                          "",     ""},
     {VKD3D_SM4_OP_ENDSWITCH,                        VKD3DSIH_ENDSWITCH,                        "",     ""},
@@ -1027,6 +1054,7 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
     {VKD3D_SM4_OP_ULT,                              VKD3DSIH_ULT,                              "u",    "uu"},
     {VKD3D_SM4_OP_UGE,                              VKD3DSIH_UGE,                              "u",    "uu"},
     {VKD3D_SM4_OP_UMUL,                             VKD3DSIH_UMUL,                             "uu",   "uu"},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM4_OP_UMAX,                             VKD3DSIH_UMAX,                             "u",    "uu"},
     {VKD3D_SM4_OP_UMIN,                             VKD3DSIH_UMIN,                             "u",    "uu"},
     {VKD3D_SM4_OP_USHR,                             VKD3DSIH_USHR,                             "u",    "uu"},
@@ -1060,6 +1088,7 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
             shader_sm4_read_dcl_input_ps_siv},
     {VKD3D_SM4_OP_DCL_OUTPUT,                       VKD3DSIH_DCL_OUTPUT,                       "",     "",
             shader_sm4_read_declaration_dst},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM4_OP_DCL_OUTPUT_SIV,                   VKD3DSIH_DCL_OUTPUT_SIV,                   "",     "",
             shader_sm4_read_declaration_register_semantic},
     {VKD3D_SM4_OP_DCL_TEMPS,                        VKD3DSIH_DCL_TEMPS,                        "",     "",
@@ -1068,16 +1097,19 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
             shader_sm4_read_dcl_indexable_temp},
     {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,                 VKD3DSIH_DCL_GLOBAL_FLAGS,                 "",     "",
             shader_sm4_read_dcl_global_flags},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM4_OP_LOD,                              VKD3DSIH_LOD,                              "f",    "fRS"},
     {VKD3D_SM4_OP_GATHER4,                          VKD3DSIH_GATHER4,                          "u",    "fRS"},
     {VKD3D_SM4_OP_SAMPLE_POS,                       VKD3DSIH_SAMPLE_POS,                       "f",    "Ru"},
     {VKD3D_SM4_OP_SAMPLE_INFO,                      VKD3DSIH_SAMPLE_INFO,                      "f",    "R"},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM5_OP_HS_DECLS,                         VKD3DSIH_HS_DECLS,                         "",     ""},
     {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE,           VKD3DSIH_HS_CONTROL_POINT_PHASE,           "",     ""},
     {VKD3D_SM5_OP_HS_FORK_PHASE,                    VKD3DSIH_HS_FORK_PHASE,                    "",     ""},
     {VKD3D_SM5_OP_HS_JOIN_PHASE,                    VKD3DSIH_HS_JOIN_PHASE,                    "",     ""},
     {VKD3D_SM5_OP_EMIT_STREAM,                      VKD3DSIH_EMIT_STREAM,                      "",     "f"},
     {VKD3D_SM5_OP_CUT_STREAM,                       VKD3DSIH_CUT_STREAM,                       "",     "f"},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM5_OP_FCALL,                            VKD3DSIH_FCALL,                            "",     "O",
             shader_sm5_read_fcall},
     {VKD3D_SM5_OP_BUFINFO,                          VKD3DSIH_BUFINFO,                          "i",    "U"},
@@ -1091,6 +1123,8 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
     {VKD3D_SM5_OP_RCP,                              VKD3DSIH_RCP,                              "f",    "f"},
     {VKD3D_SM5_OP_F32TOF16,                         VKD3DSIH_F32TOF16,                         "u",    "f"},
     {VKD3D_SM5_OP_F16TOF32,                         VKD3DSIH_F16TOF32,                         "f",    "u"},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM5_OP_COUNTBITS,                        VKD3DSIH_COUNTBITS,                        "u",    "u"},
     {VKD3D_SM5_OP_FIRSTBIT_HI,                      VKD3DSIH_FIRSTBIT_HI,                      "u",    "u"},
     {VKD3D_SM5_OP_FIRSTBIT_LO,                      VKD3DSIH_FIRSTBIT_LO,                      "u",    "u"},
@@ -1168,6 +1202,19 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] =
     {VKD3D_SM5_OP_IMM_ATOMIC_UMIN,                  VKD3DSIH_IMM_ATOMIC_UMIN,                  "uU",   "iu"},
     {VKD3D_SM5_OP_SYNC,                             VKD3DSIH_SYNC,                             "",     "",
             shader_sm5_read_sync},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
+    {VKD3D_SM4_OP_INVALID,                          VKD3DSIH_NOP,                              "",     ""},
     {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX,                VKD3DSIH_EVAL_SAMPLE_INDEX,                "f",    "fi"},
     {VKD3D_SM5_OP_EVAL_CENTROID,                    VKD3DSIH_EVAL_CENTROID,                    "f",    "f"},
     {VKD3D_SM5_OP_DCL_GS_INSTANCES,                 VKD3DSIH_DCL_GS_INSTANCES,                 "",     "",
@@ -1220,11 +1267,10 @@ static const enum vkd3d_shader_register_type register_type_table[] =
static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode)
 {
-    unsigned int i;
-
-    for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i)
+    if (opcode < ARRAY_SIZE(opcode_table) && opcode_table[opcode].opcode != VKD3D_SM4_OP_INVALID)
     {
-        if (opcode == opcode_table[i].opcode) return &opcode_table[i];
+        assert(opcode_table[opcode].opcode == opcode);
+        return &opcode_table[opcode];
     }
return NULL;
--
2.23.0

    

[PATCH vkd3d 1/2] vkd3d-shader: Optimize get_opcode_info with direct opcode_table access.