vkd3d: DXIL control flow progress - wine-devel

8 Nov 2019


      I'd like to update the ML on control-flow progress and demonstrate some 
problems.
The WIP repo is here: https://github.com/HansKristian-Work/DXIL2SPIRV. 
I'm developing it as a standalone module for the time being.
Control flow in DXIL is a complicated beast: it's a soup of gotos, since 
it is LLVM IR. The only saving grace is that it must be reducible, i.e. no 
branching straight into a loop, or arbitrary backward gotos.
The main problems with emitting SPIR-V are:
- For every conditional branch we need a selection merge construct with 
a unique merge block which the header dominates.
- For every loop header, we need a loop merge with a designated continue 
block and a unique merge block which the header dominates.
- Cannot break out of more than one loop construct at a time (guess what 
DXIL does!).
The main complication currently is that we need ladder breaking. Here's 
a concrete example:
cbuffer Buff : register(b10, space1)
{
     int count1;
     int count2;
     int data[1024];
};
float get_r()
{
     float r = 0.0;
     [loop]
     for (int i = 0; i < count1; i++)
     {
         [loop]
         for (int j = 0; j < count2; j++)
         {
             if (data[i ^ j] == 40)
                 return r;                // <-- goto end; when inlined
             r += float(data[i ^ j]);
         }
     }
     return r;
}
float4 main(float4 pos : POSITION, float4 pos2 : COLOR) : SV_Position
{
     float r = get_r();
     return r.xxxx;
}
This gets compiled into:
...
define void @main() {
   %Buff_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 
2, i32 0, i32 10, i1 false)  ; 
CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
   %1 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, 
%dx.types.Handle %Buff_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   %2 = extractvalue %dx.types.CBufRet.i32 %1, 0
   %3 = icmp sgt i32 %2, 0
   br i1 %3, label %.lr.ph2.preheader, label %"\01?get_r@@YAMXZ.exit"
.lr.ph2.preheader:                                ; preds = %0
   br label %.lr.ph2
.lr.ph2:                                          ; preds = 
%._crit_edge, %.lr.ph2.preheader
   %i.i.0 = phi i32 [ %19, %._crit_edge ], [ 0, %.lr.ph2.preheader ]
   %r.i.0 = phi float [ %r.i.2, %._crit_edge ], [ 0.000000e+00, 
%.lr.ph2.preheader ]
   %4 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, 
%dx.types.Handle %Buff_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   %5 = extractvalue %dx.types.CBufRet.i32 %4, 1
   %6 = icmp sgt i32 %5, 0
   br i1 %6, label %.lr.ph.preheader, label %._crit_edge
.lr.ph.preheader:                                 ; preds = %.lr.ph2
   br label %.lr.ph
.lr.ph:                                           ; preds = %12, 
%.lr.ph.preheader
   %j.i.0 = phi i32 [ %15, %12 ], [ 0, %.lr.ph.preheader ]
   %r.i.1 = phi float [ %14, %12 ], [ %r.i.0, %.lr.ph.preheader ]
   %7 = xor i32 %j.i.0, %i.i.0
   %8 = add i32 %7, 1
   %9 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, 
%dx.types.Handle %Buff_cbuffer, i32 %8)  ; 
CBufferLoadLegacy(handle,regIndex)
   %10 = extractvalue %dx.types.CBufRet.i32 %9, 0
   %11 = icmp eq i32 %10, 40
   br i1 %11, label %"\01?get_r@@YAMXZ.exit.loopexit", label %12
; <label>:12                                      ; preds = %.lr.ph
   %13 = sitofp i32 %10 to float
   %14 = fadd fast float %13, %r.i.1
   %15 = add nuw nsw i32 %j.i.0, 1
   %16 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, 
%dx.types.Handle %Buff_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   %17 = extractvalue %dx.types.CBufRet.i32 %16, 1
   %18 = icmp slt i32 %15, %17
   br i1 %18, label %.lr.ph, label %._crit_edge.loopexit, !llvm.loop !25
._crit_edge.loopexit:                             ; preds = %12
   br label %._crit_edge
._crit_edge:                                      ; preds = 
%._crit_edge.loopexit, %.lr.ph2
   %r.i.2 = phi float [ %r.i.0, %.lr.ph2 ], [ %14, %._crit_edge.loopexit ]
   %19 = add nuw nsw i32 %i.i.0, 1
   %20 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, 
%dx.types.Handle %Buff_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   %21 = extractvalue %dx.types.CBufRet.i32 %20, 0
   %22 = icmp slt i32 %19, %21
   br i1 %22, label %.lr.ph2, label 
%"\01?get_r@@YAMXZ.exit.loopexit.11", !llvm.loop !27
"\01?get_r@@YAMXZ.exit.loopexit":                 ; preds = %.lr.ph
   br label %"\01?get_r@@YAMXZ.exit"
"\01?get_r@@YAMXZ.exit.loopexit.11":              ; preds = %._crit_edge
   br label %"\01?get_r@@YAMXZ.exit"
"\01?get_r@@YAMXZ.exit":                          ; preds = 
%"\01?get_r@@YAMXZ.exit.loopexit.11", %"\01?get_r@@YAMXZ.exit.loopexit", %0
   %.0 = phi float [ 0.000000e+00, %0 ], [ %r.i.1, 
%"\01?get_r@@YAMXZ.exit.loopexit" ], [ %r.i.2, 
%"\01?get_r@@YAMXZ.exit.loopexit.11" ]
   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 
%.0)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 
%.0)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 
%.0)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 
%.0)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
   ret void
}
...
The "return r;" inside the inner loop turns into a full "exit". This is 
a very common pattern where inlined leaf functions return. For a normal 
HLSL -> SPIR-V path, spirv-opt carefully introduces ladders to convert a 
return into a chain of breaks. We have no such luxury in DXIL. 
Basically, the return becomes a forward goto.
After a lot of implementation weirdness, I can turn this into SPIR-V 
which validates. Only control flow is emitted since I haven't looked at 
actual codegen. The resulting GLSL from SPIRV-Cross ends up looking 
something like:
#version 450
void main()
{
     bool COND1;
     if (COND1)
     {
         bool _lr_ph2_preheader;
         bool _lr_ph2;
         bool _lr_ph2_succ;
         bool _lr_ph_preheader;
         bool _lr_ph;
         bool COND11;
         bool _get_r_YAMXZ_exit_loopexit;
         bool _crit_edge_loopexit_pred;
         bool _crit_edge_loopexit;
         bool _crit_edge;
         do
         {
             if (_lr_ph2_succ)
             {
                 do
                 {
                     if (_lr_ph)
                     {
                         break; // <-- goto end;
                     }
                     else
                     {
                     }
                 } while (!COND11);
                 if (!_crit_edge_loopexit_pred) // <-- Ladder to handle goto
                 {
                     break;
                 }
             }
         } while (!_crit_edge);
         bool _get_r_YAMXZ_exit_loopexit_11_pred;
         if (_get_r_YAMXZ_exit_loopexit_11_pred) // <-- Ladder to handle 
goto
         {
             bool _get_r_YAMXZ_exit_loopexit_11;
         }
         bool _get_r_YAMXZ_exit_loopexit_11_succ;
     }
// end:
     bool _get_r_YAMXZ_exit;
}
Other complications still left to deal with are handling Phi nodes 
properly, and switch blocks.
Cheers,
Hans-Kristian