[PATCH 2/4] winevulkan: Replace temp heap_allocs with alloca - wine-devel

5 Mar 2020

This allocates these structs and arrays of structures on the stack instead of the heap (which is expensive and takes time!)
These structures and arrays of structures are small, and this is all super duper hot path code.
We need to forceinline these conversion functions in the compiler as otherwise the alloca would invalidate their inlining.
Signed-off-by: Joshua Ashton joshua@froggi.es
---
 dlls/winevulkan/make_vulkan | 142 ++----------------------------------
 dlls/winevulkan/vulkan.c    |  13 +---
 2 files changed, 8 insertions(+), 147 deletions(-)

diff --git a/dlls/winevulkan/make_vulkan b/dlls/winevulkan/make_vulkan
index 3593410041..954d484933 100755
--- a/dlls/winevulkan/make_vulkan
+++ b/dlls/winevulkan/make_vulkan
@@ -666,13 +666,6 @@ class VkFunction(object):
body += p.copy(Direction.OUTPUT)
-        # Perform any required cleanups. Most of these are for array functions.
-        for p in self.params:
-            if not p.needs_free():
-                continue
-
-            body += p.free()
-
         # Finally return the result.
         if self.type != "void":
             body += "    return result;\n"
@@ -1084,9 +1077,6 @@ class VkMember(object):
         else:
             conversions.append(ConversionFunction(False, False, direction, struct))
-        if self.needs_free():
-            conversions.append(FreeFunction(self.is_dynamic_array(), struct))
-
         return conversions
def is_const(self):
@@ -1152,17 +1142,6 @@ class VkMember(object):
         struct = self.type_info["data"]
         return struct.needs_conversion()
-    def needs_free(self):
-        if not self.needs_conversion():
-            return False
-
-        if self.is_dynamic_array():
-            return True
-
-        # TODO: some non-pointer structs and optional pointer structs may need freeing,
-        # though none of this type have been encountered yet.
-        return False
-
     def needs_struct_extensions_conversion(self):
         if not self.is_struct():
             return False
@@ -1248,11 +1227,6 @@ class VkParam(object):
         if self._direction in [Direction.INPUT_OUTPUT, Direction.OUTPUT]:
             self.output_conv = ConversionFunction(False, self.is_dynamic_array(), Direction.OUTPUT, self.struct)
-        # Dynamic arrays, but also some normal structs (e.g. VkCommandBufferBeginInfo) need memory
-        # allocation and thus some cleanup.
-        if self.is_dynamic_array() or self.struct.needs_free():
-            self.free_func = FreeFunction(self.is_dynamic_array(), self.struct)
-
     def _set_direction(self):
         """ Internal helper function to set parameter direction (input/output/input_output). """
@@ -1380,20 +1354,6 @@ class VkParam(object):
     def format_string(self):
         return self.format_str
-    def free(self):
-        if self.is_dynamic_array():
-            if self.struct.returnedonly:
-                # For returnedonly, counts is stored in a pointer.
-                return "    free_{0}_array({1}_host, *{2});\n".format(self.type, self.name, self.dyn_array_len)
-            else:
-                return "    free_{0}_array({1}_host, {2});\n".format(self.type, self.name, self.dyn_array_len)
-        else:
-            # We are operating on a single structure. Some structs (very rare) contain dynamic members,
-            # which would need freeing.
-            if self.struct.needs_free():
-                return "    free_{0}(&{1}_host);\n".format(self.type, self.name)
-        return ""
-
     def get_conversions(self):
         """ Get a list of conversions required for this parameter if any.
         Parameters which are structures may require conversion between win32
@@ -1430,8 +1390,6 @@ class VkParam(object):
             conversions.append(self.input_conv)
         if self.output_conv is not None:
             conversions.append(self.output_conv)
-        if self.free_func is not None:
-            conversions.append(self.free_func)
return conversions
@@ -1479,9 +1437,6 @@ class VkParam(object):
return False
-    def needs_free(self):
-        return self.free_func is not None
-
     def needs_input_conversion(self):
         return self.input_conv is not None
@@ -1694,17 +1649,6 @@ class VkStruct(Sequence):
                 return True
         return False
-    def needs_free(self):
-        """ Check if any struct member needs some memory freeing."""
-
-        for m in self.members:
-            if m.needs_free():
-                return True
-
-            continue
-
-        return False
-
     def needs_struct_extensions_conversion(self):
         """ Checks if structure extensions in pNext chain need conversion. """
         ret = False
@@ -1750,7 +1694,10 @@ class ConversionFunction(object):
             return_type = "{0}_host".format(self.type)
# Generate function prototype.
-        body = "static inline {0} *{1}(".format(return_type, self.name)
+        # We *must* forceinline otherwise our alloca will be
+        # garbage when this function returns.
+        # Regular inline won't cut it, alloca invalidates regular inlining.
+        body = "static WINEVULKAN_FORCEINLINE {0} *{1}(".format(return_type, self.name)
         body += ", ".join(p for p in params)
         body += ")\n{\n"
@@ -1758,7 +1705,7 @@ class ConversionFunction(object):
         body += "    unsigned int i;\n\n"
         body += "    if (!in) return NULL;\n\n"
-        body += "    out = heap_alloc(count * sizeof(*out));\n"
+        body += "    out = WINEVULKAN_ALLOCA(count * sizeof(*out));\n"
body += "    for (i = 0; i < count; i++)\n"
         body += "    {\n"
@@ -1781,7 +1728,7 @@ class ConversionFunction(object):
         else:
             params = ["const {0} *in".format(self.type), "{0}_host *out".format(self.type)]
-        body = "static inline void {0}(".format(self.name)
+        body = "static WINEVULKAN_FORCEINLINE void {0}(".format(self.name)
# Generate parameter list
         body += ", ".join(p for p in params)
@@ -1814,7 +1761,7 @@ class ConversionFunction(object):
             params = ["const {0} *in".format(self.type), "{0} *out_host".format(self.type), "uint32_t count"]
# Generate function prototype.
-        body = "static inline void {0}(".format(self.name)
+        body = "static WINEVULKAN_FORCEINLINE void {0}(".format(self.name)
         body += ", ".join(p for p in params)
         body += ")\n{\n"
         body += "    unsigned int i;\n\n"
@@ -1856,81 +1803,6 @@ class ConversionFunction(object):
         else:
             return self._generate_conversion_func()
-
-class FreeFunction(object):
-    def __init__(self, dyn_array, struct):
-        self.dyn_array = dyn_array
-        self.struct = struct
-        self.type = struct.name
-
-        if dyn_array:
-            self.name = "free_{0}_array".format(self.type)
-        else:
-            self.name = "free_{0}".format(self.type)
-
-    def __eq__(self, other):
-        return self.name == other.name
-
-    def _generate_array_free_func(self):
-        """ Helper function for cleaning up temporary buffers required for array conversions. """
-
-        # Generate function prototype.
-        body = "static inline void {0}({1}_host *in, uint32_t count)\n{{\n".format(self.name, self.type)
-
-        # E.g. VkGraphicsPipelineCreateInfo_host needs freeing for pStages.
-        if self.struct.needs_free():
-            body += "    unsigned int i;\n\n"
-            body += "    if (!in) return;\n\n"
-            body += "    for (i = 0; i < count; i++)\n"
-            body += "    {\n"
-
-            for m in self.struct:
-                if m.needs_conversion() and m.is_dynamic_array():
-                    if m.is_const():
-                        # Add a cast to ignore const on conversion structs we allocated ourselves.
-                        body += "        free_{0}_array(({0}_host *)in[i].{1}, in[i].{2});\n".format(m.type, m.name, m.dyn_array_len)
-                    else:
-                        body += "        free_{0}_array(in[i].{1}, in[i].{2});\n".format(m.type, m.name, m.dyn_array_len)
-                elif m.needs_conversion():
-                    LOGGER.error("Unhandled conversion for {0}".format(m.name))
-            body += "    }\n"
-        else:
-            body += "    if (!in) return;\n\n"
-
-        body += "    heap_free(in);\n"
-
-        body += "}\n\n"
-        return body
-
-    def _generate_free_func(self):
-        # E.g. VkCommandBufferBeginInfo.pInheritanceInfo needs freeing.
-        if not self.struct.needs_free():
-            return ""
-
-        # Generate function prototype.
-        body = "static inline void {0}({1}_host *in)\n{{\n".format(self.name, self.type)
-
-        for m in self.struct:
-            if m.needs_conversion() and m.is_dynamic_array():
-                count = m.dyn_array_len if isinstance(m.dyn_array_len, int) else "in->{0}".format(m.dyn_array_len)
-                if m.is_const():
-                    # Add a cast to ignore const on conversion structs we allocated ourselves.
-                    body += "    free_{0}_array(({0}_host *)in->{1}, {2});\n".format(m.type, m.name, count)
-                else:
-                    body += "    free_{0}_array(in->{1}, {2});\n".format(m.type, m.name, count)
-
-        body += "}\n\n"
-        return body
-
-    def definition(self):
-        if self.dyn_array:
-            return self._generate_array_free_func()
-        else:
-            # Some structures need freeing too if they contain dynamic arrays.
-            # E.g. VkCommandBufferBeginInfo
-            return self._generate_free_func()
-
-
 class StructChainConversionFunction(object):
     def __init__(self, direction, struct):
         self.direction = direction
diff --git a/dlls/winevulkan/vulkan.c b/dlls/winevulkan/vulkan.c
index 59472bcef8..1cc1b4f61f 100644
--- a/dlls/winevulkan/vulkan.c
+++ b/dlls/winevulkan/vulkan.c
@@ -520,23 +520,12 @@ void WINAPI wine_vkCmdExecuteCommands(VkCommandBuffer buffer, uint32_t count,
     if (!buffers || !count)
         return;
-    /* Unfortunately we need a temporary buffer as our command buffers are wrapped.
-     * This call is called often and if a performance concern, we may want to use
-     * alloca as we shouldn't need much memory and it needs to be cleaned up after
-     * the call anyway.
-     */
-    if (!(tmp_buffers = heap_alloc(count * sizeof(*tmp_buffers))))
-    {
-        ERR("Failed to allocate memory for temporary command buffers\n");
-        return;
-    }
+    tmp_buffers = WINEVULKAN_ALLOCA(count * sizeof(*tmp_buffers));
for (i = 0; i < count; i++)
         tmp_buffers[i] = buffers[i]->command_buffer;
buffer->device->funcs.p_vkCmdExecuteCommands(buffer->command_buffer, count, tmp_buffers);
-
-    heap_free(tmp_buffers);
 }
VkResult WINAPI wine_vkCreateDevice(VkPhysicalDevice phys_dev,
-- 
2.25.1