The last patch is a huge optimization to what is done in patch 2. Without it, the sample is first copied from GPU to CPU each time wg_transform_read_mf() locks the buffer (allocating an extra linear buffer on the way), and then the data is copied back to the GPU. That happens even if there is no sample available from wg_transform. With the last patch there is just one memory copy to the (write-only) locked DXGI surface buffer, plus a CPU-to-GPU texture transfer (which would be there anyway in most cases on the software path, as most apps are going to get the image to the GPU anyway). In principle we could also skip the explicit staging texture and use _UpdateSubresource directly from the h264 decoder instead, but this is currently not supported in wined3d for chroma formats, and the overall difference between _UpdateSubresource and an explicitly mapped staging texture is probably not that great.
From: Paul Gofman pgofman@codeweavers.com
--- dlls/winegstreamer/h264_decoder.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-)
diff --git a/dlls/winegstreamer/h264_decoder.c b/dlls/winegstreamer/h264_decoder.c index 7d9c38837d3..aa776326e19 100644 --- a/dlls/winegstreamer/h264_decoder.c +++ b/dlls/winegstreamer/h264_decoder.c @@ -61,6 +61,8 @@ struct h264_decoder struct wg_format wg_format; struct wg_transform *wg_transform; struct wg_sample_queue *wg_sample_queue; + + IMFDXGIDeviceManager *dxgi_manager; };
static struct h264_decoder *impl_from_IMFTransform(IMFTransform *iface) @@ -251,6 +253,8 @@ static ULONG WINAPI transform_Release(IMFTransform *iface) IMFAttributes_Release(decoder->output_attributes); if (decoder->attributes) IMFAttributes_Release(decoder->attributes); + if (decoder->dxgi_manager) + IMFDXGIDeviceManager_Release(decoder->dxgi_manager);
wg_sample_queue_destroy(decoder->wg_sample_queue); free(decoder); @@ -580,7 +584,29 @@ static HRESULT WINAPI transform_ProcessEvent(IMFTransform *iface, DWORD id, IMFM
static HRESULT WINAPI transform_ProcessMessage(IMFTransform *iface, MFT_MESSAGE_TYPE message, ULONG_PTR param) { - FIXME("iface %p, message %#x, param %Ix stub!\n", iface, message, param); + struct h264_decoder *decoder = impl_from_IMFTransform(iface); + HRESULT hr; + + TRACE("iface %p, message %#x, param %Ix.\n", iface, message, param); + + if (message == MFT_MESSAGE_SET_D3D_MANAGER) + { + IMFDXGIDeviceManager *dxgi_manager = NULL; + IUnknown *unk = (IUnknown *)param; + + FIXME("MFT_MESSAGE_SET_D3D_MANAGER.\n"); + + if (unk && FAILED(hr = IUnknown_QueryInterface(unk, &IID_IMFDXGIDeviceManager, (void **)&dxgi_manager))) + { + FIXME("Query IMFDXGIDeviceManager failed.\n"); + return hr; + } + if (decoder->dxgi_manager) + IMFDXGIDeviceManager_Release(decoder->dxgi_manager); + decoder->dxgi_manager = dxgi_manager; + } + + FIXME("Ignoring message %#x.\n", message); return S_OK; }
From: Paul Gofman pgofman@codeweavers.com
--- dlls/winegstreamer/h264_decoder.c | 79 +++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 9 deletions(-)
diff --git a/dlls/winegstreamer/h264_decoder.c b/dlls/winegstreamer/h264_decoder.c index aa776326e19..c32e715d749 100644 --- a/dlls/winegstreamer/h264_decoder.c +++ b/dlls/winegstreamer/h264_decoder.c @@ -63,6 +63,7 @@ struct h264_decoder struct wg_sample_queue *wg_sample_queue;
IMFDXGIDeviceManager *dxgi_manager; + IMFVideoSampleAllocatorEx *allocator; };
static struct h264_decoder *impl_from_IMFTransform(IMFTransform *iface) @@ -234,6 +235,15 @@ static ULONG WINAPI transform_AddRef(IMFTransform *iface) return refcount; }
+static void free_allocator(struct h264_decoder *decoder) +{ + if (decoder->allocator) + { + IMFVideoSampleAllocatorEx_Release(decoder->allocator); + decoder->allocator = NULL; + } +} + static ULONG WINAPI transform_Release(IMFTransform *iface) { struct h264_decoder *decoder = impl_from_IMFTransform(iface); @@ -255,7 +265,7 @@ static ULONG WINAPI transform_Release(IMFTransform *iface) IMFAttributes_Release(decoder->attributes); if (decoder->dxgi_manager) IMFDXGIDeviceManager_Release(decoder->dxgi_manager); - + free_allocator(decoder); wg_sample_queue_destroy(decoder->wg_sample_queue); free(decoder); } @@ -526,6 +536,9 @@ static HRESULT WINAPI transform_SetOutputType(IMFTransform *iface, DWORD id, IMF decoder->output_type = NULL; }
+ if (SUCCEEDED(hr)) + free_allocator(decoder); + return hr; }
@@ -594,8 +607,6 @@ static HRESULT WINAPI transform_ProcessMessage(IMFTransform *iface, MFT_MESSAGE_ IMFDXGIDeviceManager *dxgi_manager = NULL; IUnknown *unk = (IUnknown *)param;
- FIXME("MFT_MESSAGE_SET_D3D_MANAGER.\n"); - if (unk && FAILED(hr = IUnknown_QueryInterface(unk, &IID_IMFDXGIDeviceManager, (void **)&dxgi_manager))) { FIXME("Query IMFDXGIDeviceManager failed.\n"); @@ -603,7 +614,11 @@ static HRESULT WINAPI transform_ProcessMessage(IMFTransform *iface, MFT_MESSAGE_ } if (decoder->dxgi_manager) IMFDXGIDeviceManager_Release(decoder->dxgi_manager); - decoder->dxgi_manager = dxgi_manager; + free_allocator(decoder); + if ((decoder->dxgi_manager = dxgi_manager)) + decoder->output_info.dwFlags |= MFT_OUTPUT_STREAM_PROVIDES_SAMPLES; + else + decoder->output_info.dwFlags &= ~MFT_OUTPUT_STREAM_PROVIDES_SAMPLES; }
FIXME("Ignoring message %#x.\n", message); @@ -622,12 +637,33 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS return wg_transform_push_mf(decoder->wg_transform, sample, decoder->wg_sample_queue); }
+static HRESULT initialize_allocator(struct h264_decoder *decoder) +{ + IMFAttributes *attributes = NULL; + HRESULT hr; + + if (FAILED(hr = MFCreateVideoSampleAllocatorEx(&IID_IMFVideoSampleAllocatorEx, (void **)&decoder->allocator))) + return hr; + if (FAILED(hr = IMFVideoSampleAllocatorEx_SetDirectXManager(decoder->allocator, (IUnknown *)decoder->dxgi_manager))) + goto done; + if (FAILED(hr = MFCreateAttributes(&attributes, 0))) + goto done; + hr = IMFVideoSampleAllocatorEx_InitializeSampleAllocatorEx(decoder->allocator, 10, 10, attributes, decoder->output_type); +done: + if (attributes) + IMFAttributes_Release(attributes); + if (FAILED(hr)) + free_allocator(decoder); + return hr; +} + static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, DWORD count, MFT_OUTPUT_DATA_BUFFER *samples, DWORD *status) { struct h264_decoder *decoder = impl_from_IMFTransform(iface); struct wg_format wg_format; UINT32 sample_size; + IMFSample *sample; UINT64 frame_rate; GUID subtype; HRESULT hr; @@ -641,16 +677,32 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, return MF_E_TRANSFORM_TYPE_NOT_SET;
*status = samples->dwStatus = 0; - if (!samples->pSample) - return E_INVALIDARG; + if (decoder->dxgi_manager) + { + if (!decoder->allocator && FAILED(hr = initialize_allocator(decoder))) + { + ERR("Failed to initialize allocator, hr %#lx.\n", hr); + return hr; + } + if (FAILED(hr = IMFVideoSampleAllocatorEx_AllocateSample(decoder->allocator, &sample))) + { + ERR("Failed to allocate sample, hr %#lx.\n", hr); + return hr; + } + } + else + { + if (!(sample = samples->pSample)) + return E_INVALIDARG; + }
if (FAILED(hr = IMFMediaType_GetGUID(decoder->output_type, &MF_MT_SUBTYPE, &subtype))) - return hr; + goto done; if (FAILED(hr = MFCalculateImageSize(&subtype, decoder->wg_format.u.video.width, decoder->wg_format.u.video.height, &sample_size))) - return hr; + goto done;
- if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, samples->pSample, + if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, sample, sample_size, &wg_format, &samples->dwStatus))) wg_sample_queue_flush(decoder->wg_sample_queue, false);
@@ -671,6 +723,15 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, *status |= MFT_OUTPUT_DATA_BUFFER_FORMAT_CHANGE; }
+done: + if (decoder->dxgi_manager) + { + if (hr == S_OK) + samples->pSample = sample; + else + IMFSample_Release(sample); + } + return hr; }
From: Paul Gofman pgofman@codeweavers.com
--- dlls/mf/tests/Makefile.in | 2 +- dlls/mf/tests/mf.c | 4 + dlls/mf/tests/mf_test.h | 2 + dlls/mf/tests/transform.c | 288 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 295 insertions(+), 1 deletion(-)
diff --git a/dlls/mf/tests/Makefile.in b/dlls/mf/tests/Makefile.in index adb9800ca07..c684d033207 100644 --- a/dlls/mf/tests/Makefile.in +++ b/dlls/mf/tests/Makefile.in @@ -1,5 +1,5 @@ TESTDLL = mf.dll -IMPORTS = mf mfplat dmoguids mfuuid strmiids uuid wmcodecdspuuid ole32 user32 propsys msdmo +IMPORTS = mf mfplat dmoguids mfuuid strmiids uuid wmcodecdspuuid ole32 user32 propsys msdmo d3d11
C_SRCS = \ mf.c \ diff --git a/dlls/mf/tests/mf.c b/dlls/mf/tests/mf.c index 399f983983f..59689652677 100644 --- a/dlls/mf/tests/mf.c +++ b/dlls/mf/tests/mf.c @@ -45,6 +45,7 @@ extern GUID DMOVideoFormat_RGB32;
HRESULT (WINAPI *pMFCreateSampleCopierMFT)(IMFTransform **copier); HRESULT (WINAPI *pMFGetTopoNodeCurrentType)(IMFTopologyNode *node, DWORD stream, BOOL output, IMFMediaType **type); +HRESULT (WINAPI *pMFCreateDXGIDeviceManager)(UINT *token, IMFDXGIDeviceManager **manager); BOOL has_video_processor;
static BOOL is_vista(void) @@ -6467,6 +6468,9 @@ void init_functions(void) #define X(f) p##f = (void*)GetProcAddress(mod, #f) X(MFCreateSampleCopierMFT); X(MFGetTopoNodeCurrentType); + + mod = GetModuleHandleA("mfplat.dll"); + X(MFCreateDXGIDeviceManager); #undef X
hr = CoInitialize(NULL); diff --git a/dlls/mf/tests/mf_test.h b/dlls/mf/tests/mf_test.h index 7973e007a68..77af51abd55 100644 --- a/dlls/mf/tests/mf_test.h +++ b/dlls/mf/tests/mf_test.h @@ -32,6 +32,8 @@
extern HRESULT (WINAPI *pMFCreateSampleCopierMFT)(IMFTransform **copier); extern HRESULT (WINAPI *pMFGetTopoNodeCurrentType)(IMFTopologyNode *node, DWORD stream, BOOL output, IMFMediaType **type); +extern HRESULT (WINAPI *pMFCreateDXGIDeviceManager)(UINT *token, IMFDXGIDeviceManager **manager); + extern BOOL has_video_processor; void init_functions(void);
diff --git a/dlls/mf/tests/transform.c b/dlls/mf/tests/transform.c index 602a03aabd0..7d9be831b21 100644 --- a/dlls/mf/tests/transform.c +++ b/dlls/mf/tests/transform.c @@ -42,6 +42,8 @@
#include "initguid.h"
+#include "d3d11_4.h" + DEFINE_GUID(DMOVideoFormat_RGB24,D3DFMT_R8G8B8,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70); DEFINE_GUID(DMOVideoFormat_RGB32,D3DFMT_X8R8G8B8,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70); DEFINE_GUID(DMOVideoFormat_RGB555,D3DFMT_X1R5G5B5,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70); @@ -6713,6 +6715,291 @@ failed: CoUninitialize(); }
+static HRESULT get_next_h264_output_sample(IMFTransform *transform, IMFSample **input_sample, + IMFSample *output_sample, MFT_OUTPUT_DATA_BUFFER *output, const BYTE **data, ULONG *data_len) +{ + DWORD status; + HRESULT hr; + + while (1) + { + status = 0; + memset(output, 0, sizeof(*output)); + output[0].pSample = output_sample; + hr = IMFTransform_ProcessOutput(transform, 0, 1, output, &status); + if (hr != S_OK) + ok(output[0].pSample == output_sample, "got %p.\n", output[0].pSample); + if (hr != MF_E_TRANSFORM_NEED_MORE_INPUT) + return hr; + + ok(status == 0, "got output[0].dwStatus %#lx\n", status); + hr = IMFTransform_ProcessInput(transform, 0, *input_sample, 0); + ok(hr == S_OK, "got %#lx\n", hr); + IMFSample_Release(*input_sample); + *input_sample = next_h264_sample(data, data_len); + } +} + +static void test_h264_with_dxgi_manager(void) +{ + static const unsigned int set_width = 82, set_height = 84, aligned_width = 96, aligned_height = 96; + const struct attribute_desc output_sample_attributes[] = + { + ATTR_UINT32(MFSampleExtension_CleanPoint, 1), + {0}, + }; + const struct buffer_desc output_buffer_desc_nv12 = + { + .length = aligned_width * aligned_height * 3 / 2, + .compare = compare_nv12, .dump = dump_nv12, .rect = {.top=0, .left=0, .right = set_width, .bottom = set_height}, + }; + const struct sample_desc output_sample_desc_nv12 = + { + .attributes = output_sample_attributes, + .sample_time = 333667, .sample_duration = 333667, + .buffer_count = 1, .buffers = &output_buffer_desc_nv12, + }; + + IMFDXGIDeviceManager *manager = NULL; + IMFTrackedSample *tracked_sample; + IMFSample *input_sample, *sample; + MFT_OUTPUT_DATA_BUFFER output[1]; + IMFTransform *transform = NULL; + ID3D11Multithread *multithread; + IMFCollection *output_samples; + MFT_OUTPUT_STREAM_INFO info; + IMFDXGIBuffer *dxgi_buffer; + unsigned int width, height; + D3D11_TEXTURE2D_DESC desc; + IMFMediaBuffer *buffer; + IMFAttributes *attribs; + ID3D11Texture2D *tex2d; + IMF2DBuffer2 
*buffer2d; + ID3D11Device *d3d11; + IMFMediaType *type; + DWORD status, val; + UINT64 frame_size; + MFVideoArea area; + const BYTE *data; + ULONG data_len; + UINT32 value; + HRESULT hr; + UINT token; + GUID guid; + DWORD ret; + + if (!pMFCreateDXGIDeviceManager) + { + win_skip("MFCreateDXGIDeviceManager() is not avaliable, skipping tests.\n"); + return; + } + + hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, D3D11_CREATE_DEVICE_VIDEO_SUPPORT, NULL, 0, + D3D11_SDK_VERSION, &d3d11, NULL, NULL); + if (FAILED(hr)) + { + skip("D3D11 device creation failed, skipping tests.\n"); + return; + } + + hr = MFStartup(MF_VERSION, 0); + ok(hr == S_OK, "got %#lx\n", hr); + + hr = CoInitialize(NULL); + ok(hr == S_OK, "got %#lx\n", hr); + + hr = ID3D11Device_QueryInterface(d3d11, &IID_ID3D11Multithread, (void **)&multithread); + ok(hr == S_OK, "got %#lx\n", hr); + ID3D11Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D11Multithread_Release(multithread); + + hr = pMFCreateDXGIDeviceManager(&token, &manager); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFDXGIDeviceManager_ResetDevice(manager, (IUnknown *)d3d11, token); + ok(hr == S_OK, "got %#lx\n", hr); + ID3D11Device_Release(d3d11); + + if (FAILED(hr = CoCreateInstance(&CLSID_MSH264DecoderMFT, NULL, CLSCTX_INPROC_SERVER, + &IID_IMFTransform, (void **)&transform))) + goto failed; + + hr = IMFTransform_GetAttributes(transform, &attribs); + ok(hr == S_OK, "got %#lx\n", hr); + + hr = IMFAttributes_GetUINT32(attribs, &MF_SA_D3D11_AWARE, &value); + ok(hr == S_OK, "got %#lx\n", hr); + ok(value == 1, "got %u.\n", value); + IMFAttributes_Release(attribs); + + hr = IMFTransform_ProcessMessage(transform, MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)manager); + ok(hr == S_OK || broken(hr == E_NOINTERFACE), "got %#lx\n", hr); + if (hr == E_NOINTERFACE) + { + win_skip("No hardware video decoding support.\n"); + goto failed; + } + + hr = MFCreateMediaType(&type); + ok(hr == S_OK, "got %#lx\n", hr); + hr = 
IMFMediaType_SetGUID(type, &MF_MT_MAJOR_TYPE, &MFMediaType_Video); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFMediaType_SetGUID(type, &MF_MT_SUBTYPE, &MFVideoFormat_H264); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFMediaType_SetUINT64(type, &MF_MT_FRAME_SIZE, 1088 | (1920ull << 32)); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFTransform_SetInputType(transform, 0, type, 0); + ok(hr == S_OK, "got %#lx\n", hr); + IMFMediaType_Release(type); + + hr = IMFTransform_GetOutputAvailableType(transform, 0, 0, &type); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFMediaType_SetGUID(type, &MF_MT_MAJOR_TYPE, &MFMediaType_Video); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFMediaType_SetGUID(type, &MF_MT_SUBTYPE, &MFVideoFormat_NV12); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFTransform_SetOutputType(transform, 0, type, 0); + ok(hr == S_OK, "got %#lx\n", hr); + IMFMediaType_Release(type); + + status = 0; + memset(output, 0, sizeof(output)); + hr = IMFTransform_ProcessOutput(transform, 0, 1, output, &status); + ok(hr == MF_E_TRANSFORM_NEED_MORE_INPUT, "got %#lx\n", hr); + + hr = IMFTransform_GetAttributes(transform, &attribs); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFAttributes_GetUINT32(attribs, &MF_SA_D3D11_AWARE, &value); + ok(hr == S_OK, "got %#lx\n", hr); + ok(value == 1, "got %u.\n", value); + IMFAttributes_Release(attribs); + + load_resource(L"h264data.bin", &data, &data_len); + + input_sample = next_h264_sample(&data, &data_len); + hr = get_next_h264_output_sample(transform, &input_sample, (void *)0xdeadbeef, output, &data, &data_len); + ok(hr == MF_E_TRANSFORM_STREAM_CHANGE, "got %#lx\n", hr); + ok(output[0].pSample == (void *)0xdeadbeef, "got %p.\n", output[0].pSample); + + hr = IMFTransform_GetOutputAvailableType(transform, 0, 0, &type); + ok(hr == S_OK, "got %#lx\n", hr); + IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size); + ok(hr == S_OK, "got %#lx\n", hr); + width = frame_size >> 32; + height = frame_size & 0xffffffff; + ok(width == 
aligned_width, "got %u.\n", width); + ok(height == aligned_height, "got %u.\n", height); + memset(&area, 0xcc, sizeof(area)); + hr = IMFMediaType_GetBlob(type, &MF_MT_MINIMUM_DISPLAY_APERTURE, (BYTE *)&area, sizeof(area), NULL); + ok(hr == S_OK, "got %#lx\n", hr); + ok(!area.OffsetX.value && !area.OffsetX.fract, "got %d.%d.\n", area.OffsetX.value, area.OffsetX.fract); + ok(!area.OffsetY.value && !area.OffsetY.fract, "got %d.%d.\n", area.OffsetY.value, area.OffsetY.fract); + ok(area.Area.cx == set_width, "got %ld.\n", area.Area.cx); + ok(area.Area.cy == set_height, "got %ld.\n", area.Area.cy); + + hr = IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &guid); + ok(hr == S_OK, "Failed to get subtype, hr %#lx.\n", hr); + ok(IsEqualIID(&guid, &MEDIASUBTYPE_NV12), "got guid %s.\n", debugstr_guid(&guid)); + + hr = IMFTransform_SetOutputType(transform, 0, type, 0); + ok(hr == S_OK, "got %#lx\n", hr); + IMFMediaType_Release(type); + + hr = IMFTransform_GetOutputStreamInfo(transform, 0, &info); + ok(hr == S_OK, "got %#lx\n", hr); + ok(info.dwFlags == (MFT_OUTPUT_STREAM_WHOLE_SAMPLES | MFT_OUTPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER + | MFT_OUTPUT_STREAM_FIXED_SAMPLE_SIZE | MFT_OUTPUT_STREAM_PROVIDES_SAMPLES), "got %#lx.\n", info.dwFlags); + + status = 0; + memset(output, 0, sizeof(output)); + output[0].pSample = (void *)0xdeadbeef; + + hr = get_next_h264_output_sample(transform, &input_sample, (void *)0xdeadbeef, output, &data, &data_len); + ok(hr == S_OK, "got %#lx\n", hr); + ok(output[0].dwStatus == 0, "got %#lx.\n", status); + sample = output[0].pSample; + + hr = IMFSample_QueryInterface(sample, &IID_IMFTrackedSample, (void **)&tracked_sample); + ok(hr == S_OK, "got %#lx\n", hr); + IMFTrackedSample_Release(tracked_sample); + + hr = IMFSample_GetBufferCount(sample, &val); + ok(hr == S_OK, "got %#lx\n", hr); + ok(val == 1, "got %lu.\n", val); + hr = IMFSample_GetBufferByIndex(sample, 0, &buffer); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFMediaBuffer_QueryInterface(buffer, 
&IID_IMFDXGIBuffer, (void **)&dxgi_buffer); + ok(hr == S_OK, "got %#lx\n", hr); + hr = IMFMediaBuffer_QueryInterface(buffer, &IID_IMF2DBuffer2, (void **)&buffer2d); + ok(hr == S_OK, "got %#lx\n", hr); + + hr = IMFDXGIBuffer_GetResource(dxgi_buffer, &IID_ID3D11Texture2D, (void **)&tex2d); + ok(hr == S_OK, "got %#lx\n", hr); + memset(&desc, 0xcc, sizeof(desc)); + ID3D11Texture2D_GetDesc(tex2d, &desc); + ok(desc.Format == DXGI_FORMAT_NV12, "got %u.\n", desc.Format); + ok(!desc.Usage, "got %u.\n", desc.Usage); + todo_wine ok(desc.BindFlags == D3D11_BIND_DECODER, "got %#x.\n", desc.BindFlags); + ok(!desc.CPUAccessFlags, "got %#x.\n", desc.CPUAccessFlags); + ok(!desc.MiscFlags, "got %#x.\n", desc.MiscFlags); + ok(desc.MipLevels == 1, "git %u.\n", desc.MipLevels); + ok(desc.Width == aligned_width, "got %u.\n", desc.Width); + ok(desc.Height == aligned_height, "got %u.\n", desc.Height); + + ID3D11Texture2D_Release(tex2d); + IMFDXGIBuffer_Release(dxgi_buffer); + IMF2DBuffer2_Release(buffer2d); + IMFMediaBuffer_Release(buffer); + IMFSample_Release(sample); + + status = 0; + hr = get_next_h264_output_sample(transform, &input_sample, (void *)0xdeadbeef, output, &data, &data_len); + ok(hr == S_OK, "got %#lx\n", hr); + ok(sample != output[0].pSample, "got %p.\n", output[0].pSample); + sample = output[0].pSample; + + hr = MFCreateCollection(&output_samples); + ok(hr == S_OK, "MFCreateCollection returned %#lx\n", hr); + + hr = IMFCollection_AddElement(output_samples, (IUnknown *)sample); + ok(hr == S_OK, "AddElement returned %#lx\n", hr); + IMFSample_Release(sample); + + ret = check_mf_sample_collection(output_samples, &output_sample_desc_nv12, L"nv12frame.bmp"); + ok(ret == 0, "got %lu%% diff\n", ret); + IMFCollection_Release(output_samples); + + memset(&info, 0xcc, sizeof(info)); + hr = IMFTransform_GetOutputStreamInfo(transform, 0, &info); + ok(hr == S_OK, "got %#lx\n", hr); + ok(info.dwFlags == (MFT_OUTPUT_STREAM_WHOLE_SAMPLES | MFT_OUTPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER + | 
MFT_OUTPUT_STREAM_FIXED_SAMPLE_SIZE | MFT_OUTPUT_STREAM_PROVIDES_SAMPLES), "got %#lx.\n", info.dwFlags); + ok(info.cbSize == aligned_width * aligned_height * 2, "got %lu.\n", info.cbSize); + ok(!info.cbAlignment, "got %lu.\n", info.cbAlignment); + + hr = IMFTransform_ProcessMessage(transform, MFT_MESSAGE_SET_D3D_MANAGER, 0); + ok(hr == S_OK, "got %#lx\n", hr); + + memset(&info, 0xcc, sizeof(info)); + hr = IMFTransform_GetOutputStreamInfo(transform, 0, &info); + ok(hr == S_OK, "got %#lx\n", hr); + ok(info.dwFlags == (MFT_OUTPUT_STREAM_WHOLE_SAMPLES | MFT_OUTPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER + | MFT_OUTPUT_STREAM_FIXED_SAMPLE_SIZE), "got %#lx.\n", info.dwFlags); + if (0) + { + /* hangs on Windows. */ + get_next_h264_output_sample(transform, &input_sample, NULL, output, &data, &data_len); + } + + IMFSample_Release(input_sample); + +failed: + if (manager) + IMFDXGIDeviceManager_Release(manager); + if (transform) + IMFTransform_Release(transform); + CoUninitialize(); +} + START_TEST(transform) { init_functions(); @@ -6731,4 +7018,5 @@ START_TEST(transform) test_color_convert(); test_video_processor(); test_mp3_decoder(); + test_h264_with_dxgi_manager(); }
From: Paul Gofman pgofman@codeweavers.com
--- dlls/mf/tests/transform.c | 11 ++++++++++- dlls/mfplat/buffer.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/dlls/mf/tests/transform.c b/dlls/mf/tests/transform.c index 7d9be831b21..569ec3e9851 100644 --- a/dlls/mf/tests/transform.c +++ b/dlls/mf/tests/transform.c @@ -6765,11 +6765,13 @@ static void test_h264_with_dxgi_manager(void) IMFSample *input_sample, *sample; MFT_OUTPUT_DATA_BUFFER output[1]; IMFTransform *transform = NULL; + BYTE *scanline0, *buffer_start; ID3D11Multithread *multithread; IMFCollection *output_samples; MFT_OUTPUT_STREAM_INFO info; IMFDXGIBuffer *dxgi_buffer; unsigned int width, height; + DWORD status, val, length; D3D11_TEXTURE2D_DESC desc; IMFMediaBuffer *buffer; IMFAttributes *attribs; @@ -6777,12 +6779,12 @@ static void test_h264_with_dxgi_manager(void) IMF2DBuffer2 *buffer2d; ID3D11Device *d3d11; IMFMediaType *type; - DWORD status, val; UINT64 frame_size; MFVideoArea area; const BYTE *data; ULONG data_len; UINT32 value; + LONG pitch; HRESULT hr; UINT token; GUID guid; @@ -6932,6 +6934,13 @@ static void test_h264_with_dxgi_manager(void) hr = IMFMediaBuffer_QueryInterface(buffer, &IID_IMF2DBuffer2, (void **)&buffer2d); ok(hr == S_OK, "got %#lx\n", hr);
+ hr = IMF2DBuffer2_Lock2DSize(buffer2d, MF2DBuffer_LockFlags_Read, &scanline0, &pitch, &buffer_start, &length); + ok(hr == S_OK, "got %#lx\n", hr); + ok(scanline0 == buffer_start, "got %p, %p.\n", scanline0, buffer_start); + ok(pitch >= aligned_width, "got %ld.\n", pitch); + ok(length == pitch * aligned_height * 3 / 2, "got %lu.\n", length); + IMF2DBuffer2_Unlock2D(buffer2d); + hr = IMFDXGIBuffer_GetResource(dxgi_buffer, &IID_ID3D11Texture2D, (void **)&tex2d); ok(hr == S_OK, "got %#lx\n", hr); memset(&desc, 0xcc, sizeof(desc)); diff --git a/dlls/mfplat/buffer.c b/dlls/mfplat/buffer.c index a221c6d55b8..58eadffa502 100644 --- a/dlls/mfplat/buffer.c +++ b/dlls/mfplat/buffer.c @@ -1045,7 +1045,7 @@ static HRESULT dxgi_surface_buffer_lock(struct buffer *buffer, MF2DBuffer_LockFl if (buffer_start) *buffer_start = *scanline0; if (buffer_length) - *buffer_length = buffer->dxgi_surface.map_desc.RowPitch * buffer->_2d.height; + *buffer_length = buffer->dxgi_surface.map_desc.DepthPitch; }
return hr;
From: Paul Gofman pgofman@codeweavers.com
--- dlls/winegstreamer/h264_decoder.c | 96 ++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 9 deletions(-)
diff --git a/dlls/winegstreamer/h264_decoder.c b/dlls/winegstreamer/h264_decoder.c index c32e715d749..d7029cd42cb 100644 --- a/dlls/winegstreamer/h264_decoder.c +++ b/dlls/winegstreamer/h264_decoder.c @@ -64,6 +64,7 @@ struct h264_decoder
IMFDXGIDeviceManager *dxgi_manager; IMFVideoSampleAllocatorEx *allocator; + IMFMediaBuffer *temp_buffer; };
static struct h264_decoder *impl_from_IMFTransform(IMFTransform *iface) @@ -242,6 +243,11 @@ static void free_allocator(struct h264_decoder *decoder) IMFVideoSampleAllocatorEx_Release(decoder->allocator); decoder->allocator = NULL; } + if (decoder->temp_buffer) + { + IMFMediaBuffer_Release(decoder->temp_buffer); + decoder->temp_buffer = NULL; + } }
static ULONG WINAPI transform_Release(IMFTransform *iface) @@ -640,6 +646,8 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS static HRESULT initialize_allocator(struct h264_decoder *decoder) { IMFAttributes *attributes = NULL; + UINT32 sample_size; + GUID subtype; HRESULT hr;
if (FAILED(hr = MFCreateVideoSampleAllocatorEx(&IID_IMFVideoSampleAllocatorEx, (void **)&decoder->allocator))) @@ -648,7 +656,16 @@ static HRESULT initialize_allocator(struct h264_decoder *decoder) goto done; if (FAILED(hr = MFCreateAttributes(&attributes, 0))) goto done; - hr = IMFVideoSampleAllocatorEx_InitializeSampleAllocatorEx(decoder->allocator, 10, 10, attributes, decoder->output_type); + if (FAILED(hr = IMFVideoSampleAllocatorEx_InitializeSampleAllocatorEx(decoder->allocator, 10, 10, attributes, + decoder->output_type))) + goto done; + if (FAILED(hr = IMFMediaType_GetGUID(decoder->output_type, &MF_MT_SUBTYPE, &subtype))) + goto done; + if (FAILED(hr = MFCalculateImageSize(&subtype, decoder->wg_format.u.video.width, + decoder->wg_format.u.video.height, &sample_size))) + goto done; + + hr = MFCreateMemoryBuffer(sample_size, &decoder->temp_buffer); done: if (attributes) IMFAttributes_Release(attributes); @@ -657,6 +674,69 @@ done: return hr; }
+static HRESULT output_dxgi_sample(struct h264_decoder *decoder, IMFSample **out, IMFSample *src_sample) +{ + DWORD max_length, current_length, dst_length; + BYTE *dst_data, *buffer_start, *src_data; + IMF2DBuffer2 *dst_buffer = NULL; + IMFMediaBuffer *buffer = NULL; + LONG dst_pitch, row_count; + LONGLONG time; + HRESULT hr; + + if (FAILED(hr = IMFVideoSampleAllocatorEx_AllocateSample(decoder->allocator, out))) + return hr; + if (FAILED(hr = IMFSample_DeleteAllItems(*out))) + goto done; + if (FAILED(hr = IMFSample_CopyAllItems(src_sample, (IMFAttributes *)*out))) + goto done; + if (SUCCEEDED(IMFSample_GetSampleTime(src_sample, &time))) + IMFSample_SetSampleTime(*out, time); + if (SUCCEEDED(IMFSample_GetSampleDuration(src_sample, &time))) + IMFSample_SetSampleDuration(*out, time); + if (FAILED(hr = IMFSample_GetBufferByIndex(*out, 0, &buffer))) + goto done; + if (FAILED(hr = IMFMediaBuffer_QueryInterface(buffer, &IID_IMF2DBuffer2, (void **)&dst_buffer))) + goto done; + + if (FAILED(hr = IMFMediaBuffer_Lock(decoder->temp_buffer, &src_data, &max_length, ¤t_length))) + goto done; + + if (FAILED(hr = IMFMediaBuffer_SetCurrentLength(buffer, current_length))) + { + IMFMediaBuffer_Unlock(decoder->temp_buffer); + goto done; + } + + if (FAILED(hr = IMF2DBuffer2_Lock2DSize(dst_buffer, MF2DBuffer_LockFlags_Write, &dst_data, + &dst_pitch, &buffer_start, &dst_length))) + { + IMFMediaBuffer_Unlock(decoder->temp_buffer); + goto done; + } + row_count = current_length / decoder->wg_format.u.video.width; + if (dst_length / dst_pitch != row_count) + { + ERR("Row count mismatch %ld vs %ld.\n", row_count, dst_length / dst_pitch); + IMFMediaBuffer_Unlock(decoder->temp_buffer); + IMF2DBuffer2_Unlock2D(dst_buffer); + goto done; + } + hr = MFCopyImage(dst_data, dst_pitch, src_data, decoder->wg_format.u.video.width, + decoder->wg_format.u.video.width, row_count); + + IMFMediaBuffer_Unlock(decoder->temp_buffer); + IMF2DBuffer2_Unlock2D(dst_buffer); +done: + if (dst_buffer) + 
IMF2DBuffer2_Release(dst_buffer); + if (buffer) + IMFMediaBuffer_Release(buffer); + if (FAILED(hr)) + IMFSample_Release(*out); + return hr; +} + static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, DWORD count, MFT_OUTPUT_DATA_BUFFER *samples, DWORD *status) { @@ -684,11 +764,10 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, ERR("Failed to initialize allocator, hr %#lx.\n", hr); return hr; } - if (FAILED(hr = IMFVideoSampleAllocatorEx_AllocateSample(decoder->allocator, &sample))) - { - ERR("Failed to allocate sample, hr %#lx.\n", hr); + if (FAILED(hr = MFCreateSample(&sample))) + return hr; + if (FAILED(hr = IMFSample_AddBuffer(sample, decoder->temp_buffer))) return hr; - } } else { @@ -726,10 +805,9 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, done: if (decoder->dxgi_manager) { - if (hr == S_OK) - samples->pSample = sample; - else - IMFSample_Release(sample); + if (hr == S_OK && FAILED(hr = output_dxgi_sample(decoder, &samples->pSample, sample))) + ERR("Failed to output sample, hr %#lx.\n", hr); + IMFSample_Release(sample); }
return hr;
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=131489
Your paranoid android.
=== w11pro64_amd (64 bit report) ===
mf: 1b34:transform: unhandled exception c0000005 at 00007FFE94AEE0B4
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/h264_decoder.c:
IUnknown *unk = (IUnknown *)param;
FIXME("MFT_MESSAGE_SET_D3D_MANAGER.\n");
if (unk && FAILED(hr = IUnknown_QueryInterface(unk, &IID_IMFDXGIDeviceManager, (void **)&dxgi_manager)))
{
FIXME("Query IMFDXGIDeviceManager failed.\n");
return hr;
}
if (decoder->dxgi_manager)
IMFDXGIDeviceManager_Release(decoder->dxgi_manager);
decoder->dxgi_manager = dxgi_manager;
- }
- FIXME("Ignoring message %#x.\n", message); return S_OK;
I'm not sure the FIXME messages really make sense as written here. The first one is removed again in a later patch, for instance, and the second one is always reached even though the message is not actually ignored at that point.
Rémi Bernon (@rbernon) commented about dlls/mf/tests/transform.c:
- hr = IMFTransform_GetAttributes(transform, &attribs);
- ok(hr == S_OK, "got %#lx\n", hr);
- hr = IMFAttributes_GetUINT32(attribs, &MF_SA_D3D11_AWARE, &value);
- ok(hr == S_OK, "got %#lx\n", hr);
- ok(value == 1, "got %u.\n", value);
- IMFAttributes_Release(attribs);
- hr = IMFTransform_ProcessMessage(transform, MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)manager);
- ok(hr == S_OK || broken(hr == E_NOINTERFACE), "got %#lx\n", hr);
- if (hr == E_NOINTERFACE)
- {
win_skip("No hardware video decoding support.\n");
goto failed;
- }
This could perhaps test whether `QueryInterface` is really needed in `ProcessMessage`. Otherwise I'd say if it's not tested then it's probably not needed on the implementation side.
Also, any reason not to add the tests upfront? I think it'd make the changes more obvious if the tests were added upfront or in lockstep, with the implementation incrementally fixing the todo_wine.
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/h264_decoder.c:
+static HRESULT initialize_allocator(struct h264_decoder *decoder) +{
- IMFAttributes *attributes = NULL;
- HRESULT hr;
- if (FAILED(hr = MFCreateVideoSampleAllocatorEx(&IID_IMFVideoSampleAllocatorEx, (void **)&decoder->allocator)))
return hr;
- if (FAILED(hr = IMFVideoSampleAllocatorEx_SetDirectXManager(decoder->allocator, (IUnknown *)decoder->dxgi_manager)))
goto done;
- if (FAILED(hr = MFCreateAttributes(&attributes, 0)))
goto done;
- hr = IMFVideoSampleAllocatorEx_InitializeSampleAllocatorEx(decoder->allocator, 10, 10, attributes, decoder->output_type);
+done:
- if (attributes)
IMFAttributes_Release(attributes);
You can probably use NULL if you don't need attributes, according to the implementation, or at least you don't need the `if (attributes)` check.
Also, I'm thinking that instead of keeping a dxgi_manager reference in the decoder you could have a `IMFVideoSampleAllocatorEx` allocated right on decoder creation, which would save the logic to release and re-create it, and the potential failures.
Then, changing the media type would be calling `UninitializeSampleAllocator` / `InitializeSampleAllocatorEx` and processing the `MFT_MESSAGE_SET_D3D_MANAGER` message would be calling `SetDirectXManager`, which would even save you the `QueryInterface` and the question whether it needs to be done, as it's done internally already.
Of course, I'd appreciate this being split in separate changes ;)
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/h264_decoder.c:
- {
IMFMediaBuffer_Unlock(decoder->temp_buffer);
goto done;
- }
- row_count = current_length / decoder->wg_format.u.video.width;
- if (dst_length / dst_pitch != row_count)
- {
ERR("Row count mismatch %ld vs %ld.\n", row_count, dst_length / dst_pitch);
IMFMediaBuffer_Unlock(decoder->temp_buffer);
IMF2DBuffer2_Unlock2D(dst_buffer);
goto done;
- }
- hr = MFCopyImage(dst_data, dst_pitch, src_data, decoder->wg_format.u.video.width,
decoder->wg_format.u.video.width, row_count);
- IMFMediaBuffer_Unlock(decoder->temp_buffer);
I don't think this last commit should be done here. I understand that currently the wg_sample will force the D3D buffer to be copied to CPU first when locking, but the fix should probably to map the wg_sample memory with Lock2DSize like this here, and support writing directly into it from gstreamer side.
Probably that would be a separate MR if it's not a trivial change.
On Wed Apr 5 18:01:52 2023 +0000, Rémi Bernon wrote:
I don't think this last commit should be done here. I understand that currently the wg_sample will force the D3D buffer to be copied to CPU first when locking, but the fix should probably to map the wg_sample memory with Lock2DSize like this here, and support writing directly into it from gstreamer side. Probably that would be a separate MR if it's not a trivial change.
I considered something along those lines, but there are many problems with this (due to which I considered it not doable): - we don't want to transfer from CPU to GPU if wg_transform didn't really return any samples (which is the case in at least half of invocations when it needs more input data). That is, if the sample is not going to be returned we don't want to lock the dxgi buffer at all, as there is no way to tell it not to perform any texture update. But we still need to pass some buffer as we don't know upfront if the data will be used or not; - I don't know if anything really depends on that, but as far as my additional testing went, native doesn't allocate a sample if it is not going to return one (so it doesn't try to allocate a sample which may fail if the app is referencing all the samples). The way I am testing that is trying to get more samples while not freeing any (native will hang in ProcessOutput once out of free samples while we will return an error). - the buffer returned by Lock2DSize is not suitable to pass to wg_transform; it may have a different stride (stipulated by the 3D implementation). It may even match now by chance but that is not guaranteed (e. g., for the test example the pitch for NV12 currently matches on Wine but is 256 on Windows, so that may change). So there should be a separate buffer anyway (that's what the general MF buffer Lock does, by allocating a separate "linear" buffer on map). Yes, that could be addressed somehow on the gstreamer side, but in view of the above we probably need a memory buffer anyway.
So the only technically straightforward approach that I see here is to make the Unix-side wg_transform handle that somehow. But that is also not quite straightforward in practice, as in principle it requires callbacks from the Unix part to deal with mapping the sample once it is needed (which is quite inconvenient — we try to avoid callbacks from the Unix side).
Do you see any better way?
On Wed Apr 5 18:01:51 2023 +0000, Rémi Bernon wrote:
This could perhaps test whether `QueryInterface` is really needed in `ProcessMessage`. Otherwise I'd say if it's not tested then it's probably not needed on the implementation side. Also, any reason not to add the tests upfront? I think it'd make the changes more obvious if the tests were added upfront or in a lockstep, and the implementation incrementally fixing the todo_wine.
I am sorry, I am not entirely sure what you mean here: how can we in principle avoid QueryInterface on the implementation side, once we receive an IUnknown but for now only intend to support IDXGIDeviceManager (it could probably also be a d3d9 manager in theory, but I indeed didn't test that)? The only reason I added this skip here is that `MFT_MESSAGE_SET_D3D_MANAGER` fails on Testbot machines with these errors, while that doesn't happen here on real hardware (and that's what games apparently use).
Also, any reason not to add the tests upfront? I think it'd make the changes more obvious if the tests were added upfront or in a lockstep, and the implementation incrementally fixing the todo_wine.
Well, the whole test logic WRT actual samples will just be broken until the implementation is there and can probably only be skipped. So I considered that doing that upfront and updating the test along the way won't make the most interesting parts clearer and will only add some back and forth test tweaking across the patchset. Still, if you think that is more convenient I can do that — should I?
On Wed Apr 5 18:25:33 2023 +0000, Paul Gofman wrote:
I considered something between the lines, but there are many problems with this (due to which I considered that not doable):
- we don't want to transfer from CPU to GPU also if wg_transform didn't
really return any samples (which is the case in at least half of invocations when it needs more input data). That is, if we the sample is not going to be returned we don't want to lock dxgi buffer at all, as there is no way to tell it not to perform any texture update at all. But we still need to pass some buffer as we don't know upfront if the data will be used or not;
- I don't know if anything really depends on that, but as far as my
additional testing went native doesn't allocate a sample if it is not going to return one (so not trying to allocate a sample which may fail if the app is referencing all the samples). The way I am testing that is trying to get more samples while not freeing any (native will hang in ProcessOutput once out of free samples while we will return an error).
- the buffer returned by Lock2DSize is not suitable to pass to
wg_transform, it may have different stride (stipulated by 3D implementation). It may even match now by chance but that is not guaranteed (e. g., for the test example pitch for NV12 currently matches on Wine but is 256 on Windows, so that may change). So there should be a separate buffer anyway (that's what general MF buffer Lock does bu allocating a separate "linear" buffer on map. Yes, that can be addressed somehow on gstreamer side, but in the view of the above we probably need a memory buffer anyway. So the only technically straightforward approach that I see here is to make Unix part wg_transform handle that somehow. But that is also quite not straightforward in practice as in principle requires some callbacks from the Unix part to deal with mapping the sample once needed (which is apparently very uncovinient, we try to avoid callbacks from the Unix side). Do you see any better way?
In other words, from the above it looks like we need a temporary memory buffer anyway. And then we need to transfer that to the dxgi sample only when needed. Maybe I should just move the sample copy helper out of the h264 decoder into wg_sample.c?
On Wed Apr 5 18:34:13 2023 +0000, Paul Gofman wrote:
Also, any reason not to add the tests upfront? I think it'd make the
changes more obvious if the tests were added upfront or in a lockstep, and the implementation incrementally fixing the todo_wine. Well, the whole test logic WRT actual samples will be just broken until the implementation is there and can probably only be skipped. So I considered that doing that upfront and updating the test on the way won't make most interesting parts clearer and will only add some back and force test tweaking along the patchset. Still if you think that is more convenient I can do that, should I?
You can probably ignore the `QueryInterface` suggestion if you use the `IMFVideoSampleAllocatorEx` like I described below, as in this case you also won't have to query the iface in the decoder and can just forward the message parameter.
Regarding the tests, I wouldn't mind having a first patch introducing basic `ProcessMessage` / `MFT_MESSAGE_SET_D3D_MANAGER` tests, then adding the buffer tests.
I'm probably fine with all the tests added at once, but I think having them upfront is better to show what gets implemented.
we don't want to transfer from CPU to GPU also if wg_transform didn't really return any samples (which is the case in at least half of invocations when it needs more input data). That is, if we the sample is not going to be returned we don't want to lock dxgi buffer at all, as there is no way to tell it not to perform any texture update at all. But we still need to pass some buffer as we don't know upfront if the data will be used or not;
I see... then it probably makes sense if there's no way to discard the D3D locking.
I'm not a huge fan of the amount of code required to create and copy the temporary buffer, so having this factored out into a common part would probably be better. Or maybe this should just use an internal `MFCreateSampleCopierMFT` to do the dirty job. This could also be an opportunity to improve the sample copier for video buffers, if Lock2D is more efficient.
On Wed Apr 5 19:10:43 2023 +0000, Rémi Bernon wrote:
we don't want to transfer from CPU to GPU also if wg_transform didn't
really return any samples (which is the case in at least half of invocations when it needs more input data). That is, if we the sample is not going to be returned we don't want to lock dxgi buffer at all, as there is no way to tell it not to perform any texture update at all. But we still need to pass some buffer as we don't know upfront if the data will be used or not; I see... then it probably makes sense if there's no way to discard the D3D locking. I'm not a huge fan of the amount of code required to create and copy the temporary buffer, so probably having this factored out in a common part would be better. Or maybe this should just use an internal `MFCreateSampleCopierMFT` to do the dirty job. This also could an opportunity to improve the sample copier for video buffers if Lock2D is more efficient.
Sample copier looks related indeed... I guess it is mfplat/sample.c:sample_CopyToBuffer() which could use some optimization for a 2d destination buffer (so that it locks it with Lock2DSize to pass the flags, and avoids both pulling the data from the GPU for the destination buffer and creating a temporary linear buffer). But that is probably for another MR? Using the temporary buffer and sample copier here will remove the extra copies when there are no output samples, while leaving the GPU -> CPU copies in place.