From: Conor McCarthy cmccarthy@codeweavers.com
Performance issues can occur when a video output sample has a 2D buffer, because winegstreamer currently outputs only to a linear buffer, which must then be copied into the 2D buffer. Worse, a linear lock of the 2D buffer requires the current contents to be copied to a linear buffer, even when we intend to overwrite it, because MF linear buffers do not support write-only locking. --- dlls/mf/tests/transform.c | 2 +- dlls/winegstreamer/aac_decoder.c | 2 +- dlls/winegstreamer/color_convert.c | 7 ++- dlls/winegstreamer/gst_private.h | 3 +- dlls/winegstreamer/resampler.c | 2 +- dlls/winegstreamer/unixlib.h | 2 + dlls/winegstreamer/video_decoder.c | 3 +- dlls/winegstreamer/video_encoder.c | 2 +- dlls/winegstreamer/video_processor.c | 5 ++- dlls/winegstreamer/wg_sample.c | 66 +++++++++++++++++++++++++--- dlls/winegstreamer/wg_transform.c | 42 +++++++++++++++--- dlls/winegstreamer/wma_decoder.c | 2 +- 12 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/dlls/mf/tests/transform.c b/dlls/mf/tests/transform.c index 6a16332286f..53ed83bb617 100644 --- a/dlls/mf/tests/transform.c +++ b/dlls/mf/tests/transform.c @@ -4799,7 +4799,7 @@ static void test_h264_decoder(BOOL use_2d_buffer) .attributes = output_sample_attributes, .sample_time = 0, .sample_duration = 333667, .total_length = aligned_width_2d * actual_height * 3 / 2, - .buffer_count = 1, .buffers = &output_buffer_desc_nv12, .todo_length = TRUE, + .buffer_count = 1, .buffers = &output_buffer_desc_nv12, }; const struct buffer_desc output_buffer_desc_nv12_2d = { diff --git a/dlls/winegstreamer/aac_decoder.c b/dlls/winegstreamer/aac_decoder.c index 9e6c5c20cb2..b78102bc1b0 100644 --- a/dlls/winegstreamer/aac_decoder.c +++ b/dlls/winegstreamer/aac_decoder.c @@ -558,7 +558,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, if (!samples->pSample) return E_INVALIDARG;
- if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, samples->pSample, &samples->dwStatus, NULL))) + if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, samples->pSample, 0, &samples->dwStatus, NULL))) wg_sample_queue_flush(decoder->wg_sample_queue, false); else samples->dwStatus = MFT_OUTPUT_DATA_BUFFER_NO_SAMPLE; diff --git a/dlls/winegstreamer/color_convert.c b/dlls/winegstreamer/color_convert.c index 938d8b7b6f2..c5c7141f4ce 100644 --- a/dlls/winegstreamer/color_convert.c +++ b/dlls/winegstreamer/color_convert.c @@ -651,6 +651,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, MFT_OUTPUT_DATA_BUFFER *samples, DWORD *status) { struct color_convert *impl = impl_from_IMFTransform(iface); + MFT_OUTPUT_STREAM_INFO info; HRESULT hr;
TRACE("iface %p, flags %#lx, count %lu, samples %p, status %p.\n", iface, flags, count, samples, status); @@ -665,7 +666,11 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, if (!samples->pSample) return E_INVALIDARG;
- if (SUCCEEDED(hr = wg_transform_read_mf(impl->wg_transform, samples->pSample, &samples->dwStatus, NULL))) + if (FAILED(hr = IMFTransform_GetOutputStreamInfo(iface, 0, &info))) + return hr; + + if (SUCCEEDED(hr = wg_transform_read_mf(impl->wg_transform, samples->pSample, + info.cbSize, &samples->dwStatus, NULL))) wg_sample_queue_flush(impl->wg_sample_queue, false);
return hr; diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h index d50c9fa63fb..213a0a15f3a 100644 --- a/dlls/winegstreamer/gst_private.h +++ b/dlls/winegstreamer/gst_private.h @@ -166,7 +166,8 @@ HRESULT wg_transform_push_quartz(wg_transform_t transform, struct wg_sample *sam struct wg_sample_queue *queue); HRESULT wg_transform_push_dmo(wg_transform_t transform, IMediaBuffer *media_buffer, DWORD flags, REFERENCE_TIME time_stamp, REFERENCE_TIME time_length, struct wg_sample_queue *queue); -HRESULT wg_transform_read_mf(wg_transform_t transform, IMFSample *sample, DWORD *flags, bool *preserve_timestamps); +HRESULT wg_transform_read_mf(wg_transform_t transform, IMFSample *sample, + DWORD plane_size, DWORD *flags, bool *preserve_timestamps); HRESULT wg_transform_read_quartz(wg_transform_t transform, struct wg_sample *sample); HRESULT wg_transform_read_dmo(wg_transform_t transform, DMO_OUTPUT_DATA_BUFFER *buffer);
diff --git a/dlls/winegstreamer/resampler.c b/dlls/winegstreamer/resampler.c index 910d109c2c6..287d02a8015 100644 --- a/dlls/winegstreamer/resampler.c +++ b/dlls/winegstreamer/resampler.c @@ -536,7 +536,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, return MF_E_TRANSFORM_NEED_MORE_INPUT; }
- if (SUCCEEDED(hr = wg_transform_read_mf(impl->wg_transform, samples->pSample, &samples->dwStatus, NULL))) + if (SUCCEEDED(hr = wg_transform_read_mf(impl->wg_transform, samples->pSample, 0, &samples->dwStatus, NULL))) wg_sample_queue_flush(impl->wg_sample_queue, false);
return hr; diff --git a/dlls/winegstreamer/unixlib.h b/dlls/winegstreamer/unixlib.h index 179f15f78f7..5d11934fac2 100644 --- a/dlls/winegstreamer/unixlib.h +++ b/dlls/winegstreamer/unixlib.h @@ -192,6 +192,7 @@ struct wg_sample UINT32 flags; UINT32 max_size; UINT32 size; + UINT32 stride; UINT64 data; /* pointer to user memory */ };
@@ -334,6 +335,7 @@ struct wg_parser_stream_seek_params struct wg_transform_attrs { UINT32 output_plane_align; + UINT32 output_plane_stride; UINT32 input_queue_length; BOOL allow_format_change; BOOL low_latency; diff --git a/dlls/winegstreamer/video_decoder.c b/dlls/winegstreamer/video_decoder.c index becf148aeef..e56f7b9baf7 100644 --- a/dlls/winegstreamer/video_decoder.c +++ b/dlls/winegstreamer/video_decoder.c @@ -994,7 +994,8 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, } }
- if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, sample, &samples->dwStatus, &preserve_timestamps))) + if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, sample, + sample_size, &samples->dwStatus, &preserve_timestamps))) { wg_sample_queue_flush(decoder->wg_sample_queue, false);
diff --git a/dlls/winegstreamer/video_encoder.c b/dlls/winegstreamer/video_encoder.c index 41291928660..f26bec66c84 100644 --- a/dlls/winegstreamer/video_encoder.c +++ b/dlls/winegstreamer/video_encoder.c @@ -541,7 +541,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, if (!samples->pSample) return E_INVALIDARG;
- if (SUCCEEDED(hr = wg_transform_read_mf(encoder->wg_transform, samples->pSample, &samples->dwStatus, NULL))) + if (SUCCEEDED(hr = wg_transform_read_mf(encoder->wg_transform, samples->pSample, 0, &samples->dwStatus, NULL))) wg_sample_queue_flush(encoder->wg_sample_queue, false);
return hr; diff --git a/dlls/winegstreamer/video_processor.c b/dlls/winegstreamer/video_processor.c index 587be98c98b..3e327228e65 100644 --- a/dlls/winegstreamer/video_processor.c +++ b/dlls/winegstreamer/video_processor.c @@ -695,6 +695,7 @@ static HRESULT WINAPI video_processor_ProcessOutput(IMFTransform *iface, DWORD f MFT_OUTPUT_DATA_BUFFER *samples, DWORD *status) { struct video_processor *impl = impl_from_IMFTransform(iface); + MFT_OUTPUT_STREAM_INFO info; IMFSample *output_sample; HRESULT hr; BOOL playback_mode, provide_samples; @@ -708,6 +709,8 @@ static HRESULT WINAPI video_processor_ProcessOutput(IMFTransform *iface, DWORD f return MF_E_TRANSFORM_TYPE_NOT_SET;
samples->dwStatus = 0; + if (FAILED(hr = IMFTransform_GetOutputStreamInfo(iface, 0, &info))) + return hr;
if (FAILED(IMFAttributes_GetUINT32(impl->attributes, &MF_XVP_PLAYBACK_MODE, (UINT32 *) &playback_mode))) playback_mode = FALSE; @@ -728,7 +731,7 @@ static HRESULT WINAPI video_processor_ProcessOutput(IMFTransform *iface, DWORD f IMFSample_AddRef(output_sample); }
- if (FAILED(hr = wg_transform_read_mf(impl->wg_transform, output_sample, &samples->dwStatus, NULL))) + if (FAILED(hr = wg_transform_read_mf(impl->wg_transform, output_sample, info.cbSize, &samples->dwStatus, NULL))) goto done; wg_sample_queue_flush(impl->wg_sample_queue, false);
diff --git a/dlls/winegstreamer/wg_sample.c b/dlls/winegstreamer/wg_sample.c index 2fc2679337f..94dfe483fb6 100644 --- a/dlls/winegstreamer/wg_sample.c +++ b/dlls/winegstreamer/wg_sample.c @@ -52,6 +52,7 @@ struct sample { IMFSample *sample; IMFMediaBuffer *buffer; + IMF2DBuffer2 *buffer2d; } mf; struct { @@ -79,8 +80,16 @@ static void mf_sample_destroy(struct wg_sample *wg_sample)
TRACE_(mfplat)("wg_sample %p.\n", wg_sample);
- IMFMediaBuffer_Unlock(sample->u.mf.buffer); - IMFMediaBuffer_Release(sample->u.mf.buffer); + if (sample->u.mf.buffer2d) + { + IMF2DBuffer2_Unlock2D(sample->u.mf.buffer2d); + IMF2DBuffer2_Release(sample->u.mf.buffer2d); + } + else + { + IMFMediaBuffer_Unlock(sample->u.mf.buffer); + IMFMediaBuffer_Release(sample->u.mf.buffer); + } IMFSample_Release(sample->u.mf.sample); }
@@ -92,21 +101,49 @@ static const struct wg_sample_ops mf_sample_ops = HRESULT wg_sample_create_mf(IMFSample *mf_sample, struct wg_sample **out) { DWORD current_length, max_length; + BYTE *scanline0, *buffer = NULL; + IMF2DBuffer2 *buffer2d; struct sample *sample; - BYTE *buffer; - HRESULT hr; + HRESULT hr = S_OK; + LONG pitch = 0;
if (!(sample = calloc(1, sizeof(*sample)))) return E_OUTOFMEMORY; if (FAILED(hr = IMFSample_ConvertToContiguousBuffer(mf_sample, &sample->u.mf.buffer))) goto fail; - if (FAILED(hr = IMFMediaBuffer_Lock(sample->u.mf.buffer, &buffer, &max_length, &current_length))) + if (SUCCEEDED(IMFMediaBuffer_QueryInterface(sample->u.mf.buffer, &IID_IMF2DBuffer2, (void **)&buffer2d))) + { + /* The result of ConvertToContiguousBuffer() can be an existing 2D buffer, which does + * not need to be contiguous by the definition of 'contiguous' for buffers. For 2D + * buffers, call Lock2DSize() and set up GStreamer to output with the correct stride. + * This avoids Lock() copying the entire contents into a linear buffer even though the + * current contents are about to be overwritten, and avoids Unlock() copying the new + * contents. Resolves performance issues on lower spec hardware. */ + if (SUCCEEDED(hr = IMF2DBuffer2_Lock2DSize(buffer2d, MF2DBuffer_LockFlags_ReadWrite, &scanline0, &pitch, &buffer, &max_length))) + { + IMFMediaBuffer_Release(sample->u.mf.buffer); + sample->u.mf.buffer = NULL; + sample->u.mf.buffer2d = buffer2d; + IMF2DBuffer2_GetContiguousLength(buffer2d, &current_length); + if (pitch < 0) + pitch = -pitch; + } + else + { + IMF2DBuffer2_Release(buffer2d); + } + if (FAILED(hr)) + goto fail; + } + + if (!buffer && FAILED(hr = IMFMediaBuffer_Lock(sample->u.mf.buffer, &buffer, &max_length, &current_length))) goto fail;
IMFSample_AddRef((sample->u.mf.sample = mf_sample)); sample->wg_sample.data = (UINT_PTR)buffer; sample->wg_sample.size = current_length; sample->wg_sample.max_size = max_length; + sample->wg_sample.stride = pitch; sample->ops = &mf_sample_ops;
*out = &sample->wg_sample; @@ -338,10 +375,12 @@ HRESULT wg_transform_push_mf(wg_transform_t transform, IMFSample *sample, return hr; }
-HRESULT wg_transform_read_mf(wg_transform_t transform, IMFSample *sample, DWORD *flags, bool *preserve_timestamps) +HRESULT wg_transform_read_mf(wg_transform_t transform, IMFSample *sample, + DWORD plane_size, DWORD *flags, bool *preserve_timestamps) { struct wg_sample *wg_sample; IMFMediaBuffer *buffer; + DWORD sample_size; HRESULT hr;
TRACE_(mfplat)("transform %#I64x, sample %p, flags %p.\n", transform, sample, flags); @@ -372,7 +411,20 @@ HRESULT wg_transform_read_mf(wg_transform_t transform, IMFSample *sample, DWORD
if (SUCCEEDED(hr = IMFSample_ConvertToContiguousBuffer(sample, &buffer))) { - hr = IMFMediaBuffer_SetCurrentLength(buffer, wg_sample->size); + if (wg_sample->stride && plane_size) + { + /* The sample size must match the frame size, which differs from the contiguous length + * if the buffer has extra width. MF allows a frame to be placed in a wider 2D buffer. */ + sample_size = min(plane_size, wg_sample->size); + } + else + { + if (wg_sample->stride) + FIXME("Expected a plane size.\n"); + sample_size = wg_sample->size; + } + + hr = IMFMediaBuffer_SetCurrentLength(buffer, sample_size); IMFMediaBuffer_Release(buffer); }
diff --git a/dlls/winegstreamer/wg_transform.c b/dlls/winegstreamer/wg_transform.c index f8bf4474756..e7bbdccb4e9 100644 --- a/dlls/winegstreamer/wg_transform.c +++ b/dlls/winegstreamer/wg_transform.c @@ -104,7 +104,7 @@ static struct wg_transform *get_transform(wg_transform_t trans) return (struct wg_transform *)(ULONG_PTR)trans; }
-static void align_video_info_planes(MFVideoInfo *video_info, gsize plane_align, +static void align_video_info_planes(MFVideoInfo *video_info, gsize plane_align, guint stride, GstVideoInfo *info, GstVideoAlignment *align) { bool fix_nv12 = !plane_align && info->finfo->format == GST_VIDEO_FORMAT_NV12 && (info->width & 3) && (info->width & 3) != 3; @@ -123,6 +123,27 @@ static void align_video_info_planes(MFVideoInfo *video_info, gsize plane_align, align->padding_left = aperture->OffsetY.value; }
+ if (stride) + { + /* The MF sample has a 2D buffer. Set padding_right to match its stride. */ + guint width = align->padding_left + info->width + align->padding_right; + const GstVideoFormatInfo *finfo = info->finfo; + gint comp[GST_VIDEO_MAX_COMPONENTS]; + gint pixel_stride; + + gst_video_format_info_component(finfo, 0, comp); + pixel_stride = finfo->pixel_stride[comp[0]]; + + if (stride % pixel_stride) + GST_ERROR("Stride %u not aligned to pixel size", stride); + stride /= pixel_stride; + + if (stride < width) + GST_ERROR("Invalid stride %u", stride); + else + align->padding_right += stride - width; + } + if (video_info->VideoFlags & MFVideoFlag_BottomUpLinearRep) { gsize top = align->padding_top; @@ -217,7 +238,7 @@ static void wg_video_buffer_pool_class_init(WgVideoBufferPoolClass *klass) pool_class->alloc_buffer = wg_video_buffer_pool_alloc_buffer; }
-static WgVideoBufferPool *wg_video_buffer_pool_create(GstCaps *caps, gsize plane_align, +static WgVideoBufferPool *wg_video_buffer_pool_create(GstCaps *caps, gsize plane_align, gsize output_plane_stride, GstAllocator *allocator, MFVideoInfo *video_info, GstVideoAlignment *align) { WgVideoBufferPool *pool; @@ -229,7 +250,7 @@ static WgVideoBufferPool *wg_video_buffer_pool_create(GstCaps *caps, gsize plane
gst_video_info_from_caps(&pool->info, caps); max_size = pool->info.size; - align_video_info_planes(video_info, plane_align, &pool->info, align); + align_video_info_planes(video_info, plane_align, output_plane_stride, &pool->info, align); /* GStreamer assumes NV12 pools must accommodate a stride alignment of 4, but we use 2 */ max_size = max(max_size, pool->info.size);
@@ -313,7 +334,7 @@ static gboolean transform_sink_query_allocation(struct wg_transform *transform, return false;
if (!(pool = wg_video_buffer_pool_create(caps, transform->attrs.output_plane_align, - transform->allocator, &transform->output_info, &align))) + transform->attrs.output_plane_stride, transform->allocator, &transform->output_info, &align))) return false;
if ((params = gst_structure_new("video-meta", @@ -896,7 +917,7 @@ NTSTATUS wg_transform_push_data(void *args) }
if (!(buffer = gst_buffer_new_wrapped_full(GST_MEMORY_FLAG_READONLY, wg_sample_data(sample), sample->max_size, - 0, sample->size, sample, wg_sample_free_notify))) + 0, sample->stride ? sample->max_size : sample->size, sample, wg_sample_free_notify))) { GST_ERROR("Failed to allocate input buffer"); return STATUS_NO_MEMORY; @@ -911,7 +932,7 @@ NTSTATUS wg_transform_push_data(void *args) if (!strcmp(input_mime, "video/x-raw") && gst_video_info_from_caps(&video_info, transform->input_caps)) { GstVideoAlignment align; - align_video_info_planes(&transform->input_info, 0, &video_info, &align); + align_video_info_planes(&transform->input_info, 0, sample->stride, &video_info, &align); buffer_add_video_meta(buffer, &video_info); }
@@ -1214,6 +1235,13 @@ NTSTATUS wg_transform_read_data(void *args) bool discard_data; NTSTATUS status;
+ if (sample->stride != transform->attrs.output_plane_stride) + { + GST_INFO("Reconfiguring to stride %u", sample->stride); + transform->attrs.output_plane_stride = sample->stride; + push_event(transform->my_sink, gst_event_new_reconfigure()); + } + if (!transform->output_sample && !get_transform_output(transform, sample)) { sample->size = 0; @@ -1237,7 +1265,7 @@ NTSTATUS wg_transform_read_data(void *args) dst_video_info = src_video_info;
/* set the desired output buffer alignment and stride on the dest video info */ - align_video_info_planes(&transform->output_info, plane_align, &dst_video_info, &align); + align_video_info_planes(&transform->output_info, plane_align, sample->stride, &dst_video_info, &align);
/* copy the actual output buffer alignment and stride to the src video info */ if ((meta = gst_buffer_get_video_meta(output_buffer))) diff --git a/dlls/winegstreamer/wma_decoder.c b/dlls/winegstreamer/wma_decoder.c index ca7a5f278bf..8ff4d09cfaa 100644 --- a/dlls/winegstreamer/wma_decoder.c +++ b/dlls/winegstreamer/wma_decoder.c @@ -552,7 +552,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, return MF_E_TRANSFORM_NEED_MORE_INPUT; }
- if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, samples->pSample, &samples->dwStatus, NULL))) + if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, samples->pSample, 0, &samples->dwStatus, NULL))) wg_sample_queue_flush(decoder->wg_sample_queue, false);
return hr;