From 6033600abef2b99410c9f07dc5596d1fb2165a6b Mon Sep 17 00:00:00 2001 From: Sidney Just <justsid@x-plane.com> Date: Tue, 23 Jan 2024 10:14:39 -0800 Subject: [PATCH] zink: Workaround for a race condition in the Nvidia driver --- src/gallium/drivers/zink/zink_batch.c | 39 ++++++++++++++------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index 769ba4f212c0d..f88ee0f982f9d 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -719,26 +719,29 @@ submit_queue(void *data, void *gdata, int thread_index) bs->unordered_write_stages, 0, 0, 1, &mb, 0, NULL, 0, NULL); } - VRAM_ALLOC_LOOP(result, - VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf), - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); - bs->is_device_lost = true; - goto end; - } - ); - } - if (bs->has_unsync) { - VRAM_ALLOC_LOOP(result, - VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf), - if (result != VK_SUCCESS) { - mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); - bs->is_device_lost = true; - goto end; - } - ); } + // On pre-2024 Nvidia drivers, calling vkResetCommandPool() with command buffers that haven't + // ended recording leads to a race condition inside of vkResetCommandPool() in Nvidia's background thread. 
+ // The workaround is to manually end recording on the command buffers before resetting them, even though resetting them while they are still recording is spec-conformant. + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->reordered_cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + ); + + VRAM_ALLOC_LOOP(result, + VKSCR(EndCommandBuffer)(bs->unsynchronized_cmdbuf), + if (result != VK_SUCCESS) { + mesa_loge("ZINK: vkEndCommandBuffer failed (%s)", vk_Result_to_str(result)); + bs->is_device_lost = true; + goto end; + } + ); + if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount) num_si--; -- GitLab