From 0f21a2b72fcb8ef6d4ee698a6344d0a2b4962e85 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 3 Jan 2025 14:48:11 +0100 Subject: [PATCH 1/3] vkd3d: Add overhead tid to queue profile. Allows us to study specific parts of submission overhead. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 3 +++ libs/vkd3d/queue_timeline.c | 29 ++++++++++++++++++++++++++++- libs/vkd3d/vkd3d_private.h | 8 +++++++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 09def6816d..3419723b64 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -18936,6 +18936,7 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu if (is_first) { + vkd3d_queue_timeline_trace_begin_execute_overhead(&command_queue->device->queue_timeline_trace, timeline_cookie); d3d12_command_queue_gather_wait_semaphores_locked(command_queue, &submit_desc[0], VKD3D_WAIT_SEMAPHORES_EXTERNAL | VKD3D_WAIT_SEMAPHORES_SERIALIZING); } @@ -18989,6 +18990,8 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu command_queue->last_submission_timeline_value = signal_semaphore_infos[0].value; } + vkd3d_queue_timeline_trace_end_execute_overhead(&command_queue->device->queue_timeline_trace, timeline_cookie); + #ifdef VKD3D_ENABLE_RENDERDOC if (debug_capture) vkd3d_renderdoc_command_queue_end_capture(command_queue); diff --git a/libs/vkd3d/queue_timeline.c b/libs/vkd3d/queue_timeline.c index 1be916209c..3b41e40370 100644 --- a/libs/vkd3d/queue_timeline.c +++ b/libs/vkd3d/queue_timeline.c @@ -543,8 +543,8 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra struct vkd3d_fence_worker *worker, struct vkd3d_queue_timeline_trace_cookie cookie) { + double end_ts, start_submit_ts, start_ts, overhead_start_ts, overhead_end_ts; const struct vkd3d_queue_timeline_trace_state *state; - double end_ts, start_submit_ts, start_ts; unsigned int pid; const char *tid; double *ts_lock; @@ -556,6 +556,8 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra start_ts = (double)(state->start_ts - trace->base_ts) * 1e-3; start_submit_ts = (double)(state->start_submit_ts - trace->base_ts) * 1e-3; end_ts = (double)(vkd3d_get_current_time_ns() - trace->base_ts) * 1e-3; + overhead_start_ts = start_ts + 1e-3 * state->overhead_start_offset; + overhead_end_ts = start_ts + 1e-3 * state->overhead_end_offset; if (worker) { @@ -593,6 +595,9 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"submit\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n", state->desc, pid, start_ts, start_submit_ts - start_ts); + fprintf(trace->file, + "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"overhead\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n", + state->desc, pid, overhead_start_ts, overhead_end_ts - overhead_start_ts); } } @@ -609,3 +614,25 @@ void vkd3d_queue_timeline_trace_begin_execute(struct vkd3d_queue_timeline_trace state = &trace->state[cookie.index]; state->start_submit_ts = vkd3d_get_current_time_ns(); } + +void vkd3d_queue_timeline_trace_begin_execute_overhead(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + struct vkd3d_queue_timeline_trace_state *state; + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + state->overhead_start_offset = vkd3d_get_current_time_ns() - state->start_ts; +} + +void vkd3d_queue_timeline_trace_end_execute_overhead(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + struct vkd3d_queue_timeline_trace_state *state; + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + state->overhead_end_offset = vkd3d_get_current_time_ns() - state->start_ts; +} diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 78a78e3598..f2e6f52d1c 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -4732,7 +4732,9 @@ struct vkd3d_queue_timeline_trace_state uint64_t start_submit_ts; uint64_t record_end_ts; uint64_t record_cookie; - char desc[128 - 5 * sizeof(uint64_t)]; + uint32_t overhead_start_offset; + uint32_t overhead_end_offset; + char desc[128 - 6 * sizeof(uint64_t)]; }; struct vkd3d_queue_timeline_trace @@ -4811,6 +4813,10 @@ void vkd3d_queue_timeline_trace_close_command_list(struct vkd3d_queue_timeline_t struct vkd3d_queue_timeline_trace_cookie cookie); void vkd3d_queue_timeline_trace_begin_execute(struct vkd3d_queue_timeline_trace *trace, struct vkd3d_queue_timeline_trace_cookie cookie); +void vkd3d_queue_timeline_trace_begin_execute_overhead(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie); +void vkd3d_queue_timeline_trace_end_execute_overhead(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie); struct vkd3d_address_binding_report_buffer_info { From 6e22b9406c62afcf6738739969861038d2136d90 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 3 Jan 2025 15:39:48 +0100 Subject: [PATCH 2/3] vkd3d: Add generic execution regions to timeline. Makes it easier to tell what the submission threads are doing. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 16 ++++++++++++++++ libs/vkd3d/queue_timeline.c | 25 +++++++++++++++++++------ libs/vkd3d/vkd3d_private.h | 6 ++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 3419723b64..8382a6771e 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -18230,6 +18230,7 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, UINT64 value) { const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; + struct vkd3d_queue_timeline_trace_cookie cookie; struct d3d12_fence_value fence_value; VkSemaphoreWaitInfo wait_info; struct vkd3d_queue *queue; @@ -18247,7 +18248,9 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, * but we don't have virtualized queues in Vulkan, so we need to handle the case * where multiple queues alias over the same physical queue, so effectively, we need to manage out-of-order submits * ourselves. */ + cookie = vkd3d_queue_timeline_trace_register_generic_region(&fence->device->queue_timeline_trace, "WAIT BEFORE SIGNAL"); d3d12_fence_block_until_pending_value_reaches_locked(fence, value); + vkd3d_queue_timeline_trace_complete_execute(&fence->device->queue_timeline_trace, &command_queue->fence_worker, cookie); /* If a host signal unblocked us, or we know that the fence has reached a specific value, there is no need * to queue up a wait. */ @@ -18279,8 +18282,10 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, wait_info.pSemaphores = &fence_value.vk_semaphore; wait_info.pValues = &fence_value.vk_semaphore_value; + cookie = vkd3d_queue_timeline_trace_register_generic_region(&fence->device->queue_timeline_trace, "CPU WAIT"); if ((vr = VK_CALL(vkWaitSemaphores(command_queue->device->vk_device, &wait_info, UINT64_MAX)))) ERR("Failed to wait for timeline semaphore, vr %d.\n", vr); + vkd3d_queue_timeline_trace_complete_execute(&fence->device->queue_timeline_trace, &command_queue->fence_worker, cookie); } else { @@ -19404,6 +19409,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) { struct d3d12_command_queue_submission submission; struct d3d12_command_queue_transition_pool pool; + struct vkd3d_queue_timeline_trace_cookie cookie; struct d3d12_command_queue *queue = userdata; VkSemaphoreSubmitInfo transition_semaphore; VkCommandBufferSubmitInfo transition_cmd; @@ -19434,17 +19440,20 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) switch (submission.type) { case VKD3D_SUBMISSION_STOP: + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "STOP"); goto cleanup; case VKD3D_SUBMISSION_WAIT: VKD3D_REGION_BEGIN(queue_wait); if (is_shared_ID3D12Fence1(submission.wait.fence)) { + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "WAIT (shared)"); d3d12_command_queue_flush_waiters(queue, 0u); d3d12_command_queue_wait_shared(queue, shared_impl_from_ID3D12Fence1(submission.wait.fence), submission.wait.value); } else { + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "WAIT (normal)"); d3d12_command_queue_wait(queue, impl_from_ID3D12Fence1(submission.wait.fence), submission.wait.value); } @@ -19453,6 +19462,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) break; case VKD3D_SUBMISSION_SIGNAL: + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "SIGNAL"); d3d12_command_queue_flush_waiters(queue, 0u); VKD3D_REGION_BEGIN(queue_signal); @@ -19463,6 +19473,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) case VKD3D_SUBMISSION_EXECUTE: VKD3D_REGION_BEGIN(queue_execute); + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "EXECUTE"); memset(&transition_cmd, 0, sizeof(transition_cmd)); transition_cmd.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO; @@ -19512,6 +19523,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) break; case VKD3D_SUBMISSION_BIND_SPARSE: + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "SPARSE"); d3d12_command_queue_flush_waiters(queue, VKD3D_WAIT_SEMAPHORES_EXTERNAL); d3d12_command_queue_bind_sparse(queue, submission.bind_sparse.mode, @@ -19521,6 +19533,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) break; case VKD3D_SUBMISSION_DRAIN: + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "DRAIN"); /* Full flush needed to synchronize interop */ d3d12_command_queue_flush_waiters(queue, VKD3D_WAIT_SEMAPHORES_EXTERNAL | VKD3D_WAIT_SEMAPHORES_SERIALIZING); @@ -19531,6 +19544,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) break; case VKD3D_SUBMISSION_CALLBACK: + cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "CALLBACK"); d3d12_command_queue_flush_waiters(queue, VKD3D_WAIT_SEMAPHORES_EXTERNAL | VKD3D_WAIT_SEMAPHORES_SERIALIZING); submission.callback.callback(submission.callback.userdata); @@ -19540,6 +19554,8 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) ERR("Unrecognized submission type %u.\n", submission.type); break; } + + vkd3d_queue_timeline_trace_complete_execute(&queue->device->queue_timeline_trace, &queue->fence_worker, cookie); } cleanup: diff --git a/libs/vkd3d/queue_timeline.c b/libs/vkd3d/queue_timeline.c index 3b41e40370..f6462f3d7b 100644 --- a/libs/vkd3d/queue_timeline.c +++ b/libs/vkd3d/queue_timeline.c @@ -438,6 +438,13 @@ vkd3d_queue_timeline_trace_register_present_wait(struct vkd3d_queue_timeline_tra return vkd3d_queue_timeline_trace_register_generic_op(trace, VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_WAIT, str); } +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_generic_region(struct vkd3d_queue_timeline_trace *trace, const char *tag) +{ + return vkd3d_queue_timeline_trace_register_generic_op(trace, VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION, tag); +} + + struct vkd3d_queue_timeline_trace_cookie vkd3d_queue_timeline_trace_register_present_block(struct vkd3d_queue_timeline_trace *trace, uint64_t present_id) { @@ -565,6 +572,9 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra vkd3d_queue_timeline_trace_flush_instantaneous(trace, worker); tid = worker->timeline.tid; + if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION) + tid = "regions"; + pid = worker->queue->submission_thread_tid; if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION) @@ -578,13 +588,16 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra start_submit_ts = start_ts; } - ts_lock = &worker->timeline.lock_end_gpu_ts; + if (state->type != VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION) + { + ts_lock = &worker->timeline.lock_end_gpu_ts; - if (start_submit_ts < *ts_lock) - start_submit_ts = *ts_lock; - if (end_ts < start_submit_ts) - end_ts = start_submit_ts; - *ts_lock = end_ts; + if (start_submit_ts < *ts_lock) + start_submit_ts = *ts_lock; + if (end_ts < start_submit_ts) + end_ts = start_submit_ts; + *ts_lock = end_ts; + } fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"%s\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n", state->desc, tid, pid, start_submit_ts, end_ts - start_submit_ts); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index f2e6f52d1c..6d3ccc68f8 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -4705,6 +4705,9 @@ enum vkd3d_queue_timeline_trace_state_type /* Waiting for present wait to complete. */ VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_WAIT, + /* Generic region markers. */ + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION, + /* Time spent blocking in ::Present() in user thread. */ VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_BLOCK, @@ -4797,6 +4800,9 @@ vkd3d_queue_timeline_trace_register_command_list(struct vkd3d_queue_timeline_tra void vkd3d_queue_timeline_trace_register_instantaneous(struct vkd3d_queue_timeline_trace *trace, enum vkd3d_queue_timeline_trace_state_type type, uint64_t value); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_generic_region(struct vkd3d_queue_timeline_trace *trace, const char *tag); + void vkd3d_queue_timeline_trace_complete_event_signal(struct vkd3d_queue_timeline_trace *trace, struct vkd3d_fence_worker *worker, struct vkd3d_queue_timeline_trace_cookie cookie); From 9db8676cfc04d07fd06202dc33a1a7ed5cc89fb0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 3 Jan 2025 17:22:22 +0100 Subject: [PATCH 3/3] vkd3d: Report the VkQueue handle pointer in queue timeline. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 8382a6771e..2165056400 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -589,14 +589,15 @@ static void *vkd3d_fence_worker_main(void *arg) snprintf(worker->timeline.tid, sizeof(worker->timeline.tid), #ifdef _WIN32 - "family %u, %s, tid 0x%04x, prio %d", + "family %u, %s, tid 0x%04x, prio %d, %p", #else - "family %u, %s, tid %u, prio %d", + "family %u, %s, tid %u, prio %d, %p", #endif worker->queue->vkd3d_queue->vk_family_index, type_str, vkd3d_get_current_thread_id(), - worker->queue->desc.Priority); + worker->queue->desc.Priority, + (void *)worker->queue->vkd3d_queue->vk_queue); cur_fence_count = 0; cur_fences_size = 0;