Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Queue profile improvements rebase #2272

Merged
merged 3 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions libs/vkd3d/command.c
Original file line number Diff line number Diff line change
Expand Up @@ -589,14 +589,15 @@ static void *vkd3d_fence_worker_main(void *arg)

snprintf(worker->timeline.tid, sizeof(worker->timeline.tid),
#ifdef _WIN32
"family %u, %s, tid 0x%04x, prio %d",
"family %u, %s, tid 0x%04x, prio %d, %p",
#else
"family %u, %s, tid %u, prio %d",
"family %u, %s, tid %u, prio %d, %p",
#endif
worker->queue->vkd3d_queue->vk_family_index,
type_str,
vkd3d_get_current_thread_id(),
worker->queue->desc.Priority);
worker->queue->desc.Priority,
(void *)worker->queue->vkd3d_queue->vk_queue);

cur_fence_count = 0;
cur_fences_size = 0;
Expand Down Expand Up @@ -18230,6 +18231,7 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue,
struct d3d12_fence *fence, UINT64 value)
{
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
struct vkd3d_queue_timeline_trace_cookie cookie;
struct d3d12_fence_value fence_value;
VkSemaphoreWaitInfo wait_info;
struct vkd3d_queue *queue;
Expand All @@ -18247,7 +18249,9 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue,
* but we don't have virtualized queues in Vulkan, so we need to handle the case
* where multiple queues alias over the same physical queue, so effectively, we need to manage out-of-order submits
* ourselves. */
cookie = vkd3d_queue_timeline_trace_register_generic_region(&fence->device->queue_timeline_trace, "WAIT BEFORE SIGNAL");
d3d12_fence_block_until_pending_value_reaches_locked(fence, value);
vkd3d_queue_timeline_trace_complete_execute(&fence->device->queue_timeline_trace, &command_queue->fence_worker, cookie);

/* If a host signal unblocked us, or we know that the fence has reached a specific value, there is no need
* to queue up a wait. */
Expand Down Expand Up @@ -18279,8 +18283,10 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue,
wait_info.pSemaphores = &fence_value.vk_semaphore;
wait_info.pValues = &fence_value.vk_semaphore_value;

cookie = vkd3d_queue_timeline_trace_register_generic_region(&fence->device->queue_timeline_trace, "CPU WAIT");
if ((vr = VK_CALL(vkWaitSemaphores(command_queue->device->vk_device, &wait_info, UINT64_MAX))))
ERR("Failed to wait for timeline semaphore, vr %d.\n", vr);
vkd3d_queue_timeline_trace_complete_execute(&fence->device->queue_timeline_trace, &command_queue->fence_worker, cookie);
}
else
{
Expand Down Expand Up @@ -18936,6 +18942,7 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu

if (is_first)
{
vkd3d_queue_timeline_trace_begin_execute_overhead(&command_queue->device->queue_timeline_trace, timeline_cookie);
d3d12_command_queue_gather_wait_semaphores_locked(command_queue, &submit_desc[0],
VKD3D_WAIT_SEMAPHORES_EXTERNAL | VKD3D_WAIT_SEMAPHORES_SERIALIZING);
}
Expand Down Expand Up @@ -18989,6 +18996,8 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu
command_queue->last_submission_timeline_value = signal_semaphore_infos[0].value;
}

vkd3d_queue_timeline_trace_end_execute_overhead(&command_queue->device->queue_timeline_trace, timeline_cookie);

#ifdef VKD3D_ENABLE_RENDERDOC
if (debug_capture)
vkd3d_renderdoc_command_queue_end_capture(command_queue);
Expand Down Expand Up @@ -19401,6 +19410,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
{
struct d3d12_command_queue_submission submission;
struct d3d12_command_queue_transition_pool pool;
struct vkd3d_queue_timeline_trace_cookie cookie;
struct d3d12_command_queue *queue = userdata;
VkSemaphoreSubmitInfo transition_semaphore;
VkCommandBufferSubmitInfo transition_cmd;
Expand Down Expand Up @@ -19431,17 +19441,20 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
switch (submission.type)
{
case VKD3D_SUBMISSION_STOP:
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "STOP");
goto cleanup;

case VKD3D_SUBMISSION_WAIT:
VKD3D_REGION_BEGIN(queue_wait);
if (is_shared_ID3D12Fence1(submission.wait.fence))
{
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "WAIT (shared)");
d3d12_command_queue_flush_waiters(queue, 0u);
d3d12_command_queue_wait_shared(queue, shared_impl_from_ID3D12Fence1(submission.wait.fence), submission.wait.value);
}
else
{
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "WAIT (normal)");
d3d12_command_queue_wait(queue, impl_from_ID3D12Fence1(submission.wait.fence), submission.wait.value);
}

Expand All @@ -19450,6 +19463,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
break;

case VKD3D_SUBMISSION_SIGNAL:
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "SIGNAL");
d3d12_command_queue_flush_waiters(queue, 0u);

VKD3D_REGION_BEGIN(queue_signal);
Expand All @@ -19460,6 +19474,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)

case VKD3D_SUBMISSION_EXECUTE:
VKD3D_REGION_BEGIN(queue_execute);
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "EXECUTE");

memset(&transition_cmd, 0, sizeof(transition_cmd));
transition_cmd.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
Expand Down Expand Up @@ -19509,6 +19524,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
break;

case VKD3D_SUBMISSION_BIND_SPARSE:
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "SPARSE");
d3d12_command_queue_flush_waiters(queue, VKD3D_WAIT_SEMAPHORES_EXTERNAL);

d3d12_command_queue_bind_sparse(queue, submission.bind_sparse.mode,
Expand All @@ -19518,6 +19534,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
break;

case VKD3D_SUBMISSION_DRAIN:
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "DRAIN");
/* Full flush needed to synchronize interop */
d3d12_command_queue_flush_waiters(queue, VKD3D_WAIT_SEMAPHORES_EXTERNAL | VKD3D_WAIT_SEMAPHORES_SERIALIZING);

Expand All @@ -19528,6 +19545,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
break;

case VKD3D_SUBMISSION_CALLBACK:
cookie = vkd3d_queue_timeline_trace_register_generic_region(&queue->device->queue_timeline_trace, "CALLBACK");
d3d12_command_queue_flush_waiters(queue, VKD3D_WAIT_SEMAPHORES_EXTERNAL | VKD3D_WAIT_SEMAPHORES_SERIALIZING);

submission.callback.callback(submission.callback.userdata);
Expand All @@ -19537,6 +19555,8 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
ERR("Unrecognized submission type %u.\n", submission.type);
break;
}

vkd3d_queue_timeline_trace_complete_execute(&queue->device->queue_timeline_trace, &queue->fence_worker, cookie);
}

cleanup:
Expand Down
54 changes: 47 additions & 7 deletions libs/vkd3d/queue_timeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,13 @@ vkd3d_queue_timeline_trace_register_present_wait(struct vkd3d_queue_timeline_tra
return vkd3d_queue_timeline_trace_register_generic_op(trace, VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_WAIT, str);
}

struct vkd3d_queue_timeline_trace_cookie
vkd3d_queue_timeline_trace_register_generic_region(struct vkd3d_queue_timeline_trace *trace, const char *tag)
{
return vkd3d_queue_timeline_trace_register_generic_op(trace, VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION, tag);
}


struct vkd3d_queue_timeline_trace_cookie
vkd3d_queue_timeline_trace_register_present_block(struct vkd3d_queue_timeline_trace *trace, uint64_t present_id)
{
Expand Down Expand Up @@ -543,8 +550,8 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra
struct vkd3d_fence_worker *worker,
struct vkd3d_queue_timeline_trace_cookie cookie)
{
double end_ts, start_submit_ts, start_ts, overhead_start_ts, overhead_end_ts;
const struct vkd3d_queue_timeline_trace_state *state;
double end_ts, start_submit_ts, start_ts;
unsigned int pid;
const char *tid;
double *ts_lock;
Expand All @@ -556,13 +563,18 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra
start_ts = (double)(state->start_ts - trace->base_ts) * 1e-3;
start_submit_ts = (double)(state->start_submit_ts - trace->base_ts) * 1e-3;
end_ts = (double)(vkd3d_get_current_time_ns() - trace->base_ts) * 1e-3;
overhead_start_ts = start_ts + 1e-3 * state->overhead_start_offset;
overhead_end_ts = start_ts + 1e-3 * state->overhead_end_offset;

if (worker)
{
if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION)
vkd3d_queue_timeline_trace_flush_instantaneous(trace, worker);

tid = worker->timeline.tid;
if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION)
tid = "regions";

pid = worker->queue->submission_thread_tid;

if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION)
Expand All @@ -576,13 +588,16 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra
start_submit_ts = start_ts;
}

ts_lock = &worker->timeline.lock_end_gpu_ts;
if (state->type != VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION)
{
ts_lock = &worker->timeline.lock_end_gpu_ts;

if (start_submit_ts < *ts_lock)
start_submit_ts = *ts_lock;
if (end_ts < start_submit_ts)
end_ts = start_submit_ts;
*ts_lock = end_ts;
if (start_submit_ts < *ts_lock)
start_submit_ts = *ts_lock;
if (end_ts < start_submit_ts)
end_ts = start_submit_ts;
*ts_lock = end_ts;
}

fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"%s\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n",
state->desc, tid, pid, start_submit_ts, end_ts - start_submit_ts);
Expand All @@ -593,6 +608,9 @@ void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_tra
fprintf(trace->file,
"{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"submit\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n",
state->desc, pid, start_ts, start_submit_ts - start_ts);
fprintf(trace->file,
"{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"overhead\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n",
state->desc, pid, overhead_start_ts, overhead_end_ts - overhead_start_ts);
}
}

Expand All @@ -609,3 +627,25 @@ void vkd3d_queue_timeline_trace_begin_execute(struct vkd3d_queue_timeline_trace
state = &trace->state[cookie.index];
state->start_submit_ts = vkd3d_get_current_time_ns();
}

void vkd3d_queue_timeline_trace_begin_execute_overhead(struct vkd3d_queue_timeline_trace *trace,
struct vkd3d_queue_timeline_trace_cookie cookie)
{
struct vkd3d_queue_timeline_trace_state *state;
if (!trace->active || cookie.index == 0)
return;

state = &trace->state[cookie.index];
state->overhead_start_offset = vkd3d_get_current_time_ns() - state->start_ts;
}

void vkd3d_queue_timeline_trace_end_execute_overhead(struct vkd3d_queue_timeline_trace *trace,
struct vkd3d_queue_timeline_trace_cookie cookie)
{
struct vkd3d_queue_timeline_trace_state *state;
if (!trace->active || cookie.index == 0)
return;

state = &trace->state[cookie.index];
state->overhead_end_offset = vkd3d_get_current_time_ns() - state->start_ts;
}
14 changes: 13 additions & 1 deletion libs/vkd3d/vkd3d_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -4705,6 +4705,9 @@ enum vkd3d_queue_timeline_trace_state_type
/* Waiting for present wait to complete. */
VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_WAIT,

/* Generic region markers. */
VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_GENERIC_REGION,

/* Time spent blocking in ::Present() in user thread. */
VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_BLOCK,

Expand Down Expand Up @@ -4732,7 +4735,9 @@ struct vkd3d_queue_timeline_trace_state
uint64_t start_submit_ts;
uint64_t record_end_ts;
uint64_t record_cookie;
char desc[128 - 5 * sizeof(uint64_t)];
uint32_t overhead_start_offset;
uint32_t overhead_end_offset;
char desc[128 - 6 * sizeof(uint64_t)];
};

struct vkd3d_queue_timeline_trace
Expand Down Expand Up @@ -4795,6 +4800,9 @@ vkd3d_queue_timeline_trace_register_command_list(struct vkd3d_queue_timeline_tra
void vkd3d_queue_timeline_trace_register_instantaneous(struct vkd3d_queue_timeline_trace *trace,
enum vkd3d_queue_timeline_trace_state_type type, uint64_t value);

struct vkd3d_queue_timeline_trace_cookie
vkd3d_queue_timeline_trace_register_generic_region(struct vkd3d_queue_timeline_trace *trace, const char *tag);

void vkd3d_queue_timeline_trace_complete_event_signal(struct vkd3d_queue_timeline_trace *trace,
struct vkd3d_fence_worker *worker,
struct vkd3d_queue_timeline_trace_cookie cookie);
Expand All @@ -4811,6 +4819,10 @@ void vkd3d_queue_timeline_trace_close_command_list(struct vkd3d_queue_timeline_t
struct vkd3d_queue_timeline_trace_cookie cookie);
void vkd3d_queue_timeline_trace_begin_execute(struct vkd3d_queue_timeline_trace *trace,
struct vkd3d_queue_timeline_trace_cookie cookie);
void vkd3d_queue_timeline_trace_begin_execute_overhead(struct vkd3d_queue_timeline_trace *trace,
struct vkd3d_queue_timeline_trace_cookie cookie);
void vkd3d_queue_timeline_trace_end_execute_overhead(struct vkd3d_queue_timeline_trace *trace,
struct vkd3d_queue_timeline_trace_cookie cookie);

struct vkd3d_address_binding_report_buffer_info
{
Expand Down