Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Control point control flow #1481

Closed
wants to merge 14 commits into from
38 changes: 17 additions & 21 deletions hwtracer/src/perf/collect.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <syscall.h>
#include <threads.h>
#include <time.h>
#include <unistd.h>

Expand All @@ -44,17 +43,6 @@
// The bit in the IA32_RTIT_CTL MSR that disables compressed returns.
#define IA32_RTIT_CTL_DISRETC 1 << 11

/*
* The thread's perf file descriptor and its associated underlying `mmap(2)`
* regions. The file descriptor is re-used for subsequent trace collections for
* the same thread.
*
* FIXME: These leak when a thread dies.
*/
static thread_local void *cache_base_buf = NULL;
static thread_local void *cache_aux_buf = NULL;
static thread_local int cache_perf_fd = -1;

enum hwt_cerror_kind {
hwt_cerror_unused = 0,
hwt_cerror_unknown = 1,
Expand Down Expand Up @@ -530,9 +518,7 @@ hwt_perf_init_collector(struct hwt_perf_collector_config *tr_conf,
tr_ctx->perf_fd = -1;

// Obtain a file descriptor through which to speak to perf.
if (cache_perf_fd == -1)
cache_perf_fd = open_perf(tr_conf->aux_bufsize, err);
tr_ctx->perf_fd = cache_perf_fd;
tr_ctx->perf_fd = open_perf(tr_conf->aux_bufsize, err);
if (tr_ctx->perf_fd == -1) {
hwt_set_cerr(err, hwt_cerror_errno, errno);
failing = true;
Expand Down Expand Up @@ -562,10 +548,8 @@ hwt_perf_init_collector(struct hwt_perf_collector_config *tr_conf,
// data_bufsize'.
int page_size = getpagesize();
tr_ctx->base_bufsize = (1 + tr_conf->data_bufsize) * page_size;
if (!cache_base_buf)
cache_base_buf = mmap(NULL, tr_ctx->base_bufsize, PROT_WRITE, MAP_SHARED,
tr_ctx->base_buf = mmap(NULL, tr_ctx->base_bufsize, PROT_WRITE, MAP_SHARED,
tr_ctx->perf_fd, 0);
tr_ctx->base_buf = cache_base_buf;
if (tr_ctx->base_buf == MAP_FAILED) {
hwt_set_cerr(err, hwt_cerror_errno, errno);
failing = true;
Expand All @@ -581,10 +565,8 @@ hwt_perf_init_collector(struct hwt_perf_collector_config *tr_conf,
// Allocate the AUX buffer.
//
// Mapped R/W so as to have a saturating ring buffer.
if (!cache_aux_buf)
cache_aux_buf = mmap(NULL, base_header->aux_size, PROT_READ | PROT_WRITE,
tr_ctx->aux_buf = mmap(NULL, base_header->aux_size, PROT_READ | PROT_WRITE,
MAP_SHARED, tr_ctx->perf_fd, base_header->aux_offset);
tr_ctx->aux_buf = cache_aux_buf;
if (tr_ctx->aux_buf == MAP_FAILED) {
hwt_set_cerr(err, hwt_cerror_errno, errno);
failing = true;
Expand Down Expand Up @@ -752,6 +734,16 @@ bool hwt_perf_free_collector(struct hwt_perf_ctx *tr_ctx,
struct hwt_cerror *err) {
int ret = true;

if ((tr_ctx->aux_buf) &&
(munmap(tr_ctx->aux_buf, tr_ctx->aux_bufsize) == -1)) {
hwt_set_cerr(err, hwt_cerror_errno, errno);
ret = false;
}
if ((tr_ctx->base_buf) &&
(munmap(tr_ctx->base_buf, tr_ctx->base_bufsize) == -1)) {
hwt_set_cerr(err, hwt_cerror_errno, errno);
ret = false;
}
if (tr_ctx->stop_fds[1] != -1) {
// If the write end of the pipe is still open, the thread is still running.
close(tr_ctx->stop_fds[1]); // signals thread to stop.
Expand All @@ -763,6 +755,10 @@ bool hwt_perf_free_collector(struct hwt_perf_ctx *tr_ctx,
if (tr_ctx->stop_fds[0] != -1) {
close(tr_ctx->stop_fds[0]);
}
if (tr_ctx->perf_fd >= 0) {
close(tr_ctx->perf_fd);
tr_ctx->perf_fd = -1;
}
if (tr_ctx != NULL) {
free(tr_ctx);
}
Expand Down
23 changes: 5 additions & 18 deletions hwtracer/src/pt/ykpt/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use super::packets::*;

#[derive(Clone, Copy, Debug)]
enum PacketParserState {
/// Initial state, waiting for a PSB packet.
Init,
/// The "normal" decoding state.
Normal,
/// We are decoding a PSB+ sequence.
Expand All @@ -27,6 +29,7 @@ impl PacketParserState {
// OPT: The order below is a rough guess based on what limited traces I've seen. Benchmark
// and optimise.
match self {
Self::Init => &[PacketKind::PSB],
Self::Normal => &[
PacketKind::ShortTNT,
PacketKind::PAD,
Expand Down Expand Up @@ -60,6 +63,7 @@ impl PacketParserState {
/// kind of packet.
fn transition(&mut self, pkt_kind: PacketKind) {
let new = match (*self, pkt_kind) {
(Self::Init, PacketKind::PSB) => Self::PSBPlus,
(Self::Normal, PacketKind::PSB) => Self::PSBPlus,
(Self::PSBPlus, PacketKind::PSBEND) => Self::Normal,
_ => return, // No state transition.
Expand Down Expand Up @@ -105,7 +109,7 @@ impl<'t> PacketParser<'t> {
pub(super) fn new(bytes: &'t [u8]) -> Self {
Self {
bits: BitSlice::from_slice(bytes),
state: PacketParserState::Normal,
state: PacketParserState::Init,
prev_tip: 0,
}
}
Expand Down Expand Up @@ -228,7 +232,6 @@ impl Iterator for PacketParser<'_> {
mod tests {
use super::{super::packets::*, PacketParser};
use crate::{trace_closure, work_loop, TracerBuilder, TracerKind};
use std::hint::black_box;

/// Parse the packets of a small trace, checking the basic structure of the decoded trace.
#[test]
Expand Down Expand Up @@ -270,22 +273,6 @@ mod tests {
assert!(matches!(ts, TestState::SawPacketGenDisable));
}

/// Checks PT packet streams make sense when a perf fd is re-used.
#[test]
fn decode_many() {
let tc = TracerBuilder::new()
.tracer_kind(TracerKind::PT(crate::perf::PerfCollectorConfig::default()))
.build()
.unwrap();
for _ in 0..50 {
let trace = trace_closure(&tc, || work_loop(3));
// Force full-decoding of the trace.
for p in PacketParser::new(trace.bytes()) {
let _ = black_box(p);
}
}
}

/// Test target IP decompression when the `IPBytes = 0b000`.
#[test]
fn ipbytes_decompress_000() {
Expand Down
67 changes: 67 additions & 0 deletions tests/c/early_return_fall_out.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Run-time:
// env-var: YKD_LOG_IR=-:jit-post-opt
// env-var: YKD_SERIALISE_COMPILATION=1
// env-var: YK_LOG=4
// stderr:
// 3
// 2
// yk-jit-event: start-tracing
// 1
// yk-jit-event: stop-tracing-early-return
// return
// yk-jit-event: start-tracing
// 3
// yk-jit-event: stop-tracing
// ...
// 2
// yk-jit-event: enter-jit-code
// 1
// yk-jit-event: deoptimise
// return
// exit

// Check that an early return caused by falling out of the interpreter loop is
// handled correctly.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <yk.h>
#include <yk_testing.h>

void loop(YkMT *, YkLocation *);

// Miniature interpreter loop used by this test.
//
// Counts `i` down from 3 to 1. Each iteration first hits the control point
// for `loc` and then prints the current value of `i` to stderr. Once the
// `while` condition fails (i.e. we "fall out" of the loop),
// yk_mt_early_return() is called on `mt` and "return" is printed.
void loop(YkMT *mt, YkLocation *loc) {
int res = 9998;
int i = 3;
// NOTE(review): NOOPT_VAL comes from yk_testing.h; presumably it stops the
// compiler from optimising these values away -- confirm there.
NOOPT_VAL(res);
NOOPT_VAL(i);
while (i > 0) {
yk_mt_control_point(mt, loc);
fprintf(stderr, "%d\n", i);
i--;
}
// The loop has been left without coming back around to the control point:
// this is the early-return path the test exercises.
yk_mt_early_return(mt);
fprintf(stderr, "return\n");
NOOPT_VAL(res);
}

// Test driver: creates a meta-tracer with a hot threshold of 2 and a single
// location, runs loop() twice against that location, prints "exit", then
// drops the location and shuts the meta-tracer down.
int main(int argc, char **argv) {
YkMT *mt = yk_mt_new(NULL);
yk_mt_hot_threshold_set(mt, 2);
YkLocation loc = yk_location_new();

int res = 9998;
int i = 4;
NOOPT_VAL(loc);
NOOPT_VAL(res);
NOOPT_VAL(i);
// Two calls sharing one location. Per the expected stderr at the top of this
// file, tracing started during the first call ends with
// `stop-tracing-early-return`; the second call then records a complete trace,
// enters JIT code and deoptimises before returning.
loop(mt, &loc);
loop(mt, &loc);
fprintf(stderr, "exit\n");
NOOPT_VAL(res);
yk_location_drop(loc);
yk_mt_shutdown(mt);
return (EXIT_SUCCESS);
}
66 changes: 66 additions & 0 deletions tests/c/early_return_recursive1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Run-time:
// env-var: YKD_LOG_IR=-:jit-pre-opt,jit-post-opt
// env-var: YKD_SERIALISE_COMPILATION=1
// env-var: YK_LOG=4
// stderr:
// 2
// yk-jit-event: start-tracing
// 1
// yk-jit-event: stop-tracing-early-return
// return
// 3
// yk-jit-event: start-tracing
// 2
// yk-jit-event: stop-tracing
// --- Begin jit-pre-opt ---
// ...
// --- End jit-pre-opt ---
// ...
// 1
// return
// exit

// Check that early return from recursive interpreter loops works.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <yk.h>
#include <yk_testing.h>

int loop(YkMT *, YkLocation *, int);

// Recursive miniature interpreter loop.
//
// Counts `i` down to 0. Each iteration hits the control point for `loc`; while
// `i > 2` it then recurses with `i - 1` (same `mt` and `loc`) before printing
// the current `i` to stderr. After the `while` loop finishes,
// yk_mt_early_return() is called and "return" is printed.
//
// Returns the final value of `i`, which is not greater than 0 once the loop
// has exited. The recursive call's return value is ignored.
int loop(YkMT *mt, YkLocation *loc, int i) {
int res = 9998;
// NOTE(review): NOOPT_VAL comes from yk_testing.h; presumably it stops the
// compiler from optimising these values away -- confirm there.
NOOPT_VAL(res);
NOOPT_VAL(i);
while (i > 0) {
yk_mt_control_point(mt, loc);
if (i > 2) {
// Nested interpreter loop: it runs to completion (including its own
// yk_mt_early_return() call) before this frame prints `i`.
loop(mt, loc, i - 1);
}
fprintf(stderr, "%d\n", i);
i--;
}
yk_mt_early_return(mt);
fprintf(stderr, "return\n");
NOOPT_VAL(res);
return i;
}

// Test driver: creates a meta-tracer with a hot threshold of 2 and a single
// location, runs loop() once starting from i = 3 (which recurses one level),
// prints "exit", then drops the location and shuts the meta-tracer down.
int main(int argc, char **argv) {
YkMT *mt = yk_mt_new(NULL);
yk_mt_hot_threshold_set(mt, 2);
YkLocation loc = yk_location_new();

int res = 9998;
NOOPT_VAL(loc);
NOOPT_VAL(res);
// i = 3 is the only value for which loop() takes its `i > 2` branch, so this
// produces exactly one nested loop() call (with i = 2).
loop(mt, &loc, 3);
fprintf(stderr, "exit\n");
NOOPT_VAL(res);
yk_location_drop(loc);
yk_mt_shutdown(mt);
return (EXIT_SUCCESS);
}
71 changes: 71 additions & 0 deletions tests/c/early_return_recursive2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Run-time:
// env-var: YKD_LOG_IR=-:jit-pre-opt,jit-post-opt
// env-var: YKD_SERIALISE_COMPILATION=1
// env-var: YK_LOG=4
// stderr:
// yk-jit-event: start-tracing
// 0x{{loc2}}: 2
// 0x{{loc2}}: 1
// yk-jit-event: stop-tracing-early-return
// return
// 0x{{loc1}}: 3
// yk-jit-event: start-tracing
// 0x{{loc1}}: 2
// yk-jit-event: stop-tracing
// --- Begin jit-pre-opt ---
// ...
// --- End jit-pre-opt ---
// ...
// 0x{{loc1}}: 1
// return
// exit

// Check that early return from recursive interpreter loops works.
//
// In this scenario, the parent function starts tracing at location 1, a
// recursive interpreter loop runs and exits, but without encountering
// location 1 (the location that initiated tracing).

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <yk.h>
#include <yk_testing.h>

int loop(YkMT *, YkLocation *, YkLocation *, int);

// Recursive miniature interpreter loop that uses a different location at each
// recursion depth.
//
// `use_loc` is the location passed to the control point in this frame and
// must not be NULL. `next_loc` is handed to the nested call as *its*
// `use_loc`; the nested call in turn receives NULL as its `next_loc`.
//
// Counts `i` down to 0. Each iteration hits the control point for `use_loc`;
// while `i > 2` it then recurses with `i - 1` before printing
// "<use_loc pointer>: <i>" to stderr. After the `while` loop finishes,
// yk_mt_early_return() is called and "return" is printed.
//
// Returns the final value of `i`. The recursive call's return value is
// ignored.
int loop(YkMT *mt, YkLocation *use_loc, YkLocation *next_loc, int i) {
assert(use_loc != NULL);
NOOPT_VAL(i);
while (i > 0) {
yk_mt_control_point(mt, use_loc);
if (i > 2) {
// The nested loop only ever sees `next_loc`, never `use_loc`. Passing NULL
// as its `next_loc` is safe only because the nested call starts at
// `i - 1` and the callers in this file never start above 3, so the nested
// call cannot take this branch again.
loop(mt, next_loc, NULL, i - 1);
}
fprintf(stderr, "%p: %d\n", use_loc, i);
i--;
}
yk_mt_early_return(mt);
fprintf(stderr, "return\n");
return i;
}

// Test driver: creates a meta-tracer with a hot threshold of 0 and two
// locations, runs loop() once starting from i = 3, prints "exit", then drops
// both locations and shuts the meta-tracer down.
int main(int argc, char **argv) {
YkMT *mt = yk_mt_new(NULL);
// NOTE(review): per the expected stderr at the top of this file, a threshold
// of 0 makes `start-tracing` appear before any loop output -- i.e. tracing
// begins at the first control point hit.
yk_mt_hot_threshold_set(mt, 0);

// First location: used by the outermost call to loop().
YkLocation loc1 = yk_location_new();
// Second location: used by the nested (recursive) call to loop().
YkLocation loc2 = yk_location_new();

NOOPT_VAL(loc1);
NOOPT_VAL(loc2);
loop(mt, &loc1, &loc2, 3);
fprintf(stderr, "exit\n");
yk_location_drop(loc1);
yk_location_drop(loc2);
yk_mt_shutdown(mt);
return (EXIT_SUCCESS);
}
1 change: 0 additions & 1 deletion tests/c/fcmp_double.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// Run-time:
// env-var: YKD_LOG_IR=aot,jit-pre-opt,jit-asm
// env-var: YKD_SERIALISE_COMPILATION=1
// env-var: YK_LOG=4
// stderr:
Expand Down
1 change: 0 additions & 1 deletion tests/c/fcmp_float.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// Run-time:
// env-var: YKD_LOG_IR=aot,jit-pre-opt,jit-asm
// env-var: YKD_SERIALISE_COMPILATION=1
// env-var: YK_LOG=4
// stderr:
Expand Down
8 changes: 8 additions & 0 deletions ykcapi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ pub extern "C" fn yk_mt_control_point(_mt: *mut MT, _loc: *mut Location) {
// Intentionally empty.
}

/// C entry point that forwards an early-return notification to the
/// meta-tracer: calls `early_return(frameaddr)` on the `MT` behind `mt`.
///
/// The caller's reference to the `MT` is only borrowed for the duration of
/// the call; its reference count is left unchanged.
///
/// # Safety
///
/// * `mt` must be a non-null pointer that originated from `Arc::into_raw` for
///   an `Arc<MT>` whose strong count is still at least one (this is what
///   `Arc::from_raw` requires). Presumably this is the pointer handed to C by
///   `yk_mt_new` -- confirm against that function.
/// * `frameaddr` is passed through to `MT::early_return` unchanged; whatever
///   that method requires of it must hold.
#[no_mangle]
pub unsafe extern "C" fn __yk_mt_early_return(mt: *mut MT, frameaddr: *mut c_void) {
    // Hand the raw pointer straight to `Arc::from_raw` rather than going via
    // an intermediate `&MT`: a pointer derived from a shared reference is
    // only valid for the `MT` payload, whereas `Arc::from_raw` has to reach
    // the reference counts stored in front of it.
    //
    // SAFETY: the caller guarantees `mt` came from `Arc::into_raw` and that
    // the `Arc` is still alive (see `# Safety`).
    let arc = unsafe { Arc::from_raw(mt.cast_const()) };
    arc.early_return(frameaddr);
    // We reconstructed the caller's `Arc` without owning a reference of our
    // own, so it must not be dropped here: forgetting it leaves the strong
    // count exactly as we found it.
    forget(arc);
}

// The new control point called after the interpreter has been patched by ykllvm.
#[cfg(target_arch = "x86_64")]
#[naked]
Expand Down
Loading
Loading