From 4ce6fe0f3eefc5b096093acc252220811721ac06 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Fri, 29 Nov 2024 15:45:41 +0000 Subject: [PATCH 01/13] Check that tracing doesn't finish in a different stack frame. An interpreter could do something like: ``` fn f(loc: &Location) { for i in 0..1000 { control_point(loc); if i == hot_threshold { return; } } } ``` In other words: we start tracing, then `return` before we hit the control point in the current stack frame again. --- ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs | 2 +- ykrt/src/mt.rs | 28 +++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs b/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs index 065e47936..f30bcbda9 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs @@ -320,7 +320,7 @@ pub(crate) extern "C" fn __yk_deopt( // The `clone` should really be `Arc::clone(&ctr)` but that doesn't play well with type // inference in this (unusual) case. - ctr.mt.guard_failure(ctr.clone(), gidx); + ctr.mt.guard_failure(ctr.clone(), gidx, frameaddr); // Since we won't return from this function, drop `ctr` manually. drop(ctr); diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs index 713791ca9..699532414 100644 --- a/ykrt/src/mt.rs +++ b/ykrt/src/mt.rs @@ -438,6 +438,7 @@ impl MT { hl, thread_tracer: tt, promotions: Vec::new(), + frameaddr, }; }), Err(e) => todo!("{e:?}"), @@ -453,7 +454,11 @@ impl MT { hl, thread_tracer, promotions, - } => (hl, thread_tracer, promotions), + frameaddr: tracing_frameaddr, + } => { + assert_eq!(frameaddr as *const c_void, tracing_frameaddr); + (hl, thread_tracer, promotions) + } _ => unreachable!(), }, ); @@ -485,7 +490,11 @@ impl MT { hl, thread_tracer, promotions, - } => (hl, thread_tracer, promotions), + frameaddr: tracing_frameaddr, + } => { + assert_eq!(frameaddr as *const c_void, tracing_frameaddr); + (hl, thread_tracer, promotions) + } _ => unreachable!(), }, ); @@ -733,7 +742,12 @@ impl MT { // FIXME: Don't side trace the last guard of a side-trace as this guard always fails. // FIXME: Don't side-trace after switch instructions: not every guard failure is equal // and a trace compiled for case A won't work for case B. - pub(crate) fn guard_failure(self: &Arc, parent: Arc, gidx: GuardIdx) { + pub(crate) fn guard_failure( + self: &Arc, + parent: Arc, + gidx: GuardIdx, + frameaddr: *const c_void, + ) { match self.transition_guard_failure(parent, gidx) { TransitionGuardFailure::NoAction => (), TransitionGuardFailure::StartSideTracing(hl) => { @@ -748,6 +762,7 @@ impl MT { hl, thread_tracer: tt, promotions: Vec::new(), + frameaddr, }; }), Err(e) => todo!("{e:?}"), @@ -809,6 +824,9 @@ enum MTThreadState { thread_tracer: Box, /// Records the content of data recorded via `yk_promote`. promotions: Vec, + /// The `frameaddr` when tracing started. This allows us to tell if we're finishing tracing + /// at the same point that we started. + frameaddr: *const c_void, }, /// This thread is executing a trace. Note that the `dyn CompiledTrace` serves two different purposes: /// @@ -1023,6 +1041,7 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), + frameaddr: 0 as *const c_void, }; }); } @@ -1047,6 +1066,7 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), + frameaddr: 0 as *const c_void, }; }); } @@ -1168,6 +1188,7 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), + frameaddr: 0 as *const c_void, }; }); break; @@ -1360,6 +1381,7 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), + frameaddr: 0 as *const c_void, }; }); assert!(matches!( From bd40930564f7504f1dbe250654f2a8e14f497bc7 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Fri, 29 Nov 2024 16:27:31 +0000 Subject: [PATCH 02/13] Add early return and recursive tests. --- tests/c/early_return.c | 63 ++++++++ tests/c/fcmp_double.c | 1 - tests/c/fcmp_float.c | 1 - tests/c/recursive.c | 63 ++++++++ ykcapi/src/lib.rs | 7 + ykcapi/yk.h | 16 ++ ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs | 2 +- ykrt/src/mt.rs | 147 ++++++++++++------ 8 files changed, 252 insertions(+), 48 deletions(-) create mode 100644 tests/c/early_return.c create mode 100644 tests/c/recursive.c diff --git a/tests/c/early_return.c b/tests/c/early_return.c new file mode 100644 index 000000000..0b3e99020 --- /dev/null +++ b/tests/c/early_return.c @@ -0,0 +1,63 @@ +// Run-time: +// env-var: YKD_LOG_IR=-:jit-post-opt +// env-var: YKD_SERIALISE_COMPILATION=1 +// env-var: YK_LOG=4 +// stderr: +// 3 +// 2 +// yk-jit-event: start-tracing +// 1 +// yk-jit-event: stop-tracing-early-return +// return +// yk-jit-event: start-tracing +// 3 +// yk-jit-event: stop-tracing +// --- Begin jit-pre-opt --- +// ... +// --- End jit-pre-opt --- +// exit + +// Check that basic trace compilation works. + +#include +#include +#include +#include +#include +#include + +void loop(YkMT *, YkLocation *); + +void loop(YkMT *mt, YkLocation *loc) { + int res = 9998; + int i = 3; + NOOPT_VAL(res); + NOOPT_VAL(i); + while (i > 0) { + yk_mt_control_point(mt, loc); + fprintf(stderr, "%d\n", i); + i--; + } + yk_mt_early_return(mt); + fprintf(stderr, "return\n"); + NOOPT_VAL(res); +} + +int main(int argc, char **argv) { + YkMT *mt = yk_mt_new(NULL); + yk_mt_hot_threshold_set(mt, 2); + YkLocation loc = yk_location_new(); + + int res = 9998; + int i = 4; + NOOPT_VAL(loc); + NOOPT_VAL(res); + NOOPT_VAL(i); + loop(mt, &loc); + loop(mt, &loc); + fprintf(stderr, "exit\n"); + NOOPT_VAL(res); + yk_location_drop(loc); + yk_mt_shutdown(mt); + return (EXIT_SUCCESS); +} diff --git a/tests/c/fcmp_double.c b/tests/c/fcmp_double.c index 1fd425a85..30a6063f1 100644 --- a/tests/c/fcmp_double.c +++ b/tests/c/fcmp_double.c @@ -1,5 +1,4 @@ // Run-time: -// env-var: YKD_LOG_IR=aot,jit-pre-opt,jit-asm // env-var: YKD_SERIALISE_COMPILATION=1 // env-var: YK_LOG=4 // stderr: diff --git a/tests/c/fcmp_float.c b/tests/c/fcmp_float.c index 7089b0516..ef8b9e9e2 100644 --- a/tests/c/fcmp_float.c +++ b/tests/c/fcmp_float.c @@ -1,5 +1,4 @@ // Run-time: -// env-var: YKD_LOG_IR=aot,jit-pre-opt,jit-asm // env-var: YKD_SERIALISE_COMPILATION=1 // env-var: YK_LOG=4 // stderr: diff --git a/tests/c/recursive.c b/tests/c/recursive.c new file mode 100644 index 000000000..f9f369e74 --- /dev/null +++ b/tests/c/recursive.c @@ -0,0 +1,63 @@ +// Run-time: +// env-var: YKD_LOG_IR=-:jit-pre-opt,jit-post-opt +// env-var: YKD_SERIALISE_COMPILATION=1 +// env-var: YK_LOG=4 +// stderr: +// 3 +// 2 +// yk-jit-event: start-tracing +// 1 +// yk-jit-event: stop-tracing-early-return +// return +// yk-jit-event: start-tracing +// 3 +// yk-jit-event: stop-tracing +// --- Begin jit-pre-opt --- +// ... +// --- End jit-pre-opt --- +// exit + +// Check that basic trace compilation works. + +#include +#include +#include +#include +#include +#include + +int loop(YkMT *, YkLocation *, int); + +int loop(YkMT *mt, YkLocation *loc, int i) { + int res = 9998; + NOOPT_VAL(res); + NOOPT_VAL(i); + while (i > 0) { + yk_mt_control_point(mt, loc); + if (i > 2) { + loop(mt, loc, i - 1); + } + fprintf(stderr, "%d\n", i); + i--; + } + yk_mt_early_return(mt); + fprintf(stderr, "return\n"); + NOOPT_VAL(res); + return i; +} + +int main(int argc, char **argv) { + YkMT *mt = yk_mt_new(NULL); + yk_mt_hot_threshold_set(mt, 2); + YkLocation loc = yk_location_new(); + + int res = 9998; + NOOPT_VAL(loc); + NOOPT_VAL(res); + loop(mt, &loc, 3); + fprintf(stderr, "exit\n"); + NOOPT_VAL(res); + yk_location_drop(loc); + yk_mt_shutdown(mt); + return (EXIT_SUCCESS); +} diff --git a/ykcapi/src/lib.rs b/ykcapi/src/lib.rs index ba06dcc3f..22220ca58 100644 --- a/ykcapi/src/lib.rs +++ b/ykcapi/src/lib.rs @@ -50,6 +50,13 @@ pub extern "C" fn yk_mt_control_point(_mt: *mut MT, _loc: *mut Location) { // Intentionally empty. } +#[no_mangle] +pub extern "C" fn __yk_mt_early_return(mt: *mut MT, frameaddr: *mut c_void) { + let mt = unsafe { &*mt }; + let arc = unsafe { Arc::from_raw(mt) }; + arc.early_return(frameaddr); +} + // The new control point called after the interpreter has been patched by ykllvm. #[cfg(target_arch = "x86_64")] #[naked] diff --git a/ykcapi/yk.h b/ykcapi/yk.h index fad7223ca..244ccc4ed 100644 --- a/ykcapi/yk.h +++ b/ykcapi/yk.h @@ -49,6 +49,22 @@ void yk_mt_shutdown(YkMT *); // execute JITted code. void yk_mt_control_point(YkMT *, YkLocation *); +// At each point a function containing a control point can exit "early" this +// function must be called. "Early" includes, but is not +// limited to, the following: +// +// 1. Immediately after a non-infinite loop containing a call to +// `yk_mt_control_point`. +// 2. Immediately before `return` statements in code reachable from a +// `yk_mt_control_point`. +// +// Failure to call this function will lead to undefined behaviour. +# define yk_mt_early_return(mt) __yk_mt_early_return(mt, __builtin_frame_address(0)) + +// This is an internal function to yk: calling it directly leads to undefined +// behaviour. +void __yk_mt_early_return(YkMT *, void *); + // Set the threshold at which `YkLocation`'s are considered hot. void yk_mt_hot_threshold_set(YkMT *, YkHotThreshold); diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs b/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs index f30bcbda9..a55ab0a0e 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs @@ -92,7 +92,7 @@ fn running_trace(gidxs: &[usize]) -> Arc { /// * glen - Length for list in `gptr`. #[no_mangle] pub(crate) extern "C" fn __yk_deopt( - frameaddr: *const c_void, + frameaddr: *mut c_void, gidx: u64, gp_regs: &[u64; 16], fp_regs: &[u64; 16], diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs index 699532414..a4d14d3c0 100644 --- a/ykrt/src/mt.rs +++ b/ykrt/src/mt.rs @@ -401,7 +401,7 @@ impl MT { #[allow(clippy::not_unsafe_ptr_arg_deref)] pub fn control_point(self: &Arc, loc: &Location, frameaddr: *mut c_void, smid: u64) { - match self.transition_control_point(loc) { + match self.transition_control_point(loc, frameaddr) { TransitionControlPoint::NoAction => (), TransitionControlPoint::Execute(ctr) => { self.log.log(Verbosity::JITEvent, "enter-jit-code"); @@ -456,7 +456,7 @@ impl MT { promotions, frameaddr: tracing_frameaddr, } => { - assert_eq!(frameaddr as *const c_void, tracing_frameaddr); + assert_eq!(frameaddr, tracing_frameaddr); (hl, thread_tracer, promotions) } _ => unreachable!(), @@ -492,7 +492,7 @@ impl MT { promotions, frameaddr: tracing_frameaddr, } => { - assert_eq!(frameaddr as *const c_void, tracing_frameaddr); + assert_eq!(frameaddr, tracing_frameaddr); (hl, thread_tracer, promotions) } _ => unreachable!(), @@ -524,7 +524,11 @@ impl MT { /// Perform the next step to `loc` in the `Location` state-machine for a control point. If /// `loc` moves to the Compiled state, return a pointer to a [CompiledTrace] object. - fn transition_control_point(self: &Arc, loc: &Location) -> TransitionControlPoint { + fn transition_control_point( + self: &Arc, + loc: &Location, + frameaddr: *mut c_void, + ) -> TransitionControlPoint { MTThread::with(|mtt| { let is_tracing = mtt.is_tracing(); match loc.hot_location() { @@ -587,16 +591,31 @@ impl MT { HotLocationKind::Tracing => { let hl = loc.hot_location_arc_clone().unwrap(); match &*mtt.tstate.borrow() { - MTThreadState::Tracing { hl: thread_hl, .. } => { + MTThreadState::Tracing { + hl: thread_hl, + frameaddr: tracing_frameaddr, + .. + } => { // This thread is tracing something... if !Arc::ptr_eq(thread_hl, &hl) { // ...but not this Location. TransitionControlPoint::NoAction } else { - // ...and it's this location: we have therefore finished - // tracing the loop. - lk.kind = HotLocationKind::Compiling; - TransitionControlPoint::StopTracing + // ...and it's this location... + if frameaddr == *tracing_frameaddr { + lk.kind = HotLocationKind::Compiling; + TransitionControlPoint::StopTracing + } else { + // ...but in a different frame. + #[cfg(target_arch = "x86_64")] + { + // If this assert fails, we've fallen through to a + // caller, which is UB. + assert!(*tracing_frameaddr > frameaddr); + } + // We're inlining. + TransitionControlPoint::NoAction + } } } _ => { @@ -746,7 +765,7 @@ impl MT { self: &Arc, parent: Arc, gidx: GuardIdx, - frameaddr: *const c_void, + frameaddr: *mut c_void, ) { match self.transition_guard_failure(parent, gidx) { TransitionGuardFailure::NoAction => (), @@ -770,6 +789,40 @@ impl MT { } } } + + pub fn early_return(self: &Arc, frameaddr: *mut c_void) { + MTThread::with(|mtt| { + let mut abort = false; + if let MTThreadState::Tracing { + frameaddr: tracing_frameaddr, + .. + } = &*mtt.tstate.borrow() + { + if frameaddr <= *tracing_frameaddr { + abort = true; + } + } + + if abort { + match mtt.tstate.replace(MTThreadState::Interpreting) { + MTThreadState::Tracing { + thread_tracer, hl, .. + } => { + // We don't care if the thread tracer went wrong: we're not going to use + // its result anyway. + thread_tracer.stop().unwrap(); + let mut lk = hl.lock(); + lk.tracecompilation_error(self); + drop(lk); + self.stats.timing_state(TimingState::None); + self.log + .log(Verbosity::JITEvent, "stop-tracing-early-return"); + } + _ => unreachable!(), + } + } + }); + } } #[cfg(target_arch = "x86_64")] @@ -826,7 +879,7 @@ enum MTThreadState { promotions: Vec, /// The `frameaddr` when tracing started. This allows us to tell if we're finishing tracing /// at the same point that we started. - frameaddr: *const c_void, + frameaddr: *mut c_void, }, /// This thread is executing a trace. Note that the `dyn CompiledTrace` serves two different purposes: /// @@ -1033,7 +1086,9 @@ mod tests { } fn expect_start_tracing(mt: &Arc, loc: &Location) { - let TransitionControlPoint::StartTracing(hl) = mt.transition_control_point(loc) else { + let TransitionControlPoint::StartTracing(hl) = + mt.transition_control_point(loc, 0 as *mut c_void) + else { panic!() }; MTThread::with(|mtt| { @@ -1041,13 +1096,15 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), - frameaddr: 0 as *const c_void, + frameaddr: 0 as *mut c_void, }; }); } fn expect_stop_tracing(mt: &Arc, loc: &Location) { - let TransitionControlPoint::StopTracing = mt.transition_control_point(loc) else { + let TransitionControlPoint::StopTracing = + mt.transition_control_point(loc, 0 as *mut c_void) + else { panic!() }; MTThread::with(|mtt| { @@ -1066,7 +1123,7 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), - frameaddr: 0 as *const c_void, + frameaddr: 0 as *mut c_void, }; }); } @@ -1080,7 +1137,7 @@ mod tests { let loc = Location::new(); for i in 0..mt.hot_threshold() { assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); assert_eq!(loc.count(), Some(i + 1)); @@ -1100,12 +1157,12 @@ mod tests { ))); loc.hot_location().unwrap().lock().kind = HotLocationKind::Compiled(ctr.clone()); assert!(matches!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::Execute(_) )); expect_start_side_tracing(&mt, ctr); - match mt.transition_control_point(&loc) { + match mt.transition_control_point(&loc, 0 as *mut c_void) { TransitionControlPoint::StopSideTracing { .. } => { MTThread::with(|mtt| { *mtt.tstate.borrow_mut() = MTThreadState::Interpreting; @@ -1118,7 +1175,7 @@ mod tests { _ => unreachable!(), } assert!(matches!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::Execute(_) )); } @@ -1142,25 +1199,25 @@ mod tests { // otherwise tracing will start, and the assertions will fail. for _ in 0..hot_thrsh / (num_threads * 4) { assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); let c1 = loc.count(); assert!(c1.is_some()); assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); let c2 = loc.count(); assert!(c2.is_some()); assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); let c3 = loc.count(); assert!(c3.is_some()); assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); let c4 = loc.count(); @@ -1180,7 +1237,7 @@ mod tests { // at or below the threshold: it could even be (although it's rather unlikely) 0! assert!(loc.count().is_some()); loop { - match mt.transition_control_point(&loc) { + match mt.transition_control_point(&loc, 0 as *mut c_void) { TransitionControlPoint::NoAction => (), TransitionControlPoint::StartTracing(hl) => { MTThread::with(|mtt| { @@ -1188,7 +1245,7 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), - frameaddr: 0 as *const c_void, + frameaddr: 0 as *mut c_void, }; }); break; @@ -1213,7 +1270,7 @@ mod tests { // Get the location to the point of being hot. for _ in 0..THRESHOLD { assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); } @@ -1245,7 +1302,7 @@ mod tests { HotLocationKind::Tracing )); assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); assert!(matches!( @@ -1266,7 +1323,7 @@ mod tests { // Get the location to the point of being hot. for _ in 0..THRESHOLD { assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); } @@ -1324,17 +1381,17 @@ mod tests { for _ in 0..THRESHOLD { assert_eq!( - mt.transition_control_point(&loc1), + mt.transition_control_point(&loc1, 0 as *mut c_void), TransitionControlPoint::NoAction ); assert_eq!( - mt.transition_control_point(&loc2), + mt.transition_control_point(&loc2, 0 as *mut c_void), TransitionControlPoint::NoAction ); } expect_start_tracing(&mt, &loc1); assert_eq!( - mt.transition_control_point(&loc2), + mt.transition_control_point(&loc2, 0 as *mut c_void), TransitionControlPoint::NoAction ); assert!(matches!( @@ -1371,7 +1428,7 @@ mod tests { let num_starts = Arc::clone(&num_starts); thrs.push(thread::spawn(move || { for _ in 0..THRESHOLD { - match mt.transition_control_point(&loc) { + match mt.transition_control_point(&loc, 0 as *mut c_void) { TransitionControlPoint::NoAction => (), TransitionControlPoint::Execute(_) => (), TransitionControlPoint::StartTracing(hl) => { @@ -1381,7 +1438,7 @@ mod tests { hl, thread_tracer: Box::new(DummyTraceRecorder), promotions: Vec::new(), - frameaddr: 0 as *const c_void, + frameaddr: 0 as *mut c_void, }; }); assert!(matches!( @@ -1394,7 +1451,7 @@ mod tests { HotLocationKind::Compiling )); assert_eq!( - mt.transition_control_point(&loc), + mt.transition_control_point(&loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); assert!(matches!( @@ -1406,7 +1463,7 @@ mod tests { ); loop { if let TransitionControlPoint::Execute(_) = - mt.transition_control_point(&loc) + mt.transition_control_point(&loc, 0 as *mut c_void) { break; } @@ -1441,11 +1498,11 @@ mod tests { for _ in 0..THRESHOLD { assert_eq!( - mt.transition_control_point(&loc1), + mt.transition_control_point(&loc1, 0 as *mut c_void), TransitionControlPoint::NoAction ); assert_eq!( - mt.transition_control_point(&loc2), + mt.transition_control_point(&loc2, 0 as *mut c_void), TransitionControlPoint::NoAction ); } @@ -1460,7 +1517,7 @@ mod tests { expect_start_tracing(&mt, &loc2); assert_eq!( - mt.transition_control_point(&loc1), + mt.transition_control_point(&loc1, 0 as *mut c_void), TransitionControlPoint::NoAction ); expect_stop_tracing(&mt, &loc2); @@ -1482,7 +1539,7 @@ mod tests { fn to_compiled(mt: &Arc, loc: &Location) -> Arc { for _ in 0..THRESHOLD { assert_eq!( - mt.transition_control_point(loc), + mt.transition_control_point(loc, 0 as *mut c_void), TransitionControlPoint::NoAction ); } @@ -1516,11 +1573,11 @@ mod tests { expect_start_side_tracing(&mt, ctr2); assert!(matches!( - dbg!(mt.transition_control_point(&loc1)), + dbg!(mt.transition_control_point(&loc1, 0 as *mut c_void)), TransitionControlPoint::Execute(_) )); assert!(matches!( - mt.transition_control_point(&loc2), + mt.transition_control_point(&loc2, 0 as *mut c_void), TransitionControlPoint::StopSideTracing { .. } )); } @@ -1531,7 +1588,7 @@ mod tests { let loc = Location::new(); b.iter(|| { for _ in 0..100000 { - black_box(mt.transition_control_point(&loc)); + black_box(mt.transition_control_point(&loc, 0 as *mut c_void)); } }); } @@ -1547,7 +1604,7 @@ mod tests { let mt = Arc::clone(&mt); thrs.push(thread::spawn(move || { for _ in 0..100 { - black_box(mt.transition_control_point(&loc)); + black_box(mt.transition_control_point(&loc, 0 as *mut c_void)); } })); } @@ -1578,14 +1635,14 @@ mod tests { // https://github.com/ykjit/yk/issues/519 expect_start_tracing(&mt, &loc2); assert!(matches!( - mt.transition_control_point(&loc1), + mt.transition_control_point(&loc1, 0 as *mut c_void), TransitionControlPoint::NoAction )); // But once we stop tracing for `loc2`, we should be able to execute the trace for `loc1`. expect_stop_tracing(&mt, &loc2); assert!(matches!( - mt.transition_control_point(&loc1), + mt.transition_control_point(&loc1, 0 as *mut c_void), TransitionControlPoint::Execute(_) )); } From 956f333678f32d9c0e2bb6a941e1a3bf8ddcd3d6 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Mon, 2 Dec 2024 10:04:53 +0000 Subject: [PATCH 03/13] Fix recursive.c --- tests/c/recursive.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/c/recursive.c b/tests/c/recursive.c index f9f369e74..c32fdb569 100644 --- a/tests/c/recursive.c +++ b/tests/c/recursive.c @@ -3,21 +3,24 @@ // env-var: YKD_SERIALISE_COMPILATION=1 // env-var: YK_LOG=4 // stderr: -// 3 // 2 // yk-jit-event: start-tracing // 1 // yk-jit-event: stop-tracing-early-return // return -// yk-jit-event: start-tracing // 3 +// yk-jit-event: start-tracing +// 2 // yk-jit-event: stop-tracing // --- Begin jit-pre-opt --- // ... // --- End jit-pre-opt --- +// ... +// 1 +// return // exit -// Check that basic trace compilation works. +// Check that early return from recursive interpreter loops works. #include #include From a1e927f390f300c6ebe2acc21b9037684a15c572 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Mon, 2 Dec 2024 10:45:07 +0000 Subject: [PATCH 04/13] Add another recursive test. --- tests/c/recursive2.c | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 tests/c/recursive2.c diff --git a/tests/c/recursive2.c b/tests/c/recursive2.c new file mode 100644 index 000000000..15ecdd1ec --- /dev/null +++ b/tests/c/recursive2.c @@ -0,0 +1,74 @@ +// Run-time: +// env-var: YKD_LOG_IR=-:jit-pre-opt,jit-post-opt +// env-var: YKD_SERIALISE_COMPILATION=1 +// env-var: YK_LOG=4 +// stderr: +// yk-jit-event: start-tracing +// 0x{{loc2}}: 2 +// 0x{{loc2}}: 1 +// yk-jit-event: stop-tracing-early-return +// return +// 0x{{loc1}}: 3 +// yk-jit-event: start-tracing +// 0x{{loc1}}: 2 +// yk-jit-event: stop-tracing +// --- Begin jit-pre-opt --- +// ... +// --- End jit-pre-opt --- +// ... +// 0x{{loc1}}: 1 +// return +// exit + +// Check that early return from recursive interpreter loops works. +// +// In this scenario, the parent function starts tracing at location 1, a +// recursive interpreter loop runs and exits, but without encountering +// location 1 (the location that initiated tracing). +// +// XXX: question to Laurie: should the early_return from the inner interpreter +// loop abort tracing in this scenario? (It does, FWIW -- this test passes). + +#include +#include +#include +#include +#include +#include + +int loop(YkMT *, YkLocation *, YkLocation *, int); + +int loop(YkMT *mt, YkLocation *use_loc, YkLocation *next_loc, int i) { + assert(use_loc != NULL); + NOOPT_VAL(i); + while (i > 0) { + yk_mt_control_point(mt, use_loc); + if (i > 2) { + loop(mt, next_loc, NULL, i - 1); + } + fprintf(stderr, "%p: %d\n", use_loc, i); + i--; + } + yk_mt_early_return(mt); + fprintf(stderr, "return\n"); + return i; +} + +int main(int argc, char **argv) { + YkMT *mt = yk_mt_new(NULL); + yk_mt_hot_threshold_set(mt, 0); + + // First location: used by first level deep recursion. + YkLocation loc1 = yk_location_new(); + // Second location: used by second level deep recursion. + YkLocation loc2 = yk_location_new(); + + NOOPT_VAL(loc1); + NOOPT_VAL(loc2); + loop(mt, &loc1, &loc2, 3); + fprintf(stderr, "exit\n"); + yk_location_drop(loc1); + yk_location_drop(loc2); + yk_mt_shutdown(mt); + return (EXIT_SUCCESS); +} From f326481038692d110a919b5246c58d4960b8e4ba Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Mon, 2 Dec 2024 10:48:32 +0000 Subject: [PATCH 05/13] Fix Clippy warnings. --- ykcapi/src/lib.rs | 2 +- ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ykcapi/src/lib.rs b/ykcapi/src/lib.rs index 22220ca58..209187e40 100644 --- a/ykcapi/src/lib.rs +++ b/ykcapi/src/lib.rs @@ -51,7 +51,7 @@ pub extern "C" fn yk_mt_control_point(_mt: *mut MT, _loc: *mut Location) { } #[no_mangle] -pub extern "C" fn __yk_mt_early_return(mt: *mut MT, frameaddr: *mut c_void) { +pub unsafe extern "C" fn __yk_mt_early_return(mt: *mut MT, frameaddr: *mut c_void) { let mt = unsafe { &*mt }; let arc = unsafe { Arc::from_raw(mt) }; arc.early_return(frameaddr); diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs b/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs index a55ab0a0e..119ab0d20 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/deopt.rs @@ -326,7 +326,7 @@ pub(crate) extern "C" fn __yk_deopt( drop(ctr); // Now overwrite the existing stack with our newly recreated one. - unsafe { replace_stack(newframedst as *mut c_void, newstack, memsize) }; + unsafe { replace_stack(newframedst, newstack, memsize) }; } /// Writes the stack frames that we recreated in [__yk_deopt] onto the current stack, overwriting From efffbffa2ead0d70c345822225e631e484a00500 Mon Sep 17 00:00:00 2001 From: Lukas Diekmann Date: Tue, 3 Dec 2024 13:55:31 +0000 Subject: [PATCH 06/13] Don't drop MT when early returning. --- ykcapi/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/ykcapi/src/lib.rs b/ykcapi/src/lib.rs index 209187e40..d4228f166 100644 --- a/ykcapi/src/lib.rs +++ b/ykcapi/src/lib.rs @@ -55,6 +55,7 @@ pub unsafe extern "C" fn __yk_mt_early_return(mt: *mut MT, frameaddr: *mut c_voi let mt = unsafe { &*mt }; let arc = unsafe { Arc::from_raw(mt) }; arc.early_return(frameaddr); + forget(arc); } // The new control point called after the interpreter has been patched by ykllvm. From b40118325e9e60df397906bb3afb87eae472f350 Mon Sep 17 00:00:00 2001 From: Lukas Diekmann Date: Tue, 3 Dec 2024 14:21:46 +0000 Subject: [PATCH 07/13] Fix test. --- tests/c/early_return.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/c/early_return.c b/tests/c/early_return.c index 0b3e99020..5a388f7e3 100644 --- a/tests/c/early_return.c +++ b/tests/c/early_return.c @@ -12,9 +12,12 @@ // yk-jit-event: start-tracing // 3 // yk-jit-event: stop-tracing -// --- Begin jit-pre-opt --- // ... -// --- End jit-pre-opt --- +// 2 +// yk-jit-event: enter-jit-code +// 1 +// yk-jit-event: deoptimise +// return // exit // Check that basic trace compilation works. From e58a9d20f24f7c2e915bc6332f557a6c4d4f556f Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Wed, 4 Dec 2024 09:39:03 +0000 Subject: [PATCH 08/13] Update test descriptions. --- tests/c/early_return.c | 3 ++- tests/c/recursive2.c | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/c/early_return.c b/tests/c/early_return.c index 5a388f7e3..eff68a8b4 100644 --- a/tests/c/early_return.c +++ b/tests/c/early_return.c @@ -20,7 +20,8 @@ // return // exit -// Check that basic trace compilation works. +// Check that an early return caused by falling out of the interpreter loop is +// handled correctly. #include #include diff --git a/tests/c/recursive2.c b/tests/c/recursive2.c index 15ecdd1ec..c854f9353 100644 --- a/tests/c/recursive2.c +++ b/tests/c/recursive2.c @@ -25,9 +25,6 @@ // In this scenario, the parent function starts tracing at location 1, a // recursive interpreter loop runs and exits, but without encountering // location 1 (the location that initiated tracing). -// -// XXX: question to Laurie: should the early_return from the inner interpreter -// loop abort tracing in this scenario? (It does, FWIW -- this test passes). #include #include From e8b177496f920574962baeb02e81ced08f77b7d8 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Wed, 4 Dec 2024 09:40:58 +0000 Subject: [PATCH 09/13] More descriptive test names. --- tests/c/{early_return.c => early_return_fall_out.c} | 0 tests/c/{recursive.c => early_return_recursive1.c} | 0 tests/c/{recursive2.c => early_return_recursive2.c} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/c/{early_return.c => early_return_fall_out.c} (100%) rename tests/c/{recursive.c => early_return_recursive1.c} (100%) rename tests/c/{recursive2.c => early_return_recursive2.c} (100%) diff --git a/tests/c/early_return.c b/tests/c/early_return_fall_out.c similarity index 100% rename from tests/c/early_return.c rename to tests/c/early_return_fall_out.c diff --git a/tests/c/recursive.c b/tests/c/early_return_recursive1.c similarity index 100% rename from tests/c/recursive.c rename to tests/c/early_return_recursive1.c diff --git a/tests/c/recursive2.c b/tests/c/early_return_recursive2.c similarity index 100% rename from tests/c/recursive2.c rename to tests/c/early_return_recursive2.c From 3d9334c20bbb3b2cf15e910b18efcb2ab08f7d64 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Wed, 4 Dec 2024 10:02:29 +0000 Subject: [PATCH 10/13] Ensure tracing errors during early_return are ignored. (I was getting lots of TraceTooLong crashes, even though we are going to discard the trace). --- ykrt/src/mt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs index a4d14d3c0..f981c98f7 100644 --- a/ykrt/src/mt.rs +++ b/ykrt/src/mt.rs @@ -810,7 +810,7 @@ impl MT { } => { // We don't care if the thread tracer went wrong: we're not going to use // its result anyway. - thread_tracer.stop().unwrap(); + thread_tracer.stop().ok(); let mut lk = hl.lock(); lk.tracecompilation_error(self); drop(lk); From 9be8a4cdd94035eba1806d5811ccacd6b4b02e1a Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Wed, 4 Dec 2024 11:04:36 +0000 Subject: [PATCH 11/13] Check for inlining in stopsidetracing. --- ykrt/src/mt.rs | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs index c2e4ae3ec..a0402f359 100644 --- a/ykrt/src/mt.rs +++ b/ykrt/src/mt.rs @@ -654,22 +654,38 @@ impl MT { } => { let hl = loc.hot_location_arc_clone().unwrap(); match &*mtt.tstate.borrow() { - MTThreadState::Tracing { hl: thread_hl, .. } => { + MTThreadState::Tracing { + hl: thread_hl, + frameaddr: tracing_frameaddr, + .. + } => { // This thread is tracing something... if !Arc::ptr_eq(thread_hl, &hl) { // ...but not this Location. TransitionControlPoint::Execute(Arc::clone(root_ctr)) } else { - // ...and it's this location: we have therefore finished - // tracing the loop. - let parent_ctr = Arc::clone(parent_ctr); - let root_ctr_cl = Arc::clone(root_ctr); - lk.kind = HotLocationKind::Compiled(Arc::clone(root_ctr)); - drop(lk); - TransitionControlPoint::StopSideTracing { - gidx, - parent_ctr, - root_ctr: root_ctr_cl, + // ...and it's this location. + if frameaddr == *tracing_frameaddr { + let parent_ctr = Arc::clone(parent_ctr); + let root_ctr_cl = Arc::clone(root_ctr); + lk.kind = + HotLocationKind::Compiled(Arc::clone(root_ctr)); + drop(lk); + TransitionControlPoint::StopSideTracing { + gidx, + parent_ctr, + root_ctr: root_ctr_cl, + } + } else { + // ...but in a different frame. + #[cfg(target_arch = "x86_64")] + { + // If this assert fails, we've fallen through to a + // caller, which is UB. + assert!(*tracing_frameaddr > frameaddr); + } + // We're inlining. + TransitionControlPoint::NoAction } } } From 274445c71e7923a871a850a27c513cce8a3ba528 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Wed, 4 Dec 2024 11:12:58 +0000 Subject: [PATCH 12/13] Revert "Re-use perf file descriptors for trace collection." This reverts commit bedafd9259361840d17eb665547b91b4a993b166. --- hwtracer/src/perf/collect.c | 38 +++++++++++++++------------------- hwtracer/src/pt/ykpt/parser.rs | 23 +++++--------------- 2 files changed, 22 insertions(+), 39 deletions(-) diff --git a/hwtracer/src/perf/collect.c b/hwtracer/src/perf/collect.c index e633dcca0..af5a252c4 100644 --- a/hwtracer/src/perf/collect.c +++ b/hwtracer/src/perf/collect.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -44,17 +43,6 @@ // The bit in the IA32_RTIT_CTL MSR that disables compressed returns. #define IA32_RTIT_CTL_DISRETC 1 << 11 -/* - * The thread's perf file descriptor and its associated underlying `mmap(2)` - * regions. The file descriptor is re-used for subsequent trace collections for - * the same thread. - * - * FIXME: These leak when a thread dies. - */ -static thread_local void *cache_base_buf = NULL; -static thread_local void *cache_aux_buf = NULL; -static thread_local int cache_perf_fd = -1; - enum hwt_cerror_kind { hwt_cerror_unused = 0, hwt_cerror_unknown = 1, @@ -530,9 +518,7 @@ hwt_perf_init_collector(struct hwt_perf_collector_config *tr_conf, tr_ctx->perf_fd = -1; // Obtain a file descriptor through which to speak to perf. - if (cache_perf_fd == -1) - cache_perf_fd = open_perf(tr_conf->aux_bufsize, err); - tr_ctx->perf_fd = cache_perf_fd; + tr_ctx->perf_fd = open_perf(tr_conf->aux_bufsize, err); if (tr_ctx->perf_fd == -1) { hwt_set_cerr(err, hwt_cerror_errno, errno); failing = true; @@ -562,10 +548,8 @@ hwt_perf_init_collector(struct hwt_perf_collector_config *tr_conf, // data_bufsize'. int page_size = getpagesize(); tr_ctx->base_bufsize = (1 + tr_conf->data_bufsize) * page_size; - if (!cache_base_buf) - cache_base_buf = mmap(NULL, tr_ctx->base_bufsize, PROT_WRITE, MAP_SHARED, + tr_ctx->base_buf = mmap(NULL, tr_ctx->base_bufsize, PROT_WRITE, MAP_SHARED, tr_ctx->perf_fd, 0); - tr_ctx->base_buf = cache_base_buf; if (tr_ctx->base_buf == MAP_FAILED) { hwt_set_cerr(err, hwt_cerror_errno, errno); failing = true; @@ -581,10 +565,8 @@ hwt_perf_init_collector(struct hwt_perf_collector_config *tr_conf, // Allocate the AUX buffer. // // Mapped R/W so as to have a saturating ring buffer. - if (!cache_aux_buf) - cache_aux_buf = mmap(NULL, base_header->aux_size, PROT_READ | PROT_WRITE, + tr_ctx->aux_buf = mmap(NULL, base_header->aux_size, PROT_READ | PROT_WRITE, MAP_SHARED, tr_ctx->perf_fd, base_header->aux_offset); - tr_ctx->aux_buf = cache_aux_buf; if (tr_ctx->aux_buf == MAP_FAILED) { hwt_set_cerr(err, hwt_cerror_errno, errno); failing = true; @@ -752,6 +734,16 @@ bool hwt_perf_free_collector(struct hwt_perf_ctx *tr_ctx, struct hwt_cerror *err) { int ret = true; + if ((tr_ctx->aux_buf) && + (munmap(tr_ctx->aux_buf, tr_ctx->aux_bufsize) == -1)) { + hwt_set_cerr(err, hwt_cerror_errno, errno); + ret = false; + } + if ((tr_ctx->base_buf) && + (munmap(tr_ctx->base_buf, tr_ctx->base_bufsize) == -1)) { + hwt_set_cerr(err, hwt_cerror_errno, errno); + ret = false; + } if (tr_ctx->stop_fds[1] != -1) { // If the write end of the pipe is still open, the thread is still running. close(tr_ctx->stop_fds[1]); // signals thread to stop. @@ -763,6 +755,10 @@ bool hwt_perf_free_collector(struct hwt_perf_ctx *tr_ctx, if (tr_ctx->stop_fds[0] != -1) { close(tr_ctx->stop_fds[0]); } + if (tr_ctx->perf_fd >= 0) { + close(tr_ctx->perf_fd); + tr_ctx->perf_fd = -1; + } if (tr_ctx != NULL) { free(tr_ctx); } diff --git a/hwtracer/src/pt/ykpt/parser.rs b/hwtracer/src/pt/ykpt/parser.rs index 0841ee022..162ca4b4b 100644 --- a/hwtracer/src/pt/ykpt/parser.rs +++ b/hwtracer/src/pt/ykpt/parser.rs @@ -11,6 +11,8 @@ use super::packets::*; #[derive(Clone, Copy, Debug)] enum PacketParserState { + /// Initial state, waiting for a PSB packet. + Init, /// The "normal" decoding state. Normal, /// We are decoding a PSB+ sequence. @@ -27,6 +29,7 @@ impl PacketParserState { // OPT: The order below is a rough guess based on what limited traces I've seen. Benchmark // and optimise. match self { + Self::Init => &[PacketKind::PSB], Self::Normal => &[ PacketKind::ShortTNT, PacketKind::PAD, @@ -60,6 +63,7 @@ impl PacketParserState { /// kind of packet. fn transition(&mut self, pkt_kind: PacketKind) { let new = match (*self, pkt_kind) { + (Self::Init, PacketKind::PSB) => Self::PSBPlus, (Self::Normal, PacketKind::PSB) => Self::PSBPlus, (Self::PSBPlus, PacketKind::PSBEND) => Self::Normal, _ => return, // No state transition. @@ -105,7 +109,7 @@ impl<'t> PacketParser<'t> { pub(super) fn new(bytes: &'t [u8]) -> Self { Self { bits: BitSlice::from_slice(bytes), - state: PacketParserState::Normal, + state: PacketParserState::Init, prev_tip: 0, } } @@ -228,7 +232,6 @@ impl Iterator for PacketParser<'_> { mod tests { use super::{super::packets::*, PacketParser}; use crate::{trace_closure, work_loop, TracerBuilder, TracerKind}; - use std::hint::black_box; /// Parse the packets of a small trace, checking the basic structure of the decoded trace. #[test] @@ -270,22 +273,6 @@ mod tests { assert!(matches!(ts, TestState::SawPacketGenDisable)); } - /// Checks PT packet streams make sense when a perf fd is re-used. - #[test] - fn decode_many() { - let tc = TracerBuilder::new() - .tracer_kind(TracerKind::PT(crate::perf::PerfCollectorConfig::default())) - .build() - .unwrap(); - for _ in 0..50 { - let trace = trace_closure(&tc, || work_loop(3)); - // Force full-decoding of the trace. - for p in PacketParser::new(trace.bytes()) { - let _ = black_box(p); - } - } - } - /// Test target IP decompression when the `IPBytes = 0b000`. #[test] fn ipbytes_decompress_000() { From 606e89b5385e6c830f99c6941cac388933f47c8e Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Wed, 4 Dec 2024 11:46:13 +0000 Subject: [PATCH 13/13] Don't execute a trace while tracing. --- ykrt/src/mt.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs index a0402f359..97c35acdd 100644 --- a/ykrt/src/mt.rs +++ b/ykrt/src/mt.rs @@ -662,7 +662,7 @@ impl MT { // This thread is tracing something... if !Arc::ptr_eq(thread_hl, &hl) { // ...but not this Location. - TransitionControlPoint::Execute(Arc::clone(root_ctr)) + TransitionControlPoint::NoAction } else { // ...and it's this location. if frameaddr == *tracing_frameaddr { @@ -691,6 +691,7 @@ impl MT { } _ => { // This thread isn't tracing anything. + assert!(!is_tracing); TransitionControlPoint::Execute(Arc::clone(root_ctr)) } }