From 4192d08b8dfd69dd6a0bf4eb037dd5d4904cfeed Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Sat, 28 Dec 2024 08:57:42 +0000 Subject: [PATCH] Pass the header `VarLocation`s to the reverse analyser. This allows it to add register hints to the body that reflect the state of the allocator at the *end* of the header rather than, as before, the *beginning*. In many cases, these two sets are the same, but this allows the header to pass additional things in registers to the body. --- .../compile/jitc_yk/codegen/x64/lsregalloc.rs | 4 +- ykrt/src/compile/jitc_yk/codegen/x64/mod.rs | 13 ++- .../jitc_yk/codegen/x64/rev_analyse.rs | 108 ++++++++---------- 3 files changed, 59 insertions(+), 66 deletions(-) diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs b/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs index 5d240f04e..a9791fb33 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs @@ -169,7 +169,7 @@ impl<'a> LSRegAlloc<'a> { /// Reset the register allocator. We use this when moving from the trace header into the trace /// body. - pub(crate) fn reset(&mut self) { + pub(crate) fn reset(&mut self, header_end_vlocs: &[VarLocation]) { for rs in self.gp_reg_states.iter_mut() { *rs = RegState::Empty; } @@ -186,7 +186,7 @@ impl<'a> LSRegAlloc<'a> { } self.fp_regset = RegSet::with_fp_reserved(); - self.rev_an.analyse_body(); + self.rev_an.analyse_body(header_end_vlocs); } /// Before generating code for the instruction at `iidx`, see which registers are no longer diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs index a50871bdb..6aab3136d 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs @@ -1961,14 +1961,15 @@ impl<'a> Assemble<'a> { // that have become constants during the trace header. So we will always have to either // update the [ParamInst]s of the trace body, which isn't ideal since it requires the // [Module] the be mutable. Or we do what we do below just for constants. - let mut varlocs = Vec::new(); - for var in self.m.trace_header_end().iter() { - let varloc = self.op_to_var_location(var.unpack(self.m)); - varlocs.push(varloc); - } + let varlocs = self + .m + .trace_header_end() + .iter() + .map(|pop| self.op_to_var_location(pop.unpack(self.m))) + .collect::>(); // Reset the register allocator before priming it with information about the trace body // inputs. - self.ra.reset(); + self.ra.reset(varlocs.as_slice()); for (i, op) in self.m.trace_body_start().iter().enumerate() { // By definition these can only be variables. let iidx = match op.unpack(self.m) { diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs b/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs index e71166531..6756978b0 100644 --- a/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs +++ b/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs @@ -41,6 +41,45 @@ impl<'a> RevAnalyse<'a> { /// Analyse a trace header. If the trace is [TraceKind::HeaderAndBody], you must call /// [Self::analyse_body] as soon as you have processed the trace header. pub fn analyse_header(&mut self) { + // First we populate the register hints for the end of the trace... + match self.m.tracekind() { + TraceKind::HeaderOnly => { + for ((iidx, inst), jump_op) in + self.m.iter_skipping_insts().zip(self.m.trace_header_end()) + { + match inst { + Inst::Param(pinst) => { + if let VarLocation::Register(reg) = VarLocation::from_yksmp_location( + self.m, + iidx, + self.m.param(pinst.paramidx()), + ) { + self.push_reg_hint_fixed(jump_op.unpack(self.m), reg); + } + } + _ => break, + } + } + } + TraceKind::HeaderAndBody => { + // We don't care where the register allocator ends at the end of the header, so we + // don't propagate backwards from `TraceHeaderEnd`. + } + TraceKind::Sidetrace => { + let vlocs = self.m.root_entry_vars(); + // Side-traces don't have a trace body since we don't apply loop peeling and thus use + // `trace_header_end` to store the jump variables. + debug_assert_eq!(vlocs.len(), self.m.trace_header_end().len()); + + for (vloc, jump_op) in vlocs.iter().zip(self.m.trace_header_end()) { + if let VarLocation::Register(reg) = *vloc { + self.push_reg_hint_fixed(jump_op.unpack(self.m), reg); + } + } + } + } + + // ...and then we perform the rest of the reverse analysis. let mut iter = self.m.iter_skipping_insts().rev(); match self.m.tracekind() { TraceKind::HeaderOnly | TraceKind::Sidetrace => { @@ -70,7 +109,13 @@ impl<'a> RevAnalyse<'a> { /// Analyse a trace body. This must be called iff both of the following are true: /// 1. the trace is [TraceKind::HeaderAndBody] /// 2. [Self::analyse_header] has already been called. - pub fn analyse_body(&mut self) { + pub fn analyse_body(&mut self, header_end_vlocs: &[VarLocation]) { + for (jump_op, vloc) in self.m.trace_body_end().iter().zip(header_end_vlocs) { + if let VarLocation::Register(reg) = vloc { + self.push_reg_hint_fixed(jump_op.unpack(self.m), *reg); + } + } + for (iidx, inst) in self.m.iter_skipping_insts().rev() { if let Inst::TraceHeaderEnd = inst { break; @@ -87,6 +132,9 @@ impl<'a> RevAnalyse<'a> { self.used_insts.set(usize::from(iidx), true); match inst { + Inst::TraceHeaderEnd | Inst::TraceBodyEnd | Inst::SidetraceEnd => { + // These are handled in [Self::analyse_header] or [Self::analyse_body]. + } Inst::BinOp(x) => self.an_binop(iidx, x), Inst::ICmp(x) => self.an_icmp(iidx, x), Inst::PtrAdd(x) => self.an_ptradd(iidx, x), @@ -104,15 +152,6 @@ impl<'a> RevAnalyse<'a> { return; } } - Inst::TraceHeaderEnd => { - self.an_header_end(); - } - Inst::TraceBodyEnd => { - self.an_body_end(); - } - Inst::SidetraceEnd => { - self.an_sidetrace_end(); - } Inst::SExt(x) => self.an_sext(iidx, x), Inst::ZExt(x) => self.an_zext(iidx, x), Inst::Select(x) => self.an_select(iidx, x), @@ -165,7 +204,7 @@ impl<'a> RevAnalyse<'a> { } BinOp::Sub => match (binst.lhs(self.m), binst.rhs(self.m)) { (_, Operand::Const(_)) => { - self.push_reg_hint(iidx, binst.rhs(self.m)); + self.push_reg_hint(iidx, binst.lhs(self.m)); } (Operand::Var(_), _) => { self.push_reg_hint(iidx, binst.lhs(self.m)); @@ -231,53 +270,6 @@ impl<'a> RevAnalyse<'a> { false } - fn an_header_end(&mut self) { - for ((iidx, inst), jump_op) in self.m.iter_skipping_insts().zip(self.m.trace_header_end()) { - match inst { - Inst::Param(pinst) => { - if let VarLocation::Register(reg) = VarLocation::from_yksmp_location( - self.m, - iidx, - self.m.param(pinst.paramidx()), - ) { - self.push_reg_hint_fixed(jump_op.unpack(self.m), reg); - } - } - _ => break, - } - } - } - - fn an_body_end(&mut self) { - for ((iidx, inst), jump_op) in self.m.iter_skipping_insts().zip(self.m.trace_body_end()) { - match inst { - Inst::Param(pinst) => { - if let VarLocation::Register(reg) = VarLocation::from_yksmp_location( - self.m, - iidx, - self.m.param(pinst.paramidx()), - ) { - self.push_reg_hint_fixed(jump_op.unpack(self.m), reg); - } - } - _ => break, - } - } - } - - fn an_sidetrace_end(&mut self) { - let vlocs = self.m.root_entry_vars(); - // Side-traces don't have a trace body since we don't apply loop peeling and thus use - // `trace_header_end` to store the jump variables. - debug_assert_eq!(vlocs.len(), self.m.trace_header_end().len()); - - for (vloc, jump_op) in vlocs.iter().zip(self.m.trace_header_end()) { - if let VarLocation::Register(reg) = *vloc { - self.push_reg_hint_fixed(jump_op.unpack(self.m), reg); - } - } - } - fn an_sext(&mut self, iidx: InstIdx, seinst: SExtInst) { self.push_reg_hint(iidx, seinst.val(self.m)); }