From 446ae368a915cde47b1b0e4178a5406d04a4d5dc Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 11:27:46 +0000
Subject: [PATCH 01/10] Move function.

This is the "main" function for this `impl`, so having it in the middle of
other functions is confusing.

No functional change.
---
 ykrt/src/compile/jitc_yk/opt/mod.rs | 136 ++++++++++++++--------------
 1 file changed, 68 insertions(+), 68 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index 3e2e7d8eb..03e146832 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
         Self { m, an, instll }
     }

+    fn opt(mut self) -> Result<Module, CompilationError> {
+        let base = self.m.insts_len();
+        let peel = match self.m.inst(self.m.last_inst_idx()) {
+            // If this is a sidetrace, we perform optimisations up to, but not including, loop peeling.
+            Inst::SidetraceEnd => false,
+            Inst::TraceHeaderEnd => true,
+            #[cfg(test)]
+            // Not all tests create "fully valid" traces, in the sense that -- to keep things
+            // simple -- they don't end with `TraceHeaderEnd`. We don't want to peel such traces,
+            // but nor, in testing mode, do we consider them ill-formed.
+            _ => false,
+            #[cfg(not(test))]
+            _ => panic!(),
+        };
+
+        // Note that since we will apply loop peeling here, the list of instructions grows as this
+        // loop runs. Each instruction we process is (after optimisations have been applied)
+        // duplicated and copied to the end of the module.
+        let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
+        for (iidx, _inst) in skipping.into_iter() {
+            self.opt_inst(iidx)?;
+            self.cse(iidx);
+        }
+        // FIXME: When code generation supports backwards register allocation, we won't need to
+        // explicitly perform dead code elimination and this function can be made `#[cfg(test)]` only.
+        self.m.dead_code_elimination();
+
+        if !peel {
+            return Ok(self.m);
+        }
+
+        // Now that we've processed the trace header, duplicate it to create the loop body.
+        // FIXME: Do we need to call `iter_skipping_inst_idxs` again?
+        // Maps header instructions to their position in the body.
+        let mut iidx_map = vec![0; base];
+        let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
+        for (iidx, inst) in skipping.into_iter() {
+            let c = inst.dup_and_remap_locals(&mut self.m, &|i: InstIdx| {
+                let newiidx = iidx_map[usize::from(i)];
+                Operand::Var(InstIdx::try_from(newiidx).unwrap())
+            })?;
+            let copyiidx = self.m.push(c)?;
+            iidx_map[usize::from(iidx)] = usize::from(copyiidx);
+            if let Inst::TraceHeaderStart = inst {
+                for (headop, bodyop) in self
+                    .m
+                    .trace_header_end()
+                    .iter()
+                    .zip(self.m.trace_body_start())
+                {
+                    // Inform the analyser about any constants being passed from the header into
+                    // the body.
+                    if let Operand::Const(cidx) = headop.unpack(&self.m) {
+                        let Operand::Var(op_iidx) = bodyop.unpack(&self.m) else {
+                            panic!()
+                        };
+                        self.an.set_value(op_iidx, Value::Const(cidx));
+                    }
+                }
+            }
+            self.opt_inst(copyiidx)?;
+        }
+
+        // FIXME: Apply CSE and run another pass of optimisations on the peeled loop.
+        self.m.dead_code_elimination();
+        Ok(self.m)
+    }
+
     /// Optimise instruction `iidx`.
     fn opt_inst(&mut self, iidx: InstIdx) -> Result<(), CompilationError> {
         match self.m.inst(iidx) {

         Ok(())
     }

-    fn opt(mut self) -> Result<Module, CompilationError> {
-        let base = self.m.insts_len();
-        let peel = match self.m.inst(self.m.last_inst_idx()) {
-            // If this is a sidetrace, we perform optimisations up to, but not including, loop peeling.
-            Inst::SidetraceEnd => false,
-            Inst::TraceHeaderEnd => true,
-            #[cfg(test)]
-            // Not all tests create "fully valid" traces, in the sense that -- to keep things
-            // simple -- they don't end with `TraceHeaderEnd`. We don't want to peel such traces,
-            // but nor, in testing mode, do we consider them ill-formed.
-            _ => false,
-            #[cfg(not(test))]
-            _ => panic!(),
-        };
-
-        // Note that since we will apply loop peeling here, the list of instructions grows as this
-        // loop runs. Each instruction we process is (after optimisations have been applied)
-        // duplicated and copied to the end of the module.
-        let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
-        for (iidx, _inst) in skipping.into_iter() {
-            self.opt_inst(iidx)?;
-            self.cse(iidx);
-        }
-        // FIXME: When code generation supports backwards register allocation, we won't need to
-        // explicitly perform dead code elimination and this function can be made `#[cfg(test)]` only.
-        self.m.dead_code_elimination();
-
-        if !peel {
-            return Ok(self.m);
-        }
-
-        // Now that we've processed the trace header, duplicate it to create the loop body.
-        // FIXME: Do we need to call `iter_skipping_inst_idxs` again?
-        // Maps header instructions to their position in the body.
-        let mut iidx_map = vec![0; base];
-        let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
-        for (iidx, inst) in skipping.into_iter() {
-            let c = inst.dup_and_remap_locals(&mut self.m, &|i: InstIdx| {
-                let newiidx = iidx_map[usize::from(i)];
-                Operand::Var(InstIdx::try_from(newiidx).unwrap())
-            })?;
-            let copyiidx = self.m.push(c)?;
-            iidx_map[usize::from(iidx)] = usize::from(copyiidx);
-            if let Inst::TraceHeaderStart = inst {
-                for (headop, bodyop) in self
-                    .m
-                    .trace_header_end()
-                    .iter()
-                    .zip(self.m.trace_body_start())
-                {
-                    // Inform the analyser about any constants being passed from the header into
-                    // the body.
-                    if let Operand::Const(cidx) = headop.unpack(&self.m) {
-                        let Operand::Var(op_iidx) = bodyop.unpack(&self.m) else {
-                            panic!()
-                        };
-                        self.an.set_value(op_iidx, Value::Const(cidx));
-                    }
-                }
-            }
-            self.opt_inst(copyiidx)?;
-        }
-
-        // FIXME: Apply CSE and run another pass of optimisations on the peeled loop.
-        self.m.dead_code_elimination();
-        Ok(self.m)
-    }
-
     /// Attempt common subexpression elimination on `iidx`, replacing it with a `Copy` or
     /// `Tombstone` if possible.
     fn cse(&mut self, iidx: InstIdx) {

From 9ece1e2b64c546f32a5ba0979d894e4091fdfd5e Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 11:32:08 +0000
Subject: [PATCH 02/10] Make clear that we can't optimise a `TraceHeaderStart`.

This isn't a big deal on its own, but will make some things more obvious
soon.
---
 ykrt/src/compile/jitc_yk/opt/mod.rs | 47 +++++++++++++++++------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index 03e146832..e9a73b64d 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
         // Note that since we will apply loop peeling here, the list of instructions grows as this
         // loop runs. Each instruction we process is (after optimisations have been applied)
         // duplicated and copied to the end of the module.
         let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
-        for (iidx, _inst) in skipping.into_iter() {
-            self.opt_inst(iidx)?;
-            self.cse(iidx);
+        for (iidx, inst) in skipping.into_iter() {
+            match inst {
+                Inst::TraceHeaderStart => (),
+                _ => {
+                    self.opt_inst(iidx)?;
+                    self.cse(iidx);
+                }
+            }
         }
         // FIXME: When code generation supports backwards register allocation, we won't need to
         // explicitly perform dead code elimination and this function can be made `#[cfg(test)]` only.
         self.m.dead_code_elimination();

             let copyiidx = self.m.push(c)?;
             iidx_map[usize::from(iidx)] = usize::from(copyiidx);
-            if let Inst::TraceHeaderStart = inst {
-                for (headop, bodyop) in self
-                    .m
-                    .trace_header_end()
-                    .iter()
-                    .zip(self.m.trace_body_start())
-                {
-                    // Inform the analyser about any constants being passed from the header into
-                    // the body.
-                    if let Operand::Const(cidx) = headop.unpack(&self.m) {
-                        let Operand::Var(op_iidx) = bodyop.unpack(&self.m) else {
-                            panic!()
-                        };
-                        self.an.set_value(op_iidx, Value::Const(cidx));
+            match inst {
+                Inst::TraceHeaderStart => {
+                    for (headop, bodyop) in self
+                        .m
+                        .trace_header_end()
+                        .iter()
+                        .zip(self.m.trace_body_start())
+                    {
+                        // Inform the analyser about any constants being passed from the header into
+                        // the body.
+                        if let Operand::Const(cidx) = headop.unpack(&self.m) {
+                            let Operand::Var(op_iidx) = bodyop.unpack(&self.m) else {
+                                panic!()
+                            };
+                            self.an.set_value(op_iidx, Value::Const(cidx));
+                        }
                     }
                 }
+                _ => self.opt_inst(copyiidx)?,
             }
-            self.opt_inst(copyiidx)?;
         }

     /// Optimise instruction `iidx`.
     fn opt_inst(&mut self, iidx: InstIdx) -> Result<(), CompilationError> {
         match self.m.inst(iidx) {
             #[cfg(test)]
             Inst::BlackBox(_) => (),
-            Inst::Const(_) | Inst::Copy(_) | Inst::Tombstone => unreachable!(),
+            Inst::Const(_) | Inst::Copy(_) | Inst::Tombstone | Inst::TraceHeaderStart => {
+                unreachable!()
+            }
             Inst::BinOp(x) => match x.binop() {
                 BinOp::Add => match (
                     self.an.op_map(&self.m, x.lhs(&self.m)),

From 1805d8d0cb4de50bf25b0b9d923dbafd5b7e977c Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 11:41:46 +0000
Subject: [PATCH 03/10] Make `iidx_map` map to `InstIdx`s not `usize`s.

Whilst here, use `InstIdx::max()` as the dummy value: except in the unlucky
case where we magically end up with exactly `InstIdx::max()` instructions
after peeling, this will make errors more likely to stand out.
---
 ykrt/src/compile/jitc_yk/opt/mod.rs | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index e9a73b64d..9e11a6d3d 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
         // Now that we've processed the trace header, duplicate it to create the loop body.
         // FIXME: Do we need to call `iter_skipping_inst_idxs` again?
         // Maps header instructions to their position in the body.
-        let mut iidx_map = vec![0; base];
+        let mut iidx_map = vec![InstIdx::max(); base];
         let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
         for (iidx, inst) in skipping.into_iter() {
-            let c = inst.dup_and_remap_locals(&mut self.m, &|i: InstIdx| {
-                let newiidx = iidx_map[usize::from(i)];
-                Operand::Var(InstIdx::try_from(newiidx).unwrap())
+            let c = inst.dup_and_remap_locals(&mut self.m, &|op_iidx: InstIdx| {
+                Operand::Var(iidx_map[usize::from(op_iidx)])
             })?;
-            let copyiidx = self.m.push(c)?;
-            iidx_map[usize::from(iidx)] = usize::from(copyiidx);
+            let copy_iidx = self.m.push(c)?;
+            iidx_map[usize::from(iidx)] = copy_iidx;
             match inst {
                 Inst::TraceHeaderStart => {
                     for (headop, bodyop) in self

                     }
                 }
             }
-            _ => self.opt_inst(copyiidx)?,
+            _ => self.opt_inst(copy_iidx)?,
             }
         }

From 64799f5ca99fe11f90a2a2b4a4a0fc56f41319be Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 11:45:46 +0000
Subject: [PATCH 04/10] Remove `&`.

We *might* need to put this back (perhaps as `&mut`) later, but for now,
this enables inlining.
---
 ykrt/src/compile/jitc_yk/jit_ir/mod.rs | 2 +-
 ykrt/src/compile/jitc_yk/opt/mod.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
index e7ae6c4bf..1c6997184 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
 impl Inst {
     pub(crate) fn dup_and_remap_locals<F>(
         &self,
         m: &mut Module,
-        f: &F,
+        f: F,
     ) -> Result<Inst, CompilationError>
     where
         F: Fn(InstIdx) -> Operand,

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index 9e11a6d3d..ddd7b4326 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
         for (iidx, inst) in skipping.into_iter() {
-            let c = inst.dup_and_remap_locals(&mut self.m, &|op_iidx: InstIdx| {
+            let c = inst.dup_and_remap_locals(&mut self.m, |op_iidx: InstIdx| {
                 Operand::Var(iidx_map[usize::from(op_iidx)])
             })?;
             let copy_iidx = self.m.push(c)?;

From da34ac0ec3dfa3e6e47442379279618c237ae130 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 11:46:51 +0000
Subject: [PATCH 05/10] "Variable" is the correct SSA term for these
 functions.

Also make the doc string for `dup_and_remap` more explicit about its
guarantees.
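To illustrate the guarantee, a hypothetical caller looks roughly like this (a
sketch, not code from this series; `iidx_map` stands for whatever mapping the
caller maintains):

```
// The closure is invoked once per `Operand::Var` the instruction references
// (in an undefined order); `Operand::Const`s pass through untouched.
let copy = inst.dup_and_remap_vars(&mut m, |iidx: InstIdx| {
    Operand::Var(iidx_map[usize::from(iidx)])
})?;
```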
---
 ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs | 2 +-
 ykrt/src/compile/jitc_yk/jit_ir/dead_code.rs | 2 +-
 ykrt/src/compile/jitc_yk/jit_ir/mod.rs | 10 ++++++----
 ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs | 2 +-
 ykrt/src/compile/jitc_yk/opt/mod.rs | 2 +-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs b/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs
index 760099a7e..9f01245aa 100644
--- a/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs
+++ b/ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs
 impl<'a> RevAnalyse<'a> {
         }

         // Calculate inst_vals_alive_until
-        inst.map_operand_locals(self.m, &mut |x| {
+        inst.map_operand_vars(self.m, &mut |x| {
             self.used_insts.set(usize::from(x), true);
             if self.inst_vals_alive_until[usize::from(x)] < iidx {
                 self.inst_vals_alive_until[usize::from(x)] = iidx;

diff --git a/ykrt/src/compile/jitc_yk/jit_ir/dead_code.rs b/ykrt/src/compile/jitc_yk/jit_ir/dead_code.rs
index e0ee3d3c2..712c38784 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/dead_code.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/dead_code.rs
 impl Module {
                 || inst.is_barrier(self)
             {
                 used.set(usize::from(iidx), true);
-                inst.map_operand_locals(self, &mut |x| {
+                inst.map_operand_vars(self, &mut |x| {
                     used.set(usize::from(x), true);
                 });
             } else {

diff --git a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
index 1c6997184..aff38a6df 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
 impl Inst {
     }

     /// Apply the function `f` to each of this instruction's [Operand]s iff they are of type
-    /// [Operand::Local]. When an instruction references more than one [Operand], the order of
+    /// [Operand::Var]. When an instruction references more than one [Operand], the order of
     /// traversal is undefined.
-    pub(crate) fn map_operand_locals<F>(&self, m: &Module, f: &mut F)
+    pub(crate) fn map_operand_vars<F>(&self, m: &Module, f: &mut F)
     where
         F: FnMut(InstIdx),

         }
     }

-    /// Duplicate this [Inst] while applying function `f` to each operand.
-    pub(crate) fn dup_and_remap_locals<F>(
+    /// Duplicate this [Inst] and, during that process, apply the function `f` to each of this
+    /// instruction's [Operand]s iff they are of type [Operand::Var]. When an instruction
+    /// references more than one [Operand], the order of traversal is undefined.
+    pub(crate) fn dup_and_remap_vars<F>(
         &self,
         m: &mut Module,
         f: F,
     ) -> Result<Inst, CompilationError>
     where
         F: Fn(InstIdx) -> Operand,

diff --git a/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs b/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs
index 301887385..9c01206e5 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs
 impl Module {
         let mut last_inst = None;

         for (iidx, inst) in self.iter_skipping_insts() {
-            inst.map_operand_locals(self, &mut |x| {
+            inst.map_operand_vars(self, &mut |x| {
                 if let Inst::Tombstone = self.insts[usize::from(x)] {
                     panic!(
                         "Instruction at position {iidx} uses undefined value (%{x})\n  {}",

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index ddd7b4326..aa910a5a0 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
         let mut iidx_map = vec![InstIdx::max(); base];
         let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
         for (iidx, inst) in skipping.into_iter() {
-            let c = inst.dup_and_remap_locals(&mut self.m, |op_iidx: InstIdx| {
+            let c = inst.dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
                 Operand::Var(iidx_map[usize::from(op_iidx)])
             })?;
             let copy_iidx = self.m.push(c)?;

From ab09557f32b88d42b753a26920b688272659b081 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 11:53:40 +0000
Subject: [PATCH 06/10] Fix `dup_and_remap_vars` so that it doesn't copy
 instruction types.

Previously for `TraceHeaderStart` and `TraceHeaderEnd` this function did
both remapping and copying, e.g.:

```
m.trace_body_start = m.trace_header_start
```

This meant that calling this function more than once did very surprising
things. This commit doesn't fix all the surprises -- it's still not
idempotent -- but it now has fewer surprises than before.
`TraceHeaderStart|End` now does nothing surprising, and `TraceBodyStart|End`
is where the non-idempotency now resides: however, in practice, it's now
"benign" non-idempotency, even if I still don't like it.

For the time being, I have chosen to make the relevant `Module` fields
`pub(crate)` partly as a reminder that something fishy is going on here. I
can't quite work out what the right long-term fix is here, so I'll come back
to it.
---
 ykrt/src/compile/jitc_yk/jit_ir/mod.rs | 20 ++++++--------
 ykrt/src/compile/jitc_yk/opt/mod.rs | 37 +++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
index aff38a6df..ab5be9fb4 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
 pub(crate) struct Module {
     /// Live variables at the beginning of the root trace.
     root_entry_vars: Vec<VarLocation>,
     /// Live variables at the beginning of the trace body.
-    trace_body_start: Vec<PackedOperand>,
+    pub(crate) trace_body_start: Vec<PackedOperand>,
     /// The ordered sequence of operands at the end of the trace body: there will be one per
     /// [Operand] at the start of the loop.
-    trace_body_end: Vec<PackedOperand>,
+    pub(crate) trace_body_end: Vec<PackedOperand>,
     /// Live variables at the beginning of the trace header.
     trace_header_start: Vec<PackedOperand>,
     /// Live variables at the end of the trace header.
     trace_header_end: Vec<PackedOperand>,

 impl Inst {
     /// Duplicate this [Inst] and, during that process, apply the function `f` to each of this
     /// instruction's [Operand]s iff they are of type [Operand::Var]. When an instruction
     /// references more than one [Operand], the order of traversal is undefined.
+    ///
+    /// Note: for `TraceBody*` and `TraceHeader*` instructions, this function is not idempotent.
     pub(crate) fn dup_and_remap_vars<F>(
         &self,
         m: &mut Module,

         Inst::Tombstone => Inst::Tombstone,
-        Inst::TraceHeaderStart => {
-            // Copy the header label into the body while remapping the operands.
-            m.trace_body_start = m
-                .trace_header_start
-                .iter()
-                .map(|op| mapper(m, op))
-                .collect();
+        Inst::TraceBodyStart => {
+            m.trace_body_start = m.trace_body_start.iter().map(|op| mapper(m, op)).collect();
             Inst::TraceBodyStart
         }
-        Inst::TraceHeaderEnd => {
-            // Copy the header label into the body while remapping the operands.
-            m.trace_body_end = m.trace_header_end.iter().map(|op| mapper(m, op)).collect();
+        Inst::TraceBodyEnd => {
+            m.trace_body_end = m.trace_body_end.iter().map(|op| mapper(m, op)).collect();
             Inst::TraceBodyEnd
         }

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index aa910a5a0..58a6e84da 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
         for (iidx, inst) in skipping.into_iter() {
             match inst {
                 Inst::TraceHeaderStart => (),
+                Inst::TraceHeaderEnd => (),
                 _ => {
                     self.opt_inst(iidx)?;
                     self.cse(iidx);
                 }
             }
         }

         let mut iidx_map = vec![InstIdx::max(); base];
         let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
         for (iidx, inst) in skipping.into_iter() {
-            let c = inst.dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
-                Operand::Var(iidx_map[usize::from(op_iidx)])
-            })?;
-            let copy_iidx = self.m.push(c)?;
-            iidx_map[usize::from(iidx)] = copy_iidx;
             match inst {
                 Inst::TraceHeaderStart => {
+                    self.m.trace_body_start = self.m.trace_header_start().to_vec();
+                    self.m.push(Inst::TraceBodyStart)?;
+                    // FIXME: We rely on `dup_and_remap_vars` not being idempotent here.
+                    let _ = Inst::TraceBodyStart
+                        .dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
+                            Operand::Var(iidx_map[usize::from(op_iidx)])
+                        })?;
                     for (headop, bodyop) in self
                         .m
                         .trace_header_end()
                         .iter()
                         .zip(self.m.trace_body_start())
                     {
                         // Inform the analyser about any constants being passed from the header into
                         // the body.
                         if let Operand::Const(cidx) = headop.unpack(&self.m) {
                             let Operand::Var(op_iidx) = bodyop.unpack(&self.m) else {
                                 panic!()
                             };
                             self.an.set_value(op_iidx, Value::Const(cidx));
                         }
                     }
                 }
+                Inst::TraceHeaderEnd => {
+                    self.m.trace_body_end = self.m.trace_header_end().to_vec();
+                    self.m.push(Inst::TraceBodyEnd)?;
+                    // FIXME: We rely on `dup_and_remap_vars` not being idempotent here.
+                    let _ = Inst::TraceBodyEnd
+                        .dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
+                            Operand::Var(iidx_map[usize::from(op_iidx)])
+                        })?;
+                }
+                _ => {
+                    let c = inst.dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
+                        Operand::Var(iidx_map[usize::from(op_iidx)])
+                    })?;
+                    let copy_iidx = self.m.push(c)?;
+                    iidx_map[usize::from(iidx)] = copy_iidx;
+                    self.opt_inst(copy_iidx)?;
+                }
-                _ => self.opt_inst(copy_iidx)?,
             }
         }

     /// Optimise instruction `iidx`.
     fn opt_inst(&mut self, iidx: InstIdx) -> Result<(), CompilationError> {
         match self.m.inst(iidx) {
             #[cfg(test)]
             Inst::BlackBox(_) => (),
-            Inst::Const(_) | Inst::Copy(_) | Inst::Tombstone | Inst::TraceHeaderStart => {
+            Inst::Const(_)
+            | Inst::Copy(_)
+            | Inst::Tombstone
+            | Inst::TraceHeaderStart
+            | Inst::TraceHeaderEnd => {
                 unreachable!()
             }
             Inst::BinOp(x) => match x.binop() {

From 0e90b07e3dc6874728a4af221d1cc3459b6e1834 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 12:35:17 +0000
Subject: [PATCH 07/10] Optimise the body after peeling.

Right now optimising each instruction as it's copied probably works OK, but
it's going to be fragile in the long-term. In particular, this puts a *very*
strong constraint on `iidx_map` that I suspect we'll forget.
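Roughly, the constraint is the following (a sketch, not code in this series):

```
// When copying instruction `iidx`, every `Operand::Var(op_iidx)` it
// references must already have been copied and recorded in `iidx_map`,
// i.e. its entry must no longer be the `InstIdx::max()` dummy value:
assert_ne!(iidx_map[usize::from(op_iidx)], InstIdx::max());
```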
This commit turns this into a two-stage thing: first we peel, creating a body
that's virtually identical to the header. Once that's completely done, we
then run the optimiser over the complete body. This means that optimising the
header and body now run in conceptually the same manner: it's a bit slower
(though probably unmeasurably so!) but less likely for us to shoot ourselves
in the foot.
---
 ykrt/src/compile/jitc_yk/opt/mod.rs | 39 ++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index 58a6e84da..5e68d00b3 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 struct Opt {
     m: Module,
     an: Analyse,
-    /// Needed for common subexpression elimination.
-    instll: InstLinkedList,
 }

 impl Opt {
     fn new(m: Module) -> Self {
         let an = Analyse::new(&m);
-        let instll = InstLinkedList::new(&m);
-        Self { m, an, instll }
+        Self { m, an }
     }

         // Note that since we will apply loop peeling here, the list of instructions grows as this
         // loop runs. Each instruction we process is (after optimisations have been applied)
         // duplicated and copied to the end of the module.
+        let mut instll = InstLinkedList::new(&self.m);
         let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
         for (iidx, inst) in skipping.into_iter() {
             match inst {
                 Inst::TraceHeaderStart => (),
                 Inst::TraceHeaderEnd => (),
                 _ => {
                     self.opt_inst(iidx)?;
-                    self.cse(iidx);
+                    self.cse(&mut instll, iidx);
                 }
             }
         }

         // Now that we've processed the trace header, duplicate it to create the loop body.
-        // FIXME: Do we need to call `iter_skipping_inst_idxs` again?
-        // Maps header instructions to their position in the body.
         let mut iidx_map = vec![InstIdx::max(); base];
         let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
         for (iidx, inst) in skipping.into_iter() {

                     })?;
                     let copy_iidx = self.m.push(c)?;
                     iidx_map[usize::from(iidx)] = copy_iidx;
-                    self.opt_inst(copy_iidx)?;
                 }
             }
         }
+
+        // Create a fresh `instll`: normal CSE in the body (a) can't possibly reference the
+        // header and (b) the number of instructions in the `instll`-for-the-header is wrong as a
+        // result of peeling.
+        let mut instll = InstLinkedList::new(&self.m);
+        let skipping = self
+            .m
+            .iter_skipping_insts()
+            .skip_while(|(_, inst)| !matches!(inst, Inst::TraceBodyStart))
+            .collect::<Vec<_>>();
+        for (iidx, inst) in skipping.into_iter() {
+            match inst {
+                Inst::TraceHeaderStart | Inst::TraceHeaderEnd => panic!(),
+                Inst::TraceBodyStart | Inst::TraceBodyEnd => (),
+                _ => {
+                    self.opt_inst(iidx)?;
+                    self.cse(&mut instll, iidx);
+                }
+            }
+        }

     /// Attempt common subexpression elimination on `iidx`, replacing it with a `Copy` or
     /// `Tombstone` if possible.
-    fn cse(&mut self, iidx: InstIdx) {
+    fn cse(&mut self, instll: &mut InstLinkedList, iidx: InstIdx) {
         let inst = match self.m.inst_nocopy(iidx) {
             // If this instruction is already a `Copy`, then there is nothing for CSE to do.
             None => return,
             // There's no point in trying CSE on a `Const` or `Tombstone`.
             Some(Inst::Const(_)) | Some(Inst::Tombstone) => return,
             Some(inst @ Inst::Guard(ginst)) => {
-                for (_, back_inst) in self.instll.rev_iter(&self.m, inst) {
+                for (_, back_inst) in instll.rev_iter(&self.m, inst) {
                     if let Inst::Guard(back_ginst) = back_inst {
                         if ginst.cond(&self.m) == back_ginst.cond(&self.m)
                             && ginst.expect() == back_ginst.expect()

         // Can we CSE the instruction at `iidx`?
-        for (back_iidx, back_inst) in self.instll.rev_iter(&self.m, inst) {
+        for (back_iidx, back_inst) in instll.rev_iter(&self.m, inst) {
             if inst.decopy_eq(&self.m, back_inst) {
                 self.m.replace(iidx, Inst::Copy(back_iidx));
                 return;

             //    things that aren't CSE candidates, which saves us some pointless work.
             // 2. Haven't been turned into `Copy`s. So only if we've failed to CSE a given
             //    instruction is it worth pushing to the `instll`.
-        self.instll.push(iidx, inst);
+        instll.push(iidx, inst);
     }

     /// Optimise an [ICmpInst].

From f3220fc1c4e3ef73dd7ada8a7971e198edd25fa9 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 15:32:21 +0000
Subject: [PATCH 08/10] Have `dup_and_remap_vars` take a `Module`.

We will soon need this.
---
 ykrt/src/compile/jitc_yk/jit_ir/mod.rs | 33 +++++++++++++++-----------
 ykrt/src/compile/jitc_yk/opt/mod.rs | 6 ++---
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
index ab5be9fb4..4c36f8635 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
 impl Inst {
     pub(crate) fn dup_and_remap_vars<F>(
         &self,
         m: &mut Module,
         f: F,
     ) -> Result<Inst, CompilationError>
     where
-        F: Fn(InstIdx) -> Operand,
+        F: Fn(&Module, InstIdx) -> Operand,
     {
         let mapper = |m: &Module, x: &PackedOperand| match x.unpack(m) {
-            Operand::Var(iidx) => PackedOperand::new(&f(iidx)),
+            Operand::Var(iidx) => PackedOperand::new(&f(m, iidx)),
             Operand::Const(_) => *x,
         };
-        let op_mapper = |x: &Operand| match x {
-            Operand::Var(iidx) => f(*iidx),
+        let op_mapper = |m: &Module, x: &Operand| match x {
+            Operand::Var(iidx) => f(m, *iidx),
             Operand::Const(c) => Operand::Const(*c),
         };
         let inst = match self {

             // Clone and map arguments.
             let args = dc
                 .iter_args_idx()
-                .map(|x| op_mapper(&m.arg(x)))
+                .map(|x| op_mapper(m, &m.arg(x)))
                 .collect::<Vec<_>>();
             let dc = DirectCallInst::new(m, dc.target, args)?;
             Inst::Call(dc)

             // Clone and map arguments.
             let args = ic
                 .iter_args_idx()
-                .map(|x| op_mapper(&m.arg(x)))
+                .map(|x| op_mapper(m, &m.arg(x)))
                 .collect::<Vec<_>>();
-            let icnew = IndirectCallInst::new(m, ic.ftyidx, op_mapper(&ic.target(m)), args)?;
+            let icnew = IndirectCallInst::new(m, ic.ftyidx, op_mapper(m, &ic.target(m)), args)?;
             let idx = m.push_indirect_call(icnew)?;
             Inst::IndirectCall(idx)
         }
         Inst::Const(c) => Inst::Const(*c),
-        Inst::Copy(iidx) => match f(*iidx) {
+        Inst::Copy(iidx) => match f(m, *iidx) {
             Operand::Var(iidx) => Inst::Copy(iidx),
             Operand::Const(cidx) => Inst::Const(cidx),
         },

                     x.safepoint,
                     x.args
                         .iter()
-                        .map(|x| op_mapper(&x.unpack(m)))
+                        .map(|x| op_mapper(m, &x.unpack(m)))
                         .collect::<Vec<_>>(),
                 )
             })

                 off: inst.off,
             })
         }
-        Inst::SidetraceEnd => {
-            // This instruction only exists in side-traces, which don't have loops we can peel
-            // off.
-            unreachable!()
-        }
         Inst::Select(SelectInst {
             cond,
             trueval,

             m.trace_body_end = m.trace_body_end.iter().map(|op| mapper(m, op)).collect();
             Inst::TraceBodyEnd
         }
+        Inst::TraceHeaderEnd => {
+            // Remap the header label's operands in place.
+            m.trace_header_end = m.trace_header_end.iter().map(|op| mapper(m, op)).collect();
+            Inst::TraceHeaderEnd
+        }
+        Inst::SidetraceEnd => {
+            // Remap the header label's operands in place.
+            m.trace_header_end = m.trace_header_end.iter().map(|op| mapper(m, op)).collect();
+            Inst::SidetraceEnd
+        }
         Inst::Trunc(TruncInst { val, dest_tyidx }) => Inst::Trunc(TruncInst {
             val: mapper(m, val),
             dest_tyidx: *dest_tyidx,

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index 5e68d00b3..f3f99b7c8 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
                     self.m.push(Inst::TraceBodyStart)?;
                     // FIXME: We rely on `dup_and_remap_vars` not being idempotent here.
                     let _ = Inst::TraceBodyStart
-                        .dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
+                        .dup_and_remap_vars(&mut self.m, |_, op_iidx: InstIdx| {
                             Operand::Var(iidx_map[usize::from(op_iidx)])
                         })?;

                     self.m.push(Inst::TraceBodyEnd)?;
                     // FIXME: We rely on `dup_and_remap_vars` not being idempotent here.
                     let _ = Inst::TraceBodyEnd
-                        .dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
+                        .dup_and_remap_vars(&mut self.m, |_, op_iidx: InstIdx| {
                             Operand::Var(iidx_map[usize::from(op_iidx)])
                         })?;
                 }
                 _ => {
-                    let c = inst.dup_and_remap_vars(&mut self.m, |op_iidx: InstIdx| {
+                    let c = inst.dup_and_remap_vars(&mut self.m, |_, op_iidx: InstIdx| {
                         Operand::Var(iidx_map[usize::from(op_iidx)])
                     })?;
                     let copy_iidx = self.m.push(c)?;

From 4858809e79b6aefd00ee4455dc93b08129f7ab03 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 15:35:39 +0000
Subject: [PATCH 09/10] After optimising an instruction, rewrite all of its
 operands.

Previously we only picked up results from the analyser if we performed other
optimisations. So imagine we had this:

```
%3: i8 = ...
%4: i1 = eq %3, 7
...
call f(%3)
```

then previously the call wouldn't become `f(7i8)` but would stay as `f(%3)`.
This commit rewrites *every* instruction so that we pick up all the results
of the analyser. We could do this more efficiently (e.g. it's not worth
rewriting instructions we've already optimised), but this is good enough for
now, and it leads to a meaningful difference, e.g. about a 4% improvement on
big_loop.
---
 ykrt/src/compile/jitc_yk/opt/analyse.rs | 22 +++++++++-------
 ykrt/src/compile/jitc_yk/opt/mod.rs | 37 ++++++++++++++++++++-----
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/ykrt/src/compile/jitc_yk/opt/analyse.rs b/ykrt/src/compile/jitc_yk/opt/analyse.rs
index 05c453afb..f2d577eb5 100644
--- a/ykrt/src/compile/jitc_yk/opt/analyse.rs
+++ b/ykrt/src/compile/jitc_yk/opt/analyse.rs
 use super::{
     super::jit_ir::{GuardInst, Inst, InstIdx, Module, Operand, Predicate},
     Value,
 };
+use std::cell::RefCell;

 /// Ongoing analysis of a trace: what value can a given instruction in the past produce?
 ///
 /// `Const` now does not mean it would be valid to assume that at earlier points it is safe to
 /// assume it was also a `Const`.
 pub(super) struct Analyse {
-    /// For each instruction, what have we learnt about its [Value] so far?
-    values: Vec<Value>,
+    /// For each instruction, what have we learnt about its [Value] so far? This is a `RefCell` as
+    /// it allows [Analyse::op_map] to take `&self`: changing that to `&mut self` upsets a lot of
+    /// other parts of the system w.r.t. the borrow checker.
+    values: RefCell<Vec<Value>>,
 }

 impl Analyse {
             // point. What we do know is that it is at most two times the size (though since we
             // don't copy over [Tombstone]s and [Copy]s it will be slightly less than that.
             // FIXME: Can we calculate this more accurately?
-            values: vec![Value::Unknown; m.insts_len() * 2],
+            values: RefCell::new(vec![Value::Unknown; m.insts_len() * 2]),
         }
     }

     /// Map `op` based on our analysis so far. In some cases this will return `op` unchanged, but
     /// in others it may be able to turn what looks like a variable reference into a constant.
-    pub(super) fn op_map(&mut self, m: &Module, op: Operand) -> Operand {
+    pub(super) fn op_map(&self, m: &Module, op: Operand) -> Operand {
         match op {
-            Operand::Var(iidx) => match self.values[usize::from(iidx)] {
+            Operand::Var(iidx) => match self.values.borrow()[usize::from(iidx)] {
                 Value::Unknown => {
                     // Since we last saw an `ICmp` instruction, we may have gathered new knowledge
                     // that allows us to turn it into a constant.

                         if pred == Predicate::Equal
                             && m.const_(lhs_cidx) == m.const_(rhs_cidx)
                         {
-                            self.values[usize::from(iidx)] = Value::Const(m.true_constidx());
+                            self.values.borrow_mut()[usize::from(iidx)] =
+                                Value::Const(m.true_constidx());
                             return Operand::Const(m.true_constidx());
                         }
                     }

     /// Update our idea of what value the instruction at `iidx` can produce.
-    pub(super) fn set_value(&mut self, iidx: InstIdx, v: Value) {
-        self.values[usize::from(iidx)] = v;
+    pub(super) fn set_value(&self, iidx: InstIdx, v: Value) {
+        self.values.borrow_mut()[usize::from(iidx)] = v;
     }

     /// Use the guard `inst` to update our knowledge about the variable used as its condition.
-    pub(super) fn guard(&mut self, m: &Module, g_inst: GuardInst) {
+    pub(super) fn guard(&self, m: &Module, g_inst: GuardInst) {
         if let Operand::Var(iidx) = g_inst.cond(m) {
             if let Inst::ICmp(ic_inst) = m.inst(iidx) {
                 let lhs = self.op_map(m, ic_inst.lhs(m));

diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index f3f99b7c8..998222edd 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
 impl Opt {
         for (iidx, inst) in skipping.into_iter() {
             match inst {
                 Inst::TraceHeaderStart => (),
-                Inst::TraceHeaderEnd => (),
                 _ => {
                     self.opt_inst(iidx)?;
                     self.cse(&mut instll, iidx);

         for (iidx, inst) in skipping.into_iter() {
             match inst {
                 Inst::TraceHeaderStart | Inst::TraceHeaderEnd => panic!(),
-                Inst::TraceBodyStart | Inst::TraceBodyEnd => (),
+                Inst::TraceBodyStart => (),
                 _ => {
                     self.opt_inst(iidx)?;
                     self.cse(&mut instll, iidx);

     /// Optimise instruction `iidx`.
     fn opt_inst(&mut self, iidx: InstIdx) -> Result<(), CompilationError> {
+        // First rewrite the instruction so that all changes from the analyser are reflected
+        // straight away. Note: we deliberately do this before some of the changes below. Most
+        // notably we need to call `rewrite` before telling the analyser about a `Guard`: if we
+        // swap that order, the guard will pick up the wrong value for operand(s) related to
+        // whether the guard succeeds!
+        self.rewrite(iidx)?;
+
         match self.m.inst(iidx) {
             #[cfg(test)]
             Inst::BlackBox(_) => (),
-            Inst::Const(_)
-            | Inst::Copy(_)
-            | Inst::Tombstone
-            | Inst::TraceHeaderStart
-            | Inst::TraceHeaderEnd => {
+            Inst::Const(_) | Inst::Copy(_) | Inst::Tombstone | Inst::TraceHeaderStart => {
                 unreachable!()
             }

             _ => (),
         };
+
         Ok(())
     }

+    /// Rewrite the instruction at `iidx`: duplicate it and remap its operands so that it reflects
+    /// everything learnt by the analyser.
+    fn rewrite(&mut self, iidx: InstIdx) -> Result<(), CompilationError> {
+        match self.m.inst_nocopy(iidx) {
+            None => Ok(()),
+            Some(Inst::Guard(_)) => {
+                // We can't safely rewrite guard operands as we pick up the result of the analysis
+                // on the guard itself!
+                Ok(())
+            }
+            Some(inst) => {
+                let r = inst.dup_and_remap_vars(&mut self.m, |m, op_iidx| {
+                    self.an.op_map(m, Operand::Var(op_iidx))
+                })?;
+                self.m.replace(iidx, r);
+                Ok(())
+            }
+        }
+    }
+
     /// Attempt common subexpression elimination on `iidx`, replacing it with a `Copy` or
     /// `Tombstone` if possible.
     fn cse(&mut self, instll: &mut InstLinkedList, iidx: InstIdx) {

From 4963d5baa6de0ae64877d110979cc4d5edbc49e6 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Thu, 19 Dec 2024 15:56:26 +0000
Subject: [PATCH 10/10] Add C tests for peeling.

Previously it was impractical for us to write a test for peeling:
limitations in the optimiser meant that you needed a complex set of things
to go right for peeling to end up having visibly good effects. The previous
commit has made it possible for us to write a small test that makes the
effect of peeling obvious: in this case we are able to elide a guard in the
body.

Irritatingly, we have to have minor variants for hwt/swt, because the latter
inserts a store.
---
 tests/c/peel1.c | 55 +++++++++++++++++++++++++++++++++++++++
 tests/c/peel1.swt.c | 57 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+)
 create mode 100644 tests/c/peel1.c
 create mode 100644 tests/c/peel1.swt.c

diff --git a/tests/c/peel1.c b/tests/c/peel1.c
new file mode 100644
index 000000000..be256a56a
--- /dev/null
+++ b/tests/c/peel1.c
// ## The sanitisers fiddle with the generated code and mean we can't write the
// ## test we want.
// ignore-if: echo $RUSTFLAGS | grep "sanitizer" || test "$YKB_TRACER" = "swt"
// Compiler:
//   env-var: YKB_EXTRA_CC_FLAGS=-O2
// Run-time:
//   env-var: YKD_SERIALISE_COMPILATION=1
//   env-var: YKD_LOG_IR=jit-post-opt
// stderr:
//   0 1
//   --- Begin jit-post-opt ---
//   ...
//   guard true, ...
//   ...
//   guard true, ...
//   header_end [1i32, ...
//   ...
//   body_start ...
//   ...
//   guard true, ...
//   body_end [1i32, ...
//   --- End jit-post-opt ---
//   1 1
//   2 1
//   3 1
//   4 1

// Check that peeling works: a constant should be discovered by `header_end`
// that allows the body to have only 1 guard where the header must have 2.
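// (`yk_promote(argc)` below records the run-time value of `argc` -- 1 when
// the test harness runs this binary, per the expected "0 1" output -- and
// guards that it stays the same; `header_end [1i32, ...` then passes that
// constant into the body, which is what lets the body's copy of the guard be
// optimised away.)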
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <yk.h>
#include <yk_testing.h>

int main(int argc, char **argv) {
  YkMT *mt = yk_mt_new(NULL);
  yk_mt_hot_threshold_set(mt, 0);
  YkLocation loc = yk_location_new();

  int i = 0;
  NOOPT_VAL(i);
  for (; i < 5; i++) {
    yk_mt_control_point(mt, &loc);
    int y = yk_promote(argc);

    fprintf(stderr, "%d %d\n", i, y);
  }

  yk_location_drop(loc);
  yk_mt_shutdown(mt);
  return (EXIT_SUCCESS);
}

diff --git a/tests/c/peel1.swt.c b/tests/c/peel1.swt.c
new file mode 100644
index 000000000..25fa110c2
--- /dev/null
+++ b/tests/c/peel1.swt.c
// ## The sanitisers fiddle with the generated code and mean we can't write the
// ## test we want.
// ignore-if: echo $RUSTFLAGS | grep "sanitizer" || test "$YKB_TRACER" = "hwt"
// Compiler:
//   env-var: YKB_EXTRA_CC_FLAGS=-O2
// Run-time:
//   env-var: YKD_SERIALISE_COMPILATION=1
//   env-var: YKD_LOG_IR=jit-post-opt
// stderr:
//   0 1
//   --- Begin jit-post-opt ---
//   ...
//   guard true, ...
//   ...
//   guard true, ...
//   ......
//   header_end [1i32, ...
//   ...
//   body_start ...
//   ...
//   guard true, ...
//   ......
//   body_end [1i32, ...
//   --- End jit-post-opt ---
//   1 1
//   2 1
//   3 1
//   4 1

// Check that peeling works: a constant should be discovered by `header_end`
// that allows the body to have only 1 guard where the header must have 2.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <yk.h>
#include <yk_testing.h>

int main(int argc, char **argv) {
  YkMT *mt = yk_mt_new(NULL);
  yk_mt_hot_threshold_set(mt, 0);
  YkLocation loc = yk_location_new();

  int i = 0;
  NOOPT_VAL(i);
  for (; i < 5; i++) {
    yk_mt_control_point(mt, &loc);
    int y = yk_promote(argc);

    fprintf(stderr, "%d %d\n", i, y);
  }

  yk_location_drop(loc);
  yk_mt_shutdown(mt);
  return (EXIT_SUCCESS);
}