Dead load / store analysis and optimisation.
This commit adds a basic dead load / store optimisation. It is super
conservative: our alias analysis only does something useful when two
pointers derive from the same instruction and have a different offset.
In other words, we basically track `PtrAdd`s, since these guarantee that
our alias analysis is precise. We can clearly be somewhat more liberal,
but that will rapidly require deeper thought.
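
As a rough sketch of what that means in practice (using plain integers to
stand in for the base instruction and byte offsets, rather than the
`Address` type added below), the "definitely don't alias" test boils down
to:

```
// Sketch only: `base` stands for the SSA variable a chain of `PtrAdd`s
// ultimately derives from; `off`/`len` are byte offsets and sizes. Two
// accesses are known not to alias only if they share a base and their
// byte ranges are disjoint. Anything else is assumed to possibly alias.
fn definitely_no_alias(
    base1: usize, off1: i32, len1: i32,
    base2: usize, off2: i32, len2: i32,
) -> bool {
    base1 == base2 && (off1 + len1 <= off2 || off2 + len2 <= off1)
}
```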

In somewhat more detail, the new `HeapValues` struct associates
`Address`es with values. Roughly speaking, each load adds more
information to this struct; stores will invalidate all previous loads /
stores unless we are entirely sure they don't alias; and barriers
invalidate all previous loads / stores.
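
A minimal sketch of those semantics, with addresses simplified to
`(base, offset)` pairs and values to `u64`s (the real struct below stores
`Operand`s and does byte-precise overlap checks):

```
use std::collections::HashMap;

// Sketch only: a toy model of the load / store / barrier behaviour
// described above.
struct ToyHeapValues {
    hv: HashMap<(usize, i32), u64>,
}

impl ToyHeapValues {
    fn new() -> Self {
        ToyHeapValues { hv: HashMap::new() }
    }

    // Loads only add knowledge: aliased reads are harmless.
    fn load(&mut self, addr: (usize, i32), v: u64) {
        self.hv.insert(addr, v);
    }

    // Stores keep only entries we can prove don't alias (same base,
    // different offset; this toy model pretends all values are one byte
    // wide), then record the stored value.
    fn store(&mut self, addr: (usize, i32), v: u64) {
        self.hv.retain(|k, _| k.0 == addr.0 && k.1 != addr.1);
        self.hv.insert(addr, v);
    }

    // Barriers invalidate everything we knew.
    fn barrier(&mut self) {
        self.hv.clear();
    }
}
```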

Even though this is somewhat basic, it can still do some surprisingly
powerful optimisations. For example, given this trace IR (which is a
simplification of what we see in big_loop):

```
%0: ptr = param 0
%1: i8 = load %0
%2: i1 = eq %1, 3i8
guard true, %2, []
*%0 = 3i8
```

we can determine that the store at instruction 5 is dead: it writes back
a value (3i8) that we know for sure (via the load and guard) is already
stored at that address. The store is then turned into a tombstone.
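
Roughly speaking, once the tombstone is swept up by dead code
elimination, the trace above is thus equivalent to:

```
%0: ptr = param 0
%1: i8 = load %0
%2: i1 = eq %1, 3i8
guard true, %2, []
```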

The tests in `opt/mod.rs` give a wider view of what this can optimise.
The tests in `heapvalues.rs` give a more detailed view of how our alias
analysis works.
ltratt committed Jan 7, 2025
1 parent 8bd72c1 commit 03c055b
Showing 6 changed files with 629 additions and 40 deletions.
3 changes: 2 additions & 1 deletion ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs
@@ -138,8 +138,9 @@ impl<'a> RevAnalyse<'a> {

fn analyse(&mut self, iidx: InstIdx, inst: Inst) {
if self.used_insts.get(usize::from(iidx)).unwrap()
|| inst.is_internal_inst()
|| inst.is_guard()
|| inst.has_store_effect(self.m)
|| inst.is_barrier(self.m)
{
self.used_insts.set(usize::from(iidx), true);

3 changes: 2 additions & 1 deletion ykrt/src/compile/jitc_yk/jit_ir/dead_code.rs
@@ -13,8 +13,9 @@ impl Module {
let mut tombstone = Vob::from_elem(false, usize::from(self.last_inst_idx()) + 1);
for (iidx, inst) in self.iter_skipping_insts().rev() {
if used.get(usize::from(iidx)).unwrap()
|| inst.is_internal_inst()
|| inst.is_guard()
|| inst.has_store_effect(self)
|| inst.is_barrier(self)
{
used.set(usize::from(iidx), true);
inst.map_operand_vars(self, &mut |x| {
52 changes: 33 additions & 19 deletions ykrt/src/compile/jitc_yk/jit_ir/mod.rs
@@ -400,11 +400,11 @@ impl Module {
pub(crate) fn iter_skipping_insts(
&self,
) -> impl DoubleEndedIterator<Item = (InstIdx, Inst)> + '_ {
// The `unchecked_from` is safe because we know from `Self::push` that we can't have
// exceeded `InstIdx`'s bounds.
(0..self.insts.len()).filter_map(|i| {
let inst = &self.insts[i];
if !matches!(inst, Inst::Const(_) | Inst::Copy(_) | Inst::Tombstone) {
// The `unchecked_from` is safe because we know from `Self::push` that we can't have
// exceeded `InstIdx`'s bounds.
Some((InstIdx::unchecked_from(i), *inst))
} else {
None
@@ -1530,30 +1530,34 @@ impl Inst {
/// Does this instruction have a store side effect?
pub(crate) fn has_store_effect(&self, m: &Module) -> bool {
match self {
#[cfg(test)]
Inst::BlackBox(_) => true,
Inst::Copy(x) => m.inst_raw(*x).has_store_effect(m),
Inst::Store(_) => true,
Inst::Call(_) | Inst::IndirectCall(_) => false,
Inst::Call(_) | Inst::IndirectCall(_) => true,
_ => false,
}
}

/// Is this instruction a compiler barrier / fence? Our simple semantics are that barriers
/// cannot be reordered at all. Note that load/store effects are not considered
/// barriers.
pub(crate) fn is_barrier(&self, m: &Module) -> bool {
match self {
#[cfg(test)]
Inst::BlackBox(_) => true,
Inst::Copy(x) => m.inst_raw(*x).is_barrier(m),
Inst::Guard(_) => true,
Inst::Call(_) | Inst::IndirectCall(_) => true,
Inst::TraceHeaderStart
| Inst::TraceHeaderEnd
| Inst::TraceBodyStart
| Inst::TraceBodyEnd
| Inst::SidetraceEnd => true,
_ => false,
}
}

/// Is this instruction an "internal" trace instruction (e.g. `TraceHeaderStart` /
/// `TraceHeaderEnd`)? Such instructions must not be removed, and should be thought of as
/// similar to a barrier.
pub(crate) fn is_internal_inst(&self) -> bool {
matches!(
self,
Inst::TraceHeaderStart
| Inst::TraceHeaderEnd
| Inst::TraceBodyStart
| Inst::TraceBodyEnd
| Inst::SidetraceEnd
)
}

/// Is this instruction a guard?
///
/// This is a convenience function to allow an easy mirror of `is_internal_inst`.
pub(crate) fn is_guard(&self) -> bool {
matches!(self, Inst::Guard(_))
}

/// Apply the function `f` to each of this instruction's [Operand]s iff they are of type
@@ -2270,6 +2274,12 @@ impl LoadInst {
pub(crate) fn tyidx(&self) -> TyIdx {
self.tyidx
}

/// Is this a volatile load?
pub(crate) fn is_volatile(&self) -> bool {
// FIXME: We don't yet record this in AOT?!
false
}
}

/// The `Param` instruction.
@@ -2532,6 +2542,10 @@ impl StoreInst {
pub(crate) fn tgt(&self, m: &Module) -> Operand {
self.tgt.unpack(m)
}

pub(crate) fn is_volatile(&self) -> bool {
self.volatile
}
}

/// The operands for a [Inst::Select]
51 changes: 46 additions & 5 deletions ykrt/src/compile/jitc_yk/opt/analyse.rs
@@ -1,11 +1,26 @@
//! Analyse a trace and gradually refine what values we know a previous instruction can produce.
use super::{
super::jit_ir::{GuardInst, Inst, InstIdx, Module, Operand, Predicate},
Value,
super::jit_ir::{ConstIdx, GuardInst, Inst, InstIdx, Module, Operand, Predicate},
heapvalues::{Address, HeapValues},
};
use std::cell::RefCell;

#[derive(Clone, Debug)]
pub(super) enum Value {
Unknown,
Const(ConstIdx),
}

impl Value {
fn to_operand(&self) -> Operand {
match self {
Value::Unknown => todo!(),
Value::Const(cidx) => Operand::Const(*cidx),
}
}
}

/// Ongoing analysis of a trace: what value can a given instruction in the past produce?
///
/// Note that the analysis is forward-looking: just because an instruction's `Value` is (say) a
@@ -16,6 +31,7 @@ pub(super) struct Analyse {
/// it allows [Analyse::op_map] to take `&self`: changing that to `&mut self` upsets a lot of
/// other parts of the system w.r.t. the borrow checker.
values: RefCell<Vec<Value>>,
heapvalues: RefCell<HeapValues>,
}

impl Analyse {
@@ -26,6 +42,7 @@ impl Analyse {
// don't copy over [Tombstone]s and [Copy]s it will be slightly less than that.
// FIXME: Can we calculate this more accurately?
values: RefCell::new(vec![Value::Unknown; m.insts_len() * 2]),
heapvalues: RefCell::new(HeapValues::new()),
}
}

@@ -60,8 +77,32 @@ impl Analyse {
}

/// Update our idea of what value the instruction at `iidx` can produce.
pub(super) fn set_value(&self, iidx: InstIdx, v: Value) {
self.values.borrow_mut()[usize::from(iidx)] = v;
pub(super) fn set_value(&self, m: &Module, iidx: InstIdx, v: Value) {
self.values.borrow_mut()[usize::from(iidx)] = v.clone();
if let Some(Inst::Load(linst)) = m.inst_nocopy(iidx) {
let addr = Address::from_operand(m, linst.operand(m));
self.heapvalues.borrow_mut().load(m, addr, v.to_operand());
}
}

/// What, if any, is the currently known value of `bytesize` bytes stored at `addr`?
pub(super) fn heapvalue(&self, m: &Module, addr: Address, bytesize: usize) -> Option<Operand> {
self.heapvalues.borrow_mut().get(m, addr, bytesize)
}

/// Associate the value derived from a load in `op` with the address `addr`.
pub(super) fn push_heap_load(&self, m: &Module, addr: Address, op: Operand) {
self.heapvalues.borrow_mut().load(m, addr, op);
}

/// Associate the value derived from a store in `op` with the address `addr`.
pub(super) fn push_heap_store(&self, m: &Module, addr: Address, op: Operand) {
self.heapvalues.borrow_mut().store(m, addr, op);
}

/// Clear all known heap values.
pub(super) fn heap_barrier(&self) {
self.heapvalues.borrow_mut().barrier();
}

/// Use the guard `inst` to update our knowledge about the variable used as its condition.
@@ -81,7 +122,7 @@
if (g_inst.expect && pred == Predicate::Equal)
|| (!g_inst.expect && pred == Predicate::NotEqual)
{
self.set_value(iidx, Value::Const(cidx));
self.set_value(m, iidx, Value::Const(cidx));
}
}
(&Operand::Var(_), &Operand::Var(_)) => (),
192 changes: 192 additions & 0 deletions ykrt/src/compile/jitc_yk/opt/heapvalues.rs
@@ -0,0 +1,192 @@
//! Build up knowledge about values on the heap. As we optimise and analyse a trace, we build up
//! knowledge about the possible values that can be stored at different addresses. This module
//! provides a way of keeping track of what values we know about at a given point in the trace.
//!
//! Broadly speaking, loads add new information; stores tend to remove most old information and add
//! new information; and barriers remove all information.
use super::super::jit_ir::{Inst, InstIdx, Module, Operand};
use std::collections::HashMap;

/// An abstract "address" representing a location in RAM.
///
/// Users of this module should not need to use this `enum`'s variants: if you find yourself
/// needing to do so, then consider whether a refactoring of this code might be in order.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(super) enum Address {
/// This address is derived from a pointer stored in `InstIdx` + the constant offset `i32`
/// values in bytes.
PtrPlusOff(InstIdx, i32),
/// This address is a constant.
#[allow(unused)]
Const(usize),
}

impl Address {
/// Create an [Address] from an [Operand].
pub(super) fn from_operand(m: &Module, op: Operand) -> Address {
match op {
Operand::Var(mut iidx) => {
// We canonicalise pointers as "instruction + off" so that we can cut out any
// intermediate `PtrAdd`s.
let mut off = 0;
while let Inst::PtrAdd(pa_inst) = m.inst_nocopy(iidx).unwrap() {
match pa_inst.ptr(m) {
Operand::Var(ptr_iidx) => {
off += pa_inst.off();
iidx = ptr_iidx;
}
Operand::Const(_) => todo!(),
}
}
Address::PtrPlusOff(iidx, off)
}
Operand::Const(_) => todo!(),
}
}
}

/// The currently known values on the heap.
///
/// This must be used as part of a linear scan over a trace: as you move from one instruction to
/// the next, knowledge will be gained (e.g. because of a new load) and lost (e.g. because a store
/// invalidates some or all of our previous knowledge).
pub(super) struct HeapValues {
/// The heap values we currently know about.
hv: HashMap<Address, Operand>,
}

impl HeapValues {
pub(super) fn new() -> Self {
HeapValues { hv: HashMap::new() }
}

/// What is the currently known value at `addr` of `bytesize` bytes? Returns `None` if no value
/// of that size is known at that address.
pub(super) fn get(&self, m: &Module, addr: Address, bytesize: usize) -> Option<Operand> {
match self.hv.get(&addr) {
Some(x) if x.byte_size(m) == bytesize => Some(x.clone()),
_ => None,
}
}

/// Record the value `v` as known to be at `addr` as a result of a load. Note: `v`'s bytesize
/// *must* match the number of bytes stored at `addr`.
pub(super) fn load(&mut self, _m: &Module, addr: Address, v: Operand) {
// We don't need to invalidate anything for loads: we can safely have aliases.
self.hv.insert(addr, v);
}

/// Record the value `v` as known to be at `addr` as a result of a store. Note: `v`'s bytesize
/// *must* match the number of bytes stored at `addr`.
pub(super) fn store(&mut self, m: &Module, addr: Address, v: Operand) {
// We now need to perform alias analysis to see if this new value invalidates some or all
// of our previous knowledge.
match addr {
Address::PtrPlusOff(iidx, off) => {
let off = isize::try_from(off).unwrap();
let op_bytesize = isize::try_from(v.byte_size(m)).unwrap();
// We are ultra conservative here: we only say "these don't overlap" if two stores
// ultimately reference the same SSA variable with pointer adds. In other words, if
// we're writing 8 bytes and we're storing to `%3 + 8` and `%3 + 24` we can be
// entirely sure the stores don't overlap: in any other situation, we assume
// overlap is possible. This can be relaxed in the future.
self.hv.retain(|hv_addr, _| match hv_addr {
Address::PtrPlusOff(hv_iidx, hv_off) => {
let hv_off = isize::try_from(*hv_off).unwrap();
let hv_bytesize =
isize::try_from(m.inst_nocopy(*hv_iidx).unwrap().def_byte_size(m))
.unwrap();
iidx == *hv_iidx
&& (off + op_bytesize <= hv_off || hv_off + hv_bytesize <= off)
}
Address::Const(_) => false,
});
self.hv.insert(addr, v);
}
Address::Const(_) => todo!(),
}
}

/// Record a barrier instruction as having been encountered. This will invalidate all of our
/// existing heap knowledge.
pub(super) fn barrier(&mut self) {
self.hv.clear();
}
}

#[cfg(test)]
mod test {
use super::*;
use crate::compile::jitc_yk::jit_ir::{Const, Ty};

#[test]
fn basic() {
// We only need to have the `ptr_add`s in the module for this test.
let mut m = Module::from_str(
"
entry:
%0: ptr = param 0
%1: ptr = ptr_add %0, 1
%2: ptr = ptr_add %1, -1
%3: ptr = ptr_add %0, 8
%4: ptr = load %3
",
);
let mut hv = HeapValues::new();

// Add a single load
let addr0 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(0)));
assert!(hv.get(&m, addr0.clone(), 1).is_none());
let cidx0 = m.insert_const(Const::Int(m.int8_tyidx(), 0)).unwrap();
hv.load(&m, addr0.clone(), Operand::Const(cidx0));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr0.clone(), 1), Some(Operand::Const(cidx0)));

// Add a non-overlapping load
let addr1 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(1)));
let cidx1 = m.insert_const(Const::Int(m.int8_tyidx(), 1)).unwrap();
hv.load(&m, addr1.clone(), Operand::Const(cidx1));
assert_eq!(hv.hv.len(), 2);
assert_eq!(hv.get(&m, addr0.clone(), 1), Some(Operand::Const(cidx0)));
assert_eq!(hv.get(&m, addr1.clone(), 1), Some(Operand::Const(cidx1)));

// Check that ptr_adds are canonicalised.
let addr2 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(2)));
assert_eq!(hv.get(&m, addr2.clone(), 1), Some(Operand::Const(cidx0)));
assert!(hv.get(&m, addr2.clone(), 2).is_none());

// Add a store that replaces our knowledge of the second load but preserves the first.
let cidx2 = m.insert_const(Const::Int(m.int8_tyidx(), 2)).unwrap();
hv.store(&m, addr2.clone(), Operand::Const(cidx2));
assert_eq!(hv.hv.len(), 2);
assert_eq!(hv.get(&m, addr0.clone(), 1), Some(Operand::Const(cidx2)));
assert_eq!(hv.get(&m, addr1.clone(), 1), Some(Operand::Const(cidx1)));

// Add an overlapping i64 store which should remove information about both preceding loads.
let int64_tyidx = m.insert_ty(Ty::Integer(64)).unwrap();
let cidx3 = m.insert_const(Const::Int(int64_tyidx, 3)).unwrap();
hv.store(&m, addr2.clone(), Operand::Const(cidx3));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr0.clone(), 8), Some(Operand::Const(cidx3)));
assert!(hv.get(&m, addr0.clone(), 1).is_none());
assert!(hv.get(&m, addr1.clone(), 1).is_none());

// Add an overlapping i8 store which should remove information about the i64 load.
let cidx4 = m.insert_const(Const::Int(m.int8_tyidx(), 4)).unwrap();
hv.store(&m, addr1.clone(), Operand::Const(cidx4));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr1.clone(), 1), Some(Operand::Const(cidx4)));
assert!(hv.get(&m, addr0.clone(), 1).is_none());
assert!(hv.get(&m, addr0.clone(), 8).is_none());

// Add a store which we can't prove doesn't alias.
let addr4 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(4)));
let cidx5 = m.insert_const(Const::Int(m.int8_tyidx(), 5)).unwrap();
hv.store(&m, addr4.clone(), Operand::Const(cidx5));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr4.clone(), 1), Some(Operand::Const(cidx5)));

hv.barrier();
assert_eq!(hv.hv.len(), 0);
}
}