Dead load / store analysis and optimisation.
This commit adds a basic dead load / store optimisation. It is super
conservative: our alias analysis only does something useful when two
pointers derive from the same instruction and have a different offset.
In other words, we basically track `PtrAdd`s, since these guarantee that
our alias analysis is precise. We can clearly be somewhat more liberal,
but that will rapidly require deeper thought.
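
As a rough sketch of what that means in practice (using plain integers to
stand in for the base instruction and byte offsets, rather than the
`Address` type added below), the "definitely don't alias" test boils down
to:

```
// Sketch only: `base` stands for the SSA variable a chain of `PtrAdd`s
// ultimately derives from; `off`/`len` are byte offsets and sizes. Two
// accesses are known not to alias only if they share a base and their
// byte ranges are disjoint. Anything else is assumed to possibly alias.
fn definitely_no_alias(
    base1: usize, off1: i32, len1: i32,
    base2: usize, off2: i32, len2: i32,
) -> bool {
    base1 == base2 && (off1 + len1 <= off2 || off2 + len2 <= off1)
}
```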

In somewhat more detail, the new `HeapValues` struct associates
`Address`es with values. Roughly speaking, each load adds more
information to this struct; stores will invalidate all previous loads /
stores unless we are entirely sure they don't alias; and barriers
invalidate all previous loads / stores.
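
A minimal sketch of those semantics, with addresses simplified to
`(base, offset)` pairs and values to `u64`s (the real struct below stores
`Operand`s and does byte-precise overlap checks):

```
use std::collections::HashMap;

// Sketch only: a toy model of the load / store / barrier behaviour
// described above.
struct ToyHeapValues {
    hv: HashMap<(usize, i32), u64>,
}

impl ToyHeapValues {
    fn new() -> Self {
        ToyHeapValues { hv: HashMap::new() }
    }

    // Loads only add knowledge: aliased reads are harmless.
    fn load(&mut self, addr: (usize, i32), v: u64) {
        self.hv.insert(addr, v);
    }

    // Stores keep only entries we can prove don't alias (same base,
    // different offset; this toy model pretends all values are one byte
    // wide), then record the stored value.
    fn store(&mut self, addr: (usize, i32), v: u64) {
        self.hv.retain(|k, _| k.0 == addr.0 && k.1 != addr.1);
        self.hv.insert(addr, v);
    }

    // Barriers invalidate everything we knew.
    fn barrier(&mut self) {
        self.hv.clear();
    }
}
```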

Even though this is somewhat basic, it can still do some surprisingly
powerful optimisations. For example, given this trace IR (which is a
simplification of what we see in big_loop):

```
%0: ptr = param 0
%1: i8 = load %0
%2: i1 = eq %1, 3i8
guard true, %2, []
*%0 = 3i8
```

we can determine that the store at instruction 5 is dead: it writes back
a value (3i8) that we know for sure (via the load and guard) is already
stored at that address. The store is then turned into a tombstone.
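
Roughly speaking, once the tombstone is swept up by dead code
elimination, the trace above is thus equivalent to:

```
%0: ptr = param 0
%1: i8 = load %0
%2: i1 = eq %1, 3i8
guard true, %2, []
```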

The tests in `opt/mod.rs` give a wider view of what this can optimise.
The tests in `heapvalues.rs` give a more detailed view of how our alias
analysis works.
ltratt committed Jan 7, 2025
1 parent 8bd72c1 commit 03c055b
Showing 6 changed files with 629 additions and 40 deletions.
3 changes: 2 additions & 1 deletion ykrt/src/compile/jitc_yk/codegen/x64/rev_analyse.rs
@@ -138,8 +138,9 @@ impl<'a> RevAnalyse<'a> {

fn analyse(&mut self, iidx: InstIdx, inst: Inst) {
if self.used_insts.get(usize::from(iidx)).unwrap()
|| inst.is_internal_inst()
|| inst.is_guard()
|| inst.has_store_effect(self.m)
|| inst.is_barrier(self.m)
{
self.used_insts.set(usize::from(iidx), true);

3 changes: 2 additions & 1 deletion ykrt/src/compile/jitc_yk/jit_ir/dead_code.rs
@@ -13,8 +13,9 @@ impl Module {
let mut tombstone = Vob::from_elem(false, usize::from(self.last_inst_idx()) + 1);
for (iidx, inst) in self.iter_skipping_insts().rev() {
if used.get(usize::from(iidx)).unwrap()
|| inst.is_internal_inst()
|| inst.is_guard()
|| inst.has_store_effect(self)
|| inst.is_barrier(self)
{
used.set(usize::from(iidx), true);
inst.map_operand_vars(self, &mut |x| {
52 changes: 33 additions & 19 deletions ykrt/src/compile/jitc_yk/jit_ir/mod.rs
@@ -400,11 +400,11 @@ impl Module {
pub(crate) fn iter_skipping_insts(
&self,
) -> impl DoubleEndedIterator<Item = (InstIdx, Inst)> + '_ {
// The `unchecked_from` is safe because we know from `Self::push` that we can't have
// exceeded `InstIdx`'s bounds.
(0..self.insts.len()).filter_map(|i| {
let inst = &self.insts[i];
if !matches!(inst, Inst::Const(_) | Inst::Copy(_) | Inst::Tombstone) {
// The `unchecked_from` is safe because we know from `Self::push` that we can't have
// exceeded `InstIdx`'s bounds.
Some((InstIdx::unchecked_from(i), *inst))
} else {
None
@@ -1530,30 +1530,34 @@ impl Inst {
/// Does this instruction have a store side effect?
pub(crate) fn has_store_effect(&self, m: &Module) -> bool {
match self {
#[cfg(test)]
Inst::BlackBox(_) => true,
Inst::Copy(x) => m.inst_raw(*x).has_store_effect(m),
Inst::Store(_) => true,
Inst::Call(_) | Inst::IndirectCall(_) => false,
Inst::Call(_) | Inst::IndirectCall(_) => true,
_ => false,
}
}

/// Is this instruction a compiler barrier / fence? Our simple semantics are that barriers
/// cannot be reordered at all. Note that load/store effects are not considered
/// barriers.
pub(crate) fn is_barrier(&self, m: &Module) -> bool {
match self {
#[cfg(test)]
Inst::BlackBox(_) => true,
Inst::Copy(x) => m.inst_raw(*x).is_barrier(m),
Inst::Guard(_) => true,
Inst::Call(_) | Inst::IndirectCall(_) => true,
Inst::TraceHeaderStart
| Inst::TraceHeaderEnd
| Inst::TraceBodyStart
| Inst::TraceBodyEnd
| Inst::SidetraceEnd => true,
_ => false,
}
}

/// Is this instruction an "internal" trace instruction (e.g. `TraceHeaderStart` /
/// `TraceHeaderEnd`)? Such instructions must not be removed, and should be thought of as
/// similar to a barrier.
pub(crate) fn is_internal_inst(&self) -> bool {
matches!(
self,
Inst::TraceHeaderStart
| Inst::TraceHeaderEnd
| Inst::TraceBodyStart
| Inst::TraceBodyEnd
| Inst::SidetraceEnd
)
}

/// Is this instruction a guard?
///
/// This is a convenience function to allow an easy mirror of `is_internal_inst`.
pub(crate) fn is_guard(&self) -> bool {
matches!(self, Inst::Guard(_))
}

/// Apply the function `f` to each of this instruction's [Operand]s iff they are of type
@@ -2270,6 +2274,12 @@ impl LoadInst {
pub(crate) fn tyidx(&self) -> TyIdx {
self.tyidx
}

/// Is this a volatile load?
pub(crate) fn is_volatile(&self) -> bool {
// FIXME: We don't yet record this in AOT?!
false
}
}

/// The `Param` instruction.
@@ -2532,6 +2542,10 @@ impl StoreInst {
pub(crate) fn tgt(&self, m: &Module) -> Operand {
self.tgt.unpack(m)
}

pub(crate) fn is_volatile(&self) -> bool {
self.volatile
}
}

/// The operands for a [Inst::Select]
51 changes: 46 additions & 5 deletions ykrt/src/compile/jitc_yk/opt/analyse.rs
@@ -1,11 +1,26 @@
//! Analyse a trace and gradually refine what values we know a previous instruction can produce.
use super::{
super::jit_ir::{GuardInst, Inst, InstIdx, Module, Operand, Predicate},
Value,
super::jit_ir::{ConstIdx, GuardInst, Inst, InstIdx, Module, Operand, Predicate},
heapvalues::{Address, HeapValues},
};
use std::cell::RefCell;

#[derive(Clone, Debug)]
pub(super) enum Value {
Unknown,
Const(ConstIdx),
}

impl Value {
fn to_operand(&self) -> Operand {
match self {
Value::Unknown => todo!(),
Value::Const(cidx) => Operand::Const(*cidx),
}
}
}

/// Ongoing analysis of a trace: what value can a given instruction in the past produce?
///
/// Note that the analysis is forward-looking: just because an instruction's `Value` is (say) a
@@ -16,6 +31,7 @@ pub(super) struct Analyse {
/// it allows [Analyse::op_map] to take `&self`: changing that to `&mut self` upsets a lot of
/// other parts of the system w.r.t. the borrow checker.
values: RefCell<Vec<Value>>,
heapvalues: RefCell<HeapValues>,
}

impl Analyse {
@@ -26,6 +42,7 @@ impl Analyse {
// don't copy over [Tombstone]s and [Copy]s it will be slightly less than that.
// FIXME: Can we calculate this more accurately?
values: RefCell::new(vec![Value::Unknown; m.insts_len() * 2]),
heapvalues: RefCell::new(HeapValues::new()),
}
}

@@ -60,8 +77,32 @@ impl Analyse {
}

/// Update our idea of what value the instruction at `iidx` can produce.
pub(super) fn set_value(&self, iidx: InstIdx, v: Value) {
self.values.borrow_mut()[usize::from(iidx)] = v;
pub(super) fn set_value(&self, m: &Module, iidx: InstIdx, v: Value) {
self.values.borrow_mut()[usize::from(iidx)] = v.clone();
if let Some(Inst::Load(linst)) = m.inst_nocopy(iidx) {
let addr = Address::from_operand(m, linst.operand(m));
self.heapvalues.borrow_mut().load(m, addr, v.to_operand());
}
}

/// What, if any, is the currently known value of `bytesize` bytes stored at `addr`?
pub(super) fn heapvalue(&self, m: &Module, addr: Address, bytesize: usize) -> Option<Operand> {
self.heapvalues.borrow_mut().get(m, addr, bytesize)
}

/// Associate the value derived from a load in `op` with the address `addr`.
pub(super) fn push_heap_load(&self, m: &Module, addr: Address, op: Operand) {
self.heapvalues.borrow_mut().load(m, addr, op);
}

/// Associate the value derived from a store in `op` with the address `addr`.
pub(super) fn push_heap_store(&self, m: &Module, addr: Address, op: Operand) {
self.heapvalues.borrow_mut().store(m, addr, op);
}

/// Clear all known heap values.
pub(super) fn heap_barrier(&self) {
self.heapvalues.borrow_mut().barrier();
}

/// Use the guard `inst` to update our knowledge about the variable used as its condition.
@@ -81,7 +122,7 @@
if (g_inst.expect && pred == Predicate::Equal)
|| (!g_inst.expect && pred == Predicate::NotEqual)
{
self.set_value(iidx, Value::Const(cidx));
self.set_value(m, iidx, Value::Const(cidx));
}
}
(&Operand::Var(_), &Operand::Var(_)) => (),
192 changes: 192 additions & 0 deletions ykrt/src/compile/jitc_yk/opt/heapvalues.rs
@@ -0,0 +1,192 @@
//! Build up knowledge about values on the heap. As we optimise and analyse a trace, we build up
//! knowledge about the possible values that can be stored at different addresses. This module
//! provides a way of keeping track of what values we know about at a given point in the trace.
//!
//! Broadly speaking, loads add new information; stores tend to remove most old information and add
//! new information; and barriers remove all information.
use super::super::jit_ir::{Inst, InstIdx, Module, Operand};
use std::collections::HashMap;

/// An abstract "address" representing a location in RAM.
///
/// Users of this module should not need to use this `enum`'s variants: if you find yourself
/// needing to do so, then consider whether a refactoring of this code might be in order.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(super) enum Address {
/// This address is derived from a pointer stored in `InstIdx` + the constant offset `i32`
/// values in bytes.
PtrPlusOff(InstIdx, i32),
/// This address is a constant.
#[allow(unused)]
Const(usize),
}

impl Address {
/// Create an [Address] from an [Operand].
pub(super) fn from_operand(m: &Module, op: Operand) -> Address {
match op {
Operand::Var(mut iidx) => {
// We canonicalise pointers as "instruction + off" so that we can cut out any
// intermediate `PtrAdd`s.
let mut off = 0;
while let Inst::PtrAdd(pa_inst) = m.inst_nocopy(iidx).unwrap() {
match pa_inst.ptr(m) {
Operand::Var(ptr_iidx) => {
off += pa_inst.off();
iidx = ptr_iidx;
}
Operand::Const(_) => todo!(),
}
}
Address::PtrPlusOff(iidx, off)
}
Operand::Const(_) => todo!(),
}
}
}

/// The currently known values on the heap.
///
/// This must be used as part of a linear scan over a trace: as you move from one instruction to
/// the next, knowledge will be gained (e.g. because of a new load) and lost (e.g. because a store
/// invalidates some or all of our previous knowledge).
pub(super) struct HeapValues {
/// The heap values we currently know about.
hv: HashMap<Address, Operand>,
}

impl HeapValues {
pub(super) fn new() -> Self {
HeapValues { hv: HashMap::new() }
}

/// What is the currently known value at `addr` of `bytesize` bytes? Returns `None` if no value
/// of that size is known at that address.
pub(super) fn get(&self, m: &Module, addr: Address, bytesize: usize) -> Option<Operand> {
match self.hv.get(&addr) {
Some(x) if x.byte_size(m) == bytesize => Some(x.clone()),
_ => None,
}
}

/// Record the value `v` as known to be at `addr` as a result of a load. Note: `v`'s bytesize
/// *must* match the number of bytes stored at `addr`.
pub(super) fn load(&mut self, _m: &Module, addr: Address, v: Operand) {
// We don't need to invalidate anything for loads: we can safely have aliases.
self.hv.insert(addr, v);
}

/// Record the value `v` as known to be at `addr` as a result of a store. Note: `v`'s bytesize
/// *must* match the number of bytes stored at `addr`.
pub(super) fn store(&mut self, m: &Module, addr: Address, v: Operand) {
// We now need to perform alias analysis to see if this new value invalidates some or all
// of our previous knowledge.
match addr {
Address::PtrPlusOff(iidx, off) => {
let off = isize::try_from(off).unwrap();
let op_bytesize = isize::try_from(v.byte_size(m)).unwrap();
// We are ultra conservative here: we only say "these don't overlap" if two stores
// ultimately reference the same SSA variable with pointer adds. In other words, if
// we're writing 8 bytes and we're storing to `%3 + 8` and `%3 + 24` we can be
// entirely sure the stores don't overlap: in any other situation, we assume
// overlap is possible. This can be relaxed in the future.
self.hv.retain(|hv_addr, _| match hv_addr {
Address::PtrPlusOff(hv_iidx, hv_off) => {
let hv_off = isize::try_from(*hv_off).unwrap();
let hv_bytesize =
isize::try_from(m.inst_nocopy(*hv_iidx).unwrap().def_byte_size(m))
.unwrap();
iidx == *hv_iidx
&& (off + op_bytesize <= hv_off || hv_off + hv_bytesize <= off)
}
Address::Const(_) => false,
});
self.hv.insert(addr, v);
}
Address::Const(_) => todo!(),
}
}

/// Record a barrier instruction as having been encountered. This will invalidate all of our
/// existing heap knowledge.
pub(super) fn barrier(&mut self) {
self.hv.clear();
}
}

#[cfg(test)]
mod test {
use super::*;
use crate::compile::jitc_yk::jit_ir::{Const, Ty};

#[test]
fn basic() {
// We only need to have the `ptr_add`s in the module for this test.
let mut m = Module::from_str(
"
entry:
%0: ptr = param 0
%1: ptr = ptr_add %0, 1
%2: ptr = ptr_add %1, -1
%3: ptr = ptr_add %0, 8
%4: ptr = load %3
",
);
let mut hv = HeapValues::new();

// Add a single load
let addr0 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(0)));
assert!(hv.get(&m, addr0.clone(), 1).is_none());
let cidx0 = m.insert_const(Const::Int(m.int8_tyidx(), 0)).unwrap();
hv.load(&m, addr0.clone(), Operand::Const(cidx0));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr0.clone(), 1), Some(Operand::Const(cidx0)));

// Add a non-overlapping load
let addr1 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(1)));
let cidx1 = m.insert_const(Const::Int(m.int8_tyidx(), 1)).unwrap();
hv.load(&m, addr1.clone(), Operand::Const(cidx1));
assert_eq!(hv.hv.len(), 2);
assert_eq!(hv.get(&m, addr0.clone(), 1), Some(Operand::Const(cidx0)));
assert_eq!(hv.get(&m, addr1.clone(), 1), Some(Operand::Const(cidx1)));

// Check that ptr_adds are canonicalised.
let addr2 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(2)));
assert_eq!(hv.get(&m, addr2.clone(), 1), Some(Operand::Const(cidx0)));
assert!(hv.get(&m, addr2.clone(), 2).is_none());

// Add a store that replaces our knowledge of the second load but preserves the first.
let cidx2 = m.insert_const(Const::Int(m.int8_tyidx(), 2)).unwrap();
hv.store(&m, addr2.clone(), Operand::Const(cidx2));
assert_eq!(hv.hv.len(), 2);
assert_eq!(hv.get(&m, addr0.clone(), 1), Some(Operand::Const(cidx2)));
assert_eq!(hv.get(&m, addr1.clone(), 1), Some(Operand::Const(cidx1)));

// Add an overlapping i64 store which should remove information about both preceding loads.
let int64_tyidx = m.insert_ty(Ty::Integer(64)).unwrap();
let cidx3 = m.insert_const(Const::Int(int64_tyidx, 3)).unwrap();
hv.store(&m, addr2.clone(), Operand::Const(cidx3));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr0.clone(), 8), Some(Operand::Const(cidx3)));
assert!(hv.get(&m, addr0.clone(), 1).is_none());
assert!(hv.get(&m, addr1.clone(), 1).is_none());

// Add an overlapping i8 store which should remove information about the i64 load.
let cidx4 = m.insert_const(Const::Int(m.int8_tyidx(), 4)).unwrap();
hv.store(&m, addr1.clone(), Operand::Const(cidx4));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr1.clone(), 1), Some(Operand::Const(cidx4)));
assert!(hv.get(&m, addr0.clone(), 1).is_none());
assert!(hv.get(&m, addr0.clone(), 8).is_none());

// Add a store which we can't prove doesn't alias.
let addr4 = Address::from_operand(&m, Operand::Var(InstIdx::unchecked_from(4)));
let cidx5 = m.insert_const(Const::Int(m.int8_tyidx(), 5)).unwrap();
hv.store(&m, addr4.clone(), Operand::Const(cidx5));
assert_eq!(hv.hv.len(), 1);
assert_eq!(hv.get(&m, addr4.clone(), 1), Some(Operand::Const(cidx5)));

hv.barrier();
assert_eq!(hv.hv.len(), 0);
}
}