-
-
Notifications
You must be signed in to change notification settings - Fork 818
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat[venom]: new `DFTPass` algorithm (#4255)
This commit upgrades the DFT algorithm to allow for more instruction movement, and it performs "multidimensional" fencing, which allows instructions to be reordered across volatile instructions if there is no effect barrier between them. Since barriers do not truly live in the data dependency graph, the commit introduces a heuristic that chooses which barrier to recurse into first. It also removes the use of order ids and sorting, which improves performance. --------- Co-authored-by: Charles Cooper <[email protected]> Co-authored-by: HodanPlodky <[email protected]>
- Loading branch information
1 parent
48cb39b
commit c32b9b4
Showing
8 changed files
with
195 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,138 @@ | ||
from collections import defaultdict | ||
|
||
import vyper.venom.effects as effects | ||
from vyper.utils import OrderedSet | ||
from vyper.venom.analysis import DFGAnalysis | ||
from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRVariable | ||
from vyper.venom.analysis import DFGAnalysis, IRAnalysesCache, LivenessAnalysis | ||
from vyper.venom.basicblock import IRBasicBlock, IRInstruction | ||
from vyper.venom.function import IRFunction | ||
from vyper.venom.passes.base_pass import IRPass | ||
|
||
|
||
class DFTPass(IRPass): | ||
function: IRFunction | ||
inst_order: dict[IRInstruction, int] | ||
inst_order_num: int | ||
inst_offspring: dict[IRInstruction, OrderedSet[IRInstruction]] | ||
visited_instructions: OrderedSet[IRInstruction] | ||
ida: dict[IRInstruction, OrderedSet[IRInstruction]] | ||
|
||
def __init__(self, analyses_cache: IRAnalysesCache, function: IRFunction): | ||
super().__init__(analyses_cache, function) | ||
self.inst_offspring = {} | ||
|
||
def run_pass(self) -> None: | ||
self.inst_offspring = {} | ||
self.visited_instructions: OrderedSet[IRInstruction] = OrderedSet() | ||
|
||
self.dfg = self.analyses_cache.request_analysis(DFGAnalysis) | ||
basic_blocks = list(self.function.get_basic_blocks()) | ||
|
||
self.function.clear_basic_blocks() | ||
for bb in basic_blocks: | ||
self._process_basic_block(bb) | ||
|
||
self.analyses_cache.invalidate_analysis(LivenessAnalysis) | ||
|
||
def _process_basic_block(self, bb: IRBasicBlock) -> None: | ||
self.function.append_basic_block(bb) | ||
|
||
self._calculate_dependency_graphs(bb) | ||
self.instructions = list(bb.pseudo_instructions) | ||
non_phi_instructions = list(bb.non_phi_instructions) | ||
|
||
self.visited_instructions = OrderedSet() | ||
for inst in non_phi_instructions: | ||
self._calculate_instruction_offspring(inst) | ||
|
||
# Compute entry points in the graph of instruction dependencies | ||
entry_instructions: OrderedSet[IRInstruction] = OrderedSet(non_phi_instructions) | ||
for inst in non_phi_instructions: | ||
to_remove = self.ida.get(inst, OrderedSet()) | ||
if len(to_remove) > 0: | ||
entry_instructions.dropmany(to_remove) | ||
|
||
entry_instructions_list = list(entry_instructions) | ||
|
||
def _process_instruction_r(self, bb: IRBasicBlock, inst: IRInstruction, offset: int = 0): | ||
for op in inst.get_outputs(): | ||
assert isinstance(op, IRVariable), f"expected variable, got {op}" | ||
uses = self.dfg.get_uses(op) | ||
# Move the terminator instruction to the end of the list | ||
self._move_terminator_to_end(entry_instructions_list) | ||
|
||
for uses_this in uses: | ||
if uses_this.parent != inst.parent or uses_this.fence_id != inst.fence_id: | ||
# don't reorder across basic block or fence boundaries | ||
continue | ||
self.visited_instructions = OrderedSet() | ||
for inst in entry_instructions_list: | ||
self._process_instruction_r(self.instructions, inst) | ||
|
||
# if the instruction is a terminator, we need to place | ||
# it at the end of the basic block | ||
# along with all the instructions that "lead" to it | ||
self._process_instruction_r(bb, uses_this, offset) | ||
bb.instructions = self.instructions | ||
assert bb.is_terminated, f"Basic block should be terminated {bb}" | ||
|
||
def _move_terminator_to_end(self, instructions: list[IRInstruction]) -> None: | ||
terminator = next((inst for inst in instructions if inst.is_bb_terminator), None) | ||
if terminator is None: | ||
raise ValueError(f"Basic block should have a terminator instruction {self.function}") | ||
instructions.remove(terminator) | ||
instructions.append(terminator) | ||
|
||
def _process_instruction_r(self, instructions: list[IRInstruction], inst: IRInstruction): | ||
if inst in self.visited_instructions: | ||
return | ||
self.visited_instructions.add(inst) | ||
self.inst_order_num += 1 | ||
|
||
if inst.is_bb_terminator: | ||
offset = len(bb.instructions) | ||
|
||
if inst.opcode == "phi": | ||
# phi instructions stay at the beginning of the basic block | ||
# and no input processing is needed | ||
# bb.instructions.append(inst) | ||
self.inst_order[inst] = 0 | ||
if inst.is_pseudo: | ||
return | ||
|
||
for op in inst.get_input_variables(): | ||
target = self.dfg.get_producing_instruction(op) | ||
assert target is not None, f"no producing instruction for {op}" | ||
if target.parent != inst.parent or target.fence_id != inst.fence_id: | ||
# don't reorder across basic block or fence boundaries | ||
continue | ||
self._process_instruction_r(bb, target, offset) | ||
children = list(self.ida[inst]) | ||
|
||
self.inst_order[inst] = self.inst_order_num + offset | ||
def key(x): | ||
cost = inst.operands.index(x.output) if x.output in inst.operands else 0 | ||
return cost - len(self.inst_offspring[x]) * 0.5 | ||
|
||
def _process_basic_block(self, bb: IRBasicBlock) -> None: | ||
self.function.append_basic_block(bb) | ||
# heuristic: order children by their operand position in `inst`, favoring those with a larger dependency graph | |
children.sort(key=key) | ||
|
||
for inst in bb.instructions: | ||
inst.fence_id = self.fence_id | ||
if inst.is_volatile: | ||
self.fence_id += 1 | ||
for dep_inst in children: | ||
self._process_instruction_r(instructions, dep_inst) | ||
|
||
# We go through the instructions and calculate the order in which they should be executed | |
# based on the data flow graph. This order is stored in the inst_order dictionary. | ||
# We then sort the instructions based on this order. | ||
self.inst_order = {} | ||
self.inst_order_num = 0 | ||
for inst in bb.instructions: | ||
self._process_instruction_r(bb, inst) | ||
instructions.append(inst) | ||
|
||
bb.instructions.sort(key=lambda x: self.inst_order[x]) | ||
def _calculate_dependency_graphs(self, bb: IRBasicBlock) -> None: | ||
# ida: instruction dependency analysis | ||
self.ida = defaultdict(OrderedSet) | ||
|
||
def run_pass(self) -> None: | ||
self.dfg = self.analyses_cache.request_analysis(DFGAnalysis) | ||
non_phis = list(bb.non_phi_instructions) | ||
|
||
self.fence_id = 0 | ||
self.visited_instructions: OrderedSet[IRInstruction] = OrderedSet() | ||
# | ||
# Compute dependency graph | ||
# | ||
last_write_effects: dict[effects.Effects, IRInstruction] = {} | ||
last_read_effects: dict[effects.Effects, IRInstruction] = {} | ||
|
||
basic_blocks = list(self.function.get_basic_blocks()) | ||
for inst in non_phis: | ||
for op in inst.operands: | ||
dep = self.dfg.get_producing_instruction(op) | ||
if dep is not None and dep.parent == bb: | ||
self.ida[inst].add(dep) | ||
|
||
self.function.clear_basic_blocks() | ||
for bb in basic_blocks: | ||
self._process_basic_block(bb) | ||
write_effects = inst.get_write_effects() | ||
read_effects = inst.get_read_effects() | ||
|
||
for write_effect in write_effects: | ||
if write_effect in last_read_effects: | ||
self.ida[inst].add(last_read_effects[write_effect]) | ||
last_write_effects[write_effect] = inst | ||
|
||
for read_effect in read_effects: | ||
if read_effect in last_write_effects and last_write_effects[read_effect] != inst: | ||
self.ida[inst].add(last_write_effects[read_effect]) | ||
last_read_effects[read_effect] = inst | ||
|
||
def _calculate_instruction_offspring(self, inst: IRInstruction): | ||
if inst in self.inst_offspring: | ||
return self.inst_offspring[inst] | ||
|
||
self.inst_offspring[inst] = self.ida[inst].copy() | ||
|
||
deps = self.ida[inst] | ||
for dep_inst in deps: | ||
assert inst.parent == dep_inst.parent | ||
if dep_inst.opcode == "store": | ||
continue | ||
res = self._calculate_instruction_offspring(dep_inst) | ||
self.inst_offspring[inst] |= res | ||
|
||
return self.inst_offspring[inst] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters