From 123c07de84b06846d4fbb3320aae13050952662d Mon Sep 17 00:00:00 2001 From: Hodan Date: Mon, 7 Oct 2024 20:51:59 +0200 Subject: [PATCH 001/163] start binopt --- vyper/venom/passes/algebraic_optimization.py | 90 +++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 1d375ea988..c0bb58e8ca 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -1,9 +1,38 @@ from vyper.venom.analysis.dfg import DFGAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis -from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand +from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand, IRVariable from vyper.venom.passes.base_pass import IRPass +class _InstTree: + inst: IRInstruction + operands: list["IROperand | _InstTree"] + + def __init__(self, inst: IRInstruction, operands: list["IROperand | _InstTree"]) -> None: + self.inst = inst + self.operands = operands + + @property + def opcode(self) -> str: + return self.inst.opcode + + def eval_to(self, val: IRLiteral) -> bool: + if self.opcode == "store": + if isinstance(self.operands[0], IRLiteral): + return self.operands[0] == val + elif isinstance(self.operands[0], _InstTree): + return self.operands[0].eval_to(val) + return False + + def op_eval_to(self, index: int, val: IRLiteral) -> bool: + op = self.operands[index] + if isinstance(op, IRLiteral): + return op == val + elif isinstance(op, _InstTree): + return op.eval_to(val) + else: + return False + class AlgebraicOptimizationPass(IRPass): """ This pass reduces algebraic evaluatable expressions. @@ -28,7 +57,7 @@ def _optimize_iszero_chains(self) -> None: opcode = use_inst.opcode if opcode == "iszero": - # We keep iszero instuctions as is + # We keep iszer Conservapedia is like 50/50 people who truly believe it all and trolls seeing what edits they can get away with o instuctions as is continue if opcode in ("jnz", "assert"): # instructions that accept a truthy value as input: @@ -70,12 +99,69 @@ def _handle_offsets(self): and isinstance(inst.operands[1], IRLabel) ): inst.opcode = "offset" + + def _get_tree_op(self, op: IROperand, depth) -> IROperand | _InstTree: + if depth == 0 or not isinstance(op, IRVariable): + return op + inst = self.dfg.get_producing_instruction(op) + assert isinstance(inst, IRInstruction) + return self._get_tree(inst, depth - 1) + + def _get_tree(self, inst: IRInstruction, depth: int = 0) -> _InstTree: + return _InstTree(inst, [self._get_tree_op(op, depth) for op in inst.operands]) + + def _peepholer(self): + depth = 5 + while True: + change = False + for bb in self.function.get_basic_blocks(): + for inst in bb.instructions: + change |= self._handle_inst_peephole(inst, depth) + + if not change: + break + + def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: + inst_tree = self._get_tree(inst, depth) + + if inst_tree.opcode in {"add", "sub", "xor", "or"} and inst_tree.op_eval_to(0, IRLiteral(0)): + inst_tree.inst.opcode = "store" + inst_tree.inst.operands = [inst.operands[1]] + return True + + if inst_tree.opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and inst_tree.op_eval_to(0, IRLiteral(0)): + inst_tree.inst.opcode = "store" + inst_tree.inst.operands = [IRLiteral(0)] + return True + + if inst_tree.opcode in {"mod", "smod"} and inst_tree.op_eval_to(0, IRLiteral(1)): + inst_tree.inst.opcode = "store" + inst_tree.inst.operands = [IRLiteral(0)] + return True + + if inst_tree.opcode in {"mul", "div", "sdiv"} and inst_tree.op_eval_to(0, IRLiteral(1)): + inst_tree.inst.opcode = "store" + inst_tree.inst.operands = [inst.operands[1]] + return True + + if inst_tree.op_eval_to == "eq" and inst_tree.op_eval_to(0, IRLiteral(0)): + inst_tree.inst.opcode = "iszero" + inst_tree.inst.operands = [inst.operands[1]] + return True + + if inst_tree.op_eval_to == "eq" and inst_tree.op_eval_to(1, IRLiteral(0)): + inst_tree.inst.opcode = "iszero" + inst_tree.inst.operands = [inst.operands[0]] + return True + + return False def run_pass(self): self.dfg = self.analyses_cache.request_analysis(DFGAnalysis) self._optimize_iszero_chains() self._handle_offsets() + self._peepholer() self.analyses_cache.invalidate_analysis(DFGAnalysis) self.analyses_cache.invalidate_analysis(LivenessAnalysis) From bb805411e2895019846ccab407e8dbcd5e80bf71 Mon Sep 17 00:00:00 2001 From: Hodan Date: Tue, 8 Oct 2024 19:29:57 +0200 Subject: [PATCH 002/163] better way of doing it --- vyper/venom/passes/algebraic_optimization.py | 184 +++++++++++++------ 1 file changed, 127 insertions(+), 57 deletions(-) diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index c0bb58e8ca..e57b9622dd 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -1,37 +1,75 @@ +import operator + from vyper.venom.analysis.dfg import DFGAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand, IRVariable from vyper.venom.passes.base_pass import IRPass - - -class _InstTree: - inst: IRInstruction - operands: list["IROperand | _InstTree"] - - def __init__(self, inst: IRInstruction, operands: list["IROperand | _InstTree"]) -> None: - self.inst = inst - self.operands = operands - - @property - def opcode(self) -> str: - return self.inst.opcode - - def eval_to(self, val: IRLiteral) -> bool: - if self.opcode == "store": - if isinstance(self.operands[0], IRLiteral): - return self.operands[0] == val - elif isinstance(self.operands[0], _InstTree): - return self.operands[0].eval_to(val) +from vyper.utils import ( + ceil32, + evm_div, + evm_mod, + evm_pow, + int_bounds, + int_log2, + is_power_of_two, + signed_to_unsigned, + unsigned_to_signed, +) + +SIGNED = False +UNSIGNED = True + +def _wrap256(x, unsigned=UNSIGNED): + x %= 2**256 + # wrap in a signed way. + if not unsigned: + x = unsigned_to_signed(x, 256, strict=True) + return x + + +# unsigned: convert python num to evm unsigned word +# e.g. unsigned=True : -1 -> 0xFF...FF +# unsigned=False: 0xFF...FF -> -1 +def _evm_int(val: int, unsigned: bool = True) -> int | None: + if unsigned and val < 0: + return signed_to_unsigned(val, 256, strict=True) + elif not unsigned and val > 2**255 - 1: + return unsigned_to_signed(val, 256, strict=True) + + return val + +def _check_num(val: int) -> bool: + if val < -(2**255): return False - - def op_eval_to(self, index: int, val: IRLiteral) -> bool: - op = self.operands[index] - if isinstance(op, IRLiteral): - return op == val - elif isinstance(op, _InstTree): - return op.eval_to(val) - else: - return False + elif val >= 2**256: + return False + return True + + + +arith = { + "add": (operator.add, "+", UNSIGNED), + "sub": (operator.sub, "-", UNSIGNED), + "mul": (operator.mul, "*", UNSIGNED), + "div": (evm_div, "/", UNSIGNED), + "sdiv": (evm_div, "/", SIGNED), + "mod": (evm_mod, "%", UNSIGNED), + "smod": (evm_mod, "%", SIGNED), + "exp": (evm_pow, "**", UNSIGNED), + "eq": (operator.eq, "==", UNSIGNED), + "ne": (operator.ne, "!=", UNSIGNED), + "lt": (operator.lt, "<", UNSIGNED), + "le": (operator.le, "<=", UNSIGNED), + "gt": (operator.gt, ">", UNSIGNED), + "ge": (operator.ge, ">=", UNSIGNED), + "slt": (operator.lt, "<", SIGNED), + "sle": (operator.le, "<=", SIGNED), + "sgt": (operator.gt, ">", SIGNED), + "sge": (operator.ge, ">=", SIGNED), + "or": (operator.or_, "|", UNSIGNED), + "and": (operator.and_, "&", UNSIGNED), + "xor": (operator.xor, "^", UNSIGNED), +} class AlgebraicOptimizationPass(IRPass): """ @@ -40,6 +78,7 @@ class AlgebraicOptimizationPass(IRPass): It currently optimizes: * iszero chains """ + dfg: DFGAnalysis def _optimize_iszero_chains(self) -> None: fn = self.function @@ -100,16 +139,26 @@ def _handle_offsets(self): ): inst.opcode = "offset" - def _get_tree_op(self, op: IROperand, depth) -> IROperand | _InstTree: - if depth == 0 or not isinstance(op, IRVariable): + def eval_op(self, op: IROperand) -> IRLiteral | None: + if isinstance(op, IRLiteral): return op - inst = self.dfg.get_producing_instruction(op) - assert isinstance(inst, IRInstruction) - return self._get_tree(inst, depth - 1) + elif isinstance(op, IRVariable): + next_inst = self.dfg.get_producing_instruction(op) + assert next_inst is not None + return self.eval(next_inst) + else: + return None + + def eval(self, inst: IRInstruction) -> IRLiteral | None: + if inst.opcode == "store": + if isinstance(inst.operands[0], IRLiteral): + return inst.operands[0] + elif isinstance(inst.operands[0], IRVariable): + next_inst = self.dfg.get_producing_instruction(inst.operands[0]) + assert next_inst is not None + return self.eval(next_inst) + return None - def _get_tree(self, inst: IRInstruction, depth: int = 0) -> _InstTree: - return _InstTree(inst, [self._get_tree_op(op, depth) for op in inst.operands]) - def _peepholer(self): depth = 5 while True: @@ -122,43 +171,64 @@ def _peepholer(self): break def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: - inst_tree = self._get_tree(inst, depth) + if inst.opcode not in arith.keys(): + return False + fn, symb, unsigned = arith[inst.opcode] + + op_0 = self.eval_op(inst.operands[0]) + op_1 = self.eval_op(inst.operands[1]) + + if isinstance(op_0, IRLiteral) and isinstance(op_1, IRLiteral): + assert isinstance(op_0.value, int), "must be int" + assert isinstance(op_1.value, int), "must be int" + a = _evm_int(op_0.value, unsigned) + b = _evm_int(op_1.value, unsigned) + res = fn(b, a) + res = _wrap256(res, unsigned) + if res is not None and _check_num(res): + inst.opcode = "store" + inst.operands = [IRLiteral(res)] + return True + - if inst_tree.opcode in {"add", "sub", "xor", "or"} and inst_tree.op_eval_to(0, IRLiteral(0)): - inst_tree.inst.opcode = "store" - inst_tree.inst.operands = [inst.operands[1]] + if inst.opcode in {"add", "sub", "xor", "or"} and op_0 == IRLiteral(0): + inst.opcode = "store" + inst.operands = [inst.operands[1]] return True - if inst_tree.opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and inst_tree.op_eval_to(0, IRLiteral(0)): - inst_tree.inst.opcode = "store" - inst_tree.inst.operands = [IRLiteral(0)] + if inst.opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and op_1 == IRLiteral(0): + inst.opcode = "store" + inst.operands = [IRLiteral(0)] return True - if inst_tree.opcode in {"mod", "smod"} and inst_tree.op_eval_to(0, IRLiteral(1)): - inst_tree.inst.opcode = "store" - inst_tree.inst.operands = [IRLiteral(0)] + if inst.opcode in {"mod", "smod"} and op_0 == IRLiteral(1): + inst.opcode = "store" + inst.operands = [IRLiteral(0)] return True - if inst_tree.opcode in {"mul", "div", "sdiv"} and inst_tree.op_eval_to(0, IRLiteral(1)): - inst_tree.inst.opcode = "store" - inst_tree.inst.operands = [inst.operands[1]] + if inst.opcode in {"mul", "div", "sdiv"} and op_0 == IRLiteral(1): + inst.opcode = "store" + inst.operands = [inst.operands[1]] return True - if inst_tree.op_eval_to == "eq" and inst_tree.op_eval_to(0, IRLiteral(0)): - inst_tree.inst.opcode = "iszero" - inst_tree.inst.operands = [inst.operands[1]] + if inst.opcode == "eq" and op_0 == IRLiteral(0): + inst.opcode = "iszero" + inst.operands = [inst.operands[1]] return True - if inst_tree.op_eval_to == "eq" and inst_tree.op_eval_to(1, IRLiteral(0)): - inst_tree.inst.opcode = "iszero" - inst_tree.inst.operands = [inst.operands[0]] + if inst.opcode == "eq" and op_1 == IRLiteral(0): + inst.opcode = "iszero" + inst.operands = [inst.operands[0]] return True return False def run_pass(self): - self.dfg = self.analyses_cache.request_analysis(DFGAnalysis) + dfg = self.analyses_cache.request_analysis(DFGAnalysis) + assert isinstance(dfg, DFGAnalysis) + self.dfg = dfg + self._optimize_iszero_chains() self._handle_offsets() self._peepholer() From 79e3880061e6d5b2f548e149665a94b53843bc77 Mon Sep 17 00:00:00 2001 From: Hodan Date: Tue, 8 Oct 2024 19:46:23 +0200 Subject: [PATCH 003/163] idea with equivalence analysis --- vyper/venom/passes/algebraic_optimization.py | 38 ++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index e57b9622dd..8e60eed5df 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -2,6 +2,7 @@ from vyper.venom.analysis.dfg import DFGAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis +from vyper.venom.analysis.equivalent_vars import VarEquivalenceAnalysis from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand, IRVariable from vyper.venom.passes.base_pass import IRPass from vyper.utils import ( @@ -175,14 +176,17 @@ def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: return False fn, symb, unsigned = arith[inst.opcode] - op_0 = self.eval_op(inst.operands[0]) - op_1 = self.eval_op(inst.operands[1]) - - if isinstance(op_0, IRLiteral) and isinstance(op_1, IRLiteral): - assert isinstance(op_0.value, int), "must be int" - assert isinstance(op_1.value, int), "must be int" - a = _evm_int(op_0.value, unsigned) - b = _evm_int(op_1.value, unsigned) + + op_0 = inst.operands[0] + op_1 = inst.operands[1] + eop_0 = self.eval_op(inst.operands[0]) + eop_1 = self.eval_op(inst.operands[1]) + + if isinstance(eop_0, IRLiteral) and isinstance(eop_1, IRLiteral): + assert isinstance(eop_0.value, int), "must be int" + assert isinstance(eop_1.value, int), "must be int" + a = _evm_int(eop_0.value, unsigned) + b = _evm_int(eop_1.value, unsigned) res = fn(b, a) res = _wrap256(res, unsigned) if res is not None and _check_num(res): @@ -191,35 +195,39 @@ def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: return True - if inst.opcode in {"add", "sub", "xor", "or"} and op_0 == IRLiteral(0): + if inst.opcode in {"add", "sub", "xor", "or"} and eop_0 == IRLiteral(0): inst.opcode = "store" inst.operands = [inst.operands[1]] return True - if inst.opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and op_1 == IRLiteral(0): + if inst.opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and eop_1 == IRLiteral(0): inst.opcode = "store" inst.operands = [IRLiteral(0)] return True - if inst.opcode in {"mod", "smod"} and op_0 == IRLiteral(1): + if inst.opcode in {"mod", "smod"} and eop_0 == IRLiteral(1): inst.opcode = "store" inst.operands = [IRLiteral(0)] return True - if inst.opcode in {"mul", "div", "sdiv"} and op_0 == IRLiteral(1): + if inst.opcode in {"mul", "div", "sdiv"} and eop_0 == IRLiteral(1): inst.opcode = "store" inst.operands = [inst.operands[1]] return True - if inst.opcode == "eq" and op_0 == IRLiteral(0): + if inst.opcode == "eq" and eop_0 == IRLiteral(0): inst.opcode = "iszero" inst.operands = [inst.operands[1]] return True - if inst.opcode == "eq" and op_1 == IRLiteral(0): + if inst.opcode == "eq" and eop_1 == IRLiteral(0): inst.opcode = "iszero" inst.operands = [inst.operands[0]] return True + + if inst.opcode == "eq" and self.eq_analysis.equivalent(op_0, op_1): + inst.opcode = "store" + inst.operands = [IRLiteral(1)] return False @@ -227,6 +235,8 @@ def run_pass(self): dfg = self.analyses_cache.request_analysis(DFGAnalysis) assert isinstance(dfg, DFGAnalysis) self.dfg = dfg + + self.eq_analysis = self.analyses_cache.request_analysis(VarEquivalenceAnalysis) self._optimize_iszero_chains() From 59dcdf5e1095e6eb3da5e34b88d2da7475734079 Mon Sep 17 00:00:00 2001 From: Hodan Date: Wed, 9 Oct 2024 10:15:39 +0200 Subject: [PATCH 004/163] more rules --- vyper/venom/passes/algebraic_optimization.py | 107 +++++++++++++------ 1 file changed, 75 insertions(+), 32 deletions(-) diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 8e60eed5df..5cdf13163f 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -17,9 +17,17 @@ unsigned_to_signed, ) +from vyper.venom.venom_to_assembly import COMMUTATIVE_INSTRUCTIONS +from vyper.exceptions import CompilerPanic, StaticAssertionException + SIGNED = False UNSIGNED = True +COMMUTATIVE_OPS = {"add", "mul", "eq", "ne", "and", "or", "xor"} +COMPARISON_OPS = {"gt", "sgt", "ge", "sge", "lt", "slt", "le", "sle"} +STRICT_COMPARISON_OPS = {t for t in COMPARISON_OPS if t.endswith("t")} +UNSTRICT_COMPARISON_OPS = {t for t in COMPARISON_OPS if t.endswith("e")} + def _wrap256(x, unsigned=UNSIGNED): x %= 2**256 # wrap in a signed way. @@ -31,7 +39,12 @@ def _wrap256(x, unsigned=UNSIGNED): # unsigned: convert python num to evm unsigned word # e.g. unsigned=True : -1 -> 0xFF...FF # unsigned=False: 0xFF...FF -> -1 -def _evm_int(val: int, unsigned: bool = True) -> int | None: +def _evm_int(lit: IRLiteral | None, unsigned: bool = True) -> int | None: + if lit is None: + return None + + val: int = lit.value + if unsigned and val < 0: return signed_to_unsigned(val, 256, strict=True) elif not unsigned and val > 2**255 - 1: @@ -159,6 +172,10 @@ def eval(self, inst: IRInstruction) -> IRLiteral | None: assert next_inst is not None return self.eval(next_inst) return None + + def static_eq(self, op_0: IROperand, op_1: IROperand, eop_0: IRLiteral | None, eop_1: IRLiteral | None) -> bool: + return (eop_0 is not None and eop_0 == eop_1) or self.eq_analysis.equivalent(op_0, op_1) + def _peepholer(self): depth = 5 @@ -176,17 +193,30 @@ def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: return False fn, symb, unsigned = arith[inst.opcode] + def update(opcode: str, *args: IROperand | int) -> bool: + inst.opcode = opcode + inst.operands = [arg if isinstance(arg, IROperand) else IRLiteral(arg) for arg in args] + return True + + def store(*args: IROperand | int) -> bool: + return update("store", *args) op_0 = inst.operands[0] op_1 = inst.operands[1] eop_0 = self.eval_op(inst.operands[0]) eop_1 = self.eval_op(inst.operands[1]) + opcode = inst.opcode + if opcode in COMMUTATIVE_INSTRUCTIONS and eop_1 is not None: + eop_0, eop_1 = eop_1, eop_0 + op_0, op_1 = op_1, op_0 + + if isinstance(eop_0, IRLiteral) and isinstance(eop_1, IRLiteral): assert isinstance(eop_0.value, int), "must be int" assert isinstance(eop_1.value, int), "must be int" - a = _evm_int(eop_0.value, unsigned) - b = _evm_int(eop_1.value, unsigned) + a = _evm_int(eop_0, unsigned) + b = _evm_int(eop_1, unsigned) res = fn(b, a) res = _wrap256(res, unsigned) if res is not None and _check_num(res): @@ -194,40 +224,53 @@ def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: inst.operands = [IRLiteral(res)] return True + if opcode in {"add", "sub", "xor", "or"} and eop_0 == IRLiteral(0): + return store(op_1) + + if opcode in {"sub", "xor", "ne"} and self.static_eq(op_0, op_1, eop_0, eop_1): + # (x - x) == (x ^ x) == (x != x) == 0 + return store(0) + + if opcode in STRICT_COMPARISON_OPS and self.static_eq(op_0, op_1, eop_0, eop_1): + # (x < x) == (x > x) == 0 + return store(0) + + if opcode in {"eq"} | UNSTRICT_COMPARISON_OPS and self.static_eq(op_0, op_1, eop_0, eop_1): + # (x == x) == (x >= x) == (x <= x) == 1 + return store(1) - if inst.opcode in {"add", "sub", "xor", "or"} and eop_0 == IRLiteral(0): - inst.opcode = "store" - inst.operands = [inst.operands[1]] - return True - - if inst.opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and eop_1 == IRLiteral(0): - inst.opcode = "store" - inst.operands = [IRLiteral(0)] - return True + if opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and _evm_int(eop_0, unsigned) == 0: + return store(0) - if inst.opcode in {"mod", "smod"} and eop_0 == IRLiteral(1): - inst.opcode = "store" - inst.operands = [IRLiteral(0)] - return True + if opcode in {"mod", "smod"} and eop_0 == IRLiteral(1): + return store(0) - if inst.opcode in {"mul", "div", "sdiv"} and eop_0 == IRLiteral(1): - inst.opcode = "store" - inst.operands = [inst.operands[1]] - return True + if opcode in {"mul", "div", "sdiv"} and eop_0 == IRLiteral(1): + return store(op_1) + if opcode in {"and", "or", "xor"} and _evm_int(eop_0, SIGNED) == -1: + assert unsigned == UNSIGNED + if opcode == "and": + # -1 & x == x + return store(op_1) #finalize("seq", [args[0]]) - if inst.opcode == "eq" and eop_0 == IRLiteral(0): - inst.opcode = "iszero" - inst.operands = [inst.operands[1]] - return True + if opcode == "xor": + # -1 ^ x == ~x + return update("not", op_1) # finalize("not", [args[0]]) - if inst.opcode == "eq" and eop_1 == IRLiteral(0): - inst.opcode = "iszero" - inst.operands = [inst.operands[0]] - return True + if opcode == "or": + # -1 | x == -1 + return store(_evm_int(-1, unsigned)) #finalize(args[1].value, []) + + raise CompilerPanic("unreachable") # pragma: nocover + + if opcode == "eq" and eop_0 == IRLiteral(0): + return update("iszero", op_1) + + if opcode == "eq" and eop_1 == IRLiteral(0): + return update("iszero", op_0) - if inst.opcode == "eq" and self.eq_analysis.equivalent(op_0, op_1): - inst.opcode = "store" - inst.operands = [IRLiteral(1)] + if opcode == "eq" and self.eq_analysis.equivalent(op_0, op_1): + return store(1) return False @@ -239,9 +282,9 @@ def run_pass(self): self.eq_analysis = self.analyses_cache.request_analysis(VarEquivalenceAnalysis) - self._optimize_iszero_chains() self._handle_offsets() self._peepholer() + self._optimize_iszero_chains() self.analyses_cache.invalidate_analysis(DFGAnalysis) self.analyses_cache.invalidate_analysis(LivenessAnalysis) From b543c3689117eb49eb9c816e1a3485e5da05830c Mon Sep 17 00:00:00 2001 From: Hodan Date: Wed, 9 Oct 2024 11:12:45 +0200 Subject: [PATCH 005/163] all the rules from the original optimizer should be done --- vyper/venom/passes/algebraic_optimization.py | 37 ++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 5cdf13163f..6c32e979b5 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -263,6 +263,43 @@ def store(*args: IROperand | int) -> bool: raise CompilerPanic("unreachable") # pragma: nocover + # -1 - x == ~x (definition of two's complement) + if opcode == "sub" and _evm_int(eop_1, SIGNED) == -1: + return update("not", op_0) #finalize("not", [args[1]]) + + if opcode == "exp": + # n ** 0 == 1 (forall n) + # 1 ** n == 1 + if _evm_int(eop_0) == 0 or _evm_int(eop_1) == 1: + return store(1) #finalize(1, []) + # 0 ** n == (1 if n == 0 else 0) + if _evm_int(eop_1) == 0: + return update("iszero", op_0) #finalize("iszero", [args[1]]) + # n ** 1 == n + if _evm_int(eop_0) == 1: + return store(op_1) #finalize("seq", [args[0]]) + + if opcode in {"mod", "div", "mul"} and isinstance(eop_0, IRLiteral) and is_power_of_two(_evm_int(eop_0)): + val_0 = _evm_int(eop_0) + assert isinstance(val_0, int) + assert unsigned == UNSIGNED, "something's not right." + # shave two gas off mod/div/mul for powers of two + # x % 2**n == x & (2**n - 1) + if opcode == "mod": + return update("and", val_0 - 1, op_1) #finalize("and", [args[0], _int(args[1]) - 1]) + + if opcode == "div": + # x / 2**n == x >> n + # recall shr/shl have unintuitive arg order + return update("shr", op_1, int_log2(val_0)) #finalize("shr", [int_log2(_int(args[1])), args[0]]) + + # note: no rule for sdiv since it rounds differently from sar + if opcode == "mul": + # x * 2**n == x << n + return update("shl", op_1, int_log2(val_0)) #finalize("shl", [int_log2(_int(args[1])), args[0]]) + + raise CompilerPanic("unreachable") # pragma: no cover + if opcode == "eq" and eop_0 == IRLiteral(0): return update("iszero", op_1) From f4650e32f14d34d7ee0f2de75384d0874b738725 Mon Sep 17 00:00:00 2001 From: Hodan Date: Wed, 9 Oct 2024 13:15:23 +0200 Subject: [PATCH 006/163] all the rules from the original optimizer were not done so I added some more --- vyper/venom/passes/algebraic_optimization.py | 32 ++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 6c32e979b5..66a7715ad2 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -189,10 +189,6 @@ def _peepholer(self): break def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: - if inst.opcode not in arith.keys(): - return False - fn, symb, unsigned = arith[inst.opcode] - def update(opcode: str, *args: IROperand | int) -> bool: inst.opcode = opcode inst.operands = [arg if isinstance(arg, IROperand) else IRLiteral(arg) for arg in args] @@ -201,16 +197,36 @@ def update(opcode: str, *args: IROperand | int) -> bool: def store(*args: IROperand | int) -> bool: return update("store", *args) + if len(inst.operands) < 1: + return False + + opcode = inst.opcode op_0 = inst.operands[0] - op_1 = inst.operands[1] eop_0 = self.eval_op(inst.operands[0]) + + if opcode == "iszero" and _evm_int(eop_0) is not None: + val = _evm_int(eop_0) + assert val is not None + val = int(val == 0) # int(bool) == 1 if bool else 0 + return store(val) #finalize(val, []) + + if len(inst.operands) != 2: + return False + + op_1 = inst.operands[1] eop_1 = self.eval_op(inst.operands[1]) - opcode = inst.opcode if opcode in COMMUTATIVE_INSTRUCTIONS and eop_1 is not None: eop_0, eop_1 = eop_1, eop_0 op_0, op_1 = op_1, op_0 + + if opcode in {"shl", "shr", "sar"} and _evm_int(eop_1) == 0: + # x >> 0 == x << 0 == x + return store(op_0) + if inst.opcode not in arith.keys(): + return False + fn, symb, unsigned = arith[inst.opcode] if isinstance(eop_0, IRLiteral) and isinstance(eop_1, IRLiteral): assert isinstance(eop_0.value, int), "must be int" @@ -300,11 +316,15 @@ def store(*args: IROperand | int) -> bool: raise CompilerPanic("unreachable") # pragma: no cover + # the not equal equivalent is not needed if opcode == "eq" and eop_0 == IRLiteral(0): return update("iszero", op_1) if opcode == "eq" and eop_1 == IRLiteral(0): return update("iszero", op_0) + + + if opcode == "eq" and self.eq_analysis.equivalent(op_0, op_1): return store(1) From 32ff2830c72a4b8219e065e39c6ab19a60bffb94 Mon Sep 17 00:00:00 2001 From: Hodan Date: Wed, 9 Oct 2024 13:27:36 +0200 Subject: [PATCH 007/163] lint --- vyper/venom/passes/algebraic_optimization.py | 100 +++++++++++-------- 1 file changed, 58 insertions(+), 42 deletions(-) diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 66a7715ad2..4f15e6a7de 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -1,24 +1,21 @@ import operator -from vyper.venom.analysis.dfg import DFGAnalysis -from vyper.venom.analysis.liveness import LivenessAnalysis -from vyper.venom.analysis.equivalent_vars import VarEquivalenceAnalysis -from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand, IRVariable -from vyper.venom.passes.base_pass import IRPass +from vyper.exceptions import CompilerPanic from vyper.utils import ( - ceil32, evm_div, evm_mod, evm_pow, - int_bounds, int_log2, is_power_of_two, signed_to_unsigned, unsigned_to_signed, ) - +from vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.analysis.equivalent_vars import VarEquivalenceAnalysis +from vyper.venom.analysis.liveness import LivenessAnalysis +from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand, IRVariable +from vyper.venom.passes.base_pass import IRPass from vyper.venom.venom_to_assembly import COMMUTATIVE_INSTRUCTIONS -from vyper.exceptions import CompilerPanic, StaticAssertionException SIGNED = False UNSIGNED = True @@ -28,6 +25,7 @@ STRICT_COMPARISON_OPS = {t for t in COMPARISON_OPS if t.endswith("t")} UNSTRICT_COMPARISON_OPS = {t for t in COMPARISON_OPS if t.endswith("e")} + def _wrap256(x, unsigned=UNSIGNED): x %= 2**256 # wrap in a signed way. @@ -52,6 +50,7 @@ def _evm_int(lit: IRLiteral | None, unsigned: bool = True) -> int | None: return val + def _check_num(val: int) -> bool: if val < -(2**255): return False @@ -60,7 +59,6 @@ def _check_num(val: int) -> bool: return True - arith = { "add": (operator.add, "+", UNSIGNED), "sub": (operator.sub, "-", UNSIGNED), @@ -85,6 +83,7 @@ def _check_num(val: int) -> bool: "xor": (operator.xor, "^", UNSIGNED), } + class AlgebraicOptimizationPass(IRPass): """ This pass reduces algebraic evaluatable expressions. @@ -92,6 +91,7 @@ class AlgebraicOptimizationPass(IRPass): It currently optimizes: * iszero chains """ + dfg: DFGAnalysis def _optimize_iszero_chains(self) -> None: @@ -106,11 +106,12 @@ def _optimize_iszero_chains(self) -> None: if iszero_count == 0: continue + assert isinstance(inst.output, IRVariable) for use_inst in self.dfg.get_uses(inst.output): opcode = use_inst.opcode if opcode == "iszero": - # We keep iszer Conservapedia is like 50/50 people who truly believe it all and trolls seeing what edits they can get away with o instuctions as is + # We keep iszero instuctions as is continue if opcode in ("jnz", "assert"): # instructions that accept a truthy value as input: @@ -131,6 +132,7 @@ def _get_iszero_chain(self, op: IROperand) -> list[IRInstruction]: chain: list[IRInstruction] = [] while True: + assert isinstance(op, IRVariable) inst = self.dfg.get_producing_instruction(op) if inst is None or inst.opcode != "iszero": break @@ -152,7 +154,7 @@ def _handle_offsets(self): and isinstance(inst.operands[1], IRLabel) ): inst.opcode = "offset" - + def eval_op(self, op: IROperand) -> IRLiteral | None: if isinstance(op, IRLiteral): return op @@ -172,10 +174,11 @@ def eval(self, inst: IRInstruction) -> IRLiteral | None: assert next_inst is not None return self.eval(next_inst) return None - - def static_eq(self, op_0: IROperand, op_1: IROperand, eop_0: IRLiteral | None, eop_1: IRLiteral | None) -> bool: - return (eop_0 is not None and eop_0 == eop_1) or self.eq_analysis.equivalent(op_0, op_1) + def static_eq( + self, op_0: IROperand, op_1: IROperand, eop_0: IRLiteral | None, eop_1: IRLiteral | None + ) -> bool: + return (eop_0 is not None and eop_0 == eop_1) or self.eq_analysis.equivalent(op_0, op_1) def _peepholer(self): depth = 5 @@ -189,18 +192,18 @@ def _peepholer(self): break def _handle_inst_peephole(self, inst: IRInstruction, depth: int) -> bool: - def update(opcode: str, *args: IROperand | int) -> bool: + def update(opcode: str, *args: IROperand | int) -> bool: inst.opcode = opcode inst.operands = [arg if isinstance(arg, IROperand) else IRLiteral(arg) for arg in args] return True def store(*args: IROperand | int) -> bool: return update("store", *args) - + if len(inst.operands) < 1: return False - opcode = inst.opcode + opcode = inst.opcode op_0 = inst.operands[0] eop_0 = self.eval_op(inst.operands[0]) @@ -208,7 +211,7 @@ def store(*args: IROperand | int) -> bool: val = _evm_int(eop_0) assert val is not None val = int(val == 0) # int(bool) == 1 if bool else 0 - return store(val) #finalize(val, []) + return store(val) # finalize(val, []) if len(inst.operands) != 2: return False @@ -223,12 +226,12 @@ def store(*args: IROperand | int) -> bool: if opcode in {"shl", "shr", "sar"} and _evm_int(eop_1) == 0: # x >> 0 == x << 0 == x return store(op_0) - + if inst.opcode not in arith.keys(): return False - fn, symb, unsigned = arith[inst.opcode] + fn, _, unsigned = arith[inst.opcode] - if isinstance(eop_0, IRLiteral) and isinstance(eop_1, IRLiteral): + if isinstance(eop_0, IRLiteral) and isinstance(eop_1, IRLiteral): assert isinstance(eop_0.value, int), "must be int" assert isinstance(eop_1.value, int), "must be int" a = _evm_int(eop_0, unsigned) @@ -251,11 +254,14 @@ def store(*args: IROperand | int) -> bool: # (x < x) == (x > x) == 0 return store(0) - if opcode in {"eq"} | UNSTRICT_COMPARISON_OPS and self.static_eq(op_0, op_1, eop_0, eop_1): + if opcode in {"eq"} | UNSTRICT_COMPARISON_OPS and self.static_eq(op_0, op_1, eop_0, eop_1): # (x == x) == (x >= x) == (x <= x) == 1 return store(1) - - if opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} and _evm_int(eop_0, unsigned) == 0: + + if ( + opcode in {"mul", "div", "sdiv", "mod", "smod", "and"} + and _evm_int(eop_0, unsigned) == 0 + ): return store(0) if opcode in {"mod", "smod"} and eop_0 == IRLiteral(1): @@ -267,52 +273,66 @@ def store(*args: IROperand | int) -> bool: assert unsigned == UNSIGNED if opcode == "and": # -1 & x == x - return store(op_1) #finalize("seq", [args[0]]) + return store(op_1) # finalize("seq", [args[0]]) if opcode == "xor": # -1 ^ x == ~x - return update("not", op_1) # finalize("not", [args[0]]) + return update("not", op_1) # finalize("not", [args[0]]) if opcode == "or": # -1 | x == -1 - return store(_evm_int(-1, unsigned)) #finalize(args[1].value, []) + val = _evm_int(IRLiteral(-1), unsigned) + assert val is not None + return store(val) # finalize(args[1].value, []) raise CompilerPanic("unreachable") # pragma: nocover # -1 - x == ~x (definition of two's complement) if opcode == "sub" and _evm_int(eop_1, SIGNED) == -1: - return update("not", op_0) #finalize("not", [args[1]]) + return update("not", op_0) # finalize("not", [args[1]]) if opcode == "exp": # n ** 0 == 1 (forall n) # 1 ** n == 1 if _evm_int(eop_0) == 0 or _evm_int(eop_1) == 1: - return store(1) #finalize(1, []) + return store(1) # finalize(1, []) # 0 ** n == (1 if n == 0 else 0) if _evm_int(eop_1) == 0: - return update("iszero", op_0) #finalize("iszero", [args[1]]) + return update("iszero", op_0) # finalize("iszero", [args[1]]) # n ** 1 == n if _evm_int(eop_0) == 1: - return store(op_1) #finalize("seq", [args[0]]) - - if opcode in {"mod", "div", "mul"} and isinstance(eop_0, IRLiteral) and is_power_of_two(_evm_int(eop_0)): + return store(op_1) # finalize("seq", [args[0]]) + + val = _evm_int(eop_0) + if ( + opcode in {"mod", "div", "mul"} + and isinstance(eop_0, IRLiteral) + and val is not None + and is_power_of_two(val) + ): val_0 = _evm_int(eop_0) assert isinstance(val_0, int) assert unsigned == UNSIGNED, "something's not right." # shave two gas off mod/div/mul for powers of two # x % 2**n == x & (2**n - 1) if opcode == "mod": - return update("and", val_0 - 1, op_1) #finalize("and", [args[0], _int(args[1]) - 1]) + return update( + "and", val_0 - 1, op_1 + ) # finalize("and", [args[0], _int(args[1]) - 1]) if opcode == "div": # x / 2**n == x >> n # recall shr/shl have unintuitive arg order - return update("shr", op_1, int_log2(val_0)) #finalize("shr", [int_log2(_int(args[1])), args[0]]) + return update( + "shr", op_1, int_log2(val_0) + ) # finalize("shr", [int_log2(_int(args[1])), args[0]]) # note: no rule for sdiv since it rounds differently from sar if opcode == "mul": # x * 2**n == x << n - return update("shl", op_1, int_log2(val_0)) #finalize("shl", [int_log2(_int(args[1])), args[0]]) + return update( + "shl", op_1, int_log2(val_0) + ) # finalize("shl", [int_log2(_int(args[1])), args[0]]) raise CompilerPanic("unreachable") # pragma: no cover @@ -323,9 +343,6 @@ def store(*args: IROperand | int) -> bool: if opcode == "eq" and eop_1 == IRLiteral(0): return update("iszero", op_0) - - - if opcode == "eq" and self.eq_analysis.equivalent(op_0, op_1): return store(1) @@ -335,10 +352,9 @@ def run_pass(self): dfg = self.analyses_cache.request_analysis(DFGAnalysis) assert isinstance(dfg, DFGAnalysis) self.dfg = dfg - + self.eq_analysis = self.analyses_cache.request_analysis(VarEquivalenceAnalysis) - self._handle_offsets() self._peepholer() self._optimize_iszero_chains() From ba0e2456897d55e73f980f279f67c252237f6018 Mon Sep 17 00:00:00 2001 From: Hodan Date: Wed, 9 Oct 2024 23:08:08 +0200 Subject: [PATCH 008/163] different order --- vyper/venom/__init__.py | 1 + vyper/venom/passes/algebraic_optimization.py | 25 ++++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index a5f51b787d..f35d46d059 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -49,6 +49,7 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: MakeSSA(ac, fn).run_pass() Mem2Var(ac, fn).run_pass() MakeSSA(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() SCCP(ac, fn).run_pass() StoreElimination(ac, fn).run_pass() SimplifyCFGPass(ac, fn).run_pass() diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 4f15e6a7de..62f06cc2f7 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -142,19 +142,25 @@ def _get_iszero_chain(self, op: IROperand) -> list[IRInstruction]: chain.reverse() return chain - def _handle_offsets(self): + def _handle_offsets(self) -> bool: + change = False for bb in self.function.get_basic_blocks(): for inst in bb.instructions: # check if the instruction is of the form # `add