Skip to content

Commit

Permalink
feat[venom]: add codesize optimization pass (vyperlang#4333)
Browse files Browse the repository at this point in the history
add basic codesize optimization pass for venom which strength-reduces large
literals into EVM computations that are shorter to encode

---------

Co-authored-by: Hodan <[email protected]>
  • Loading branch information
charles-cooper and HodanPlodky authored Dec 20, 2024
1 parent 9ff9080 commit a56d79d
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 6 deletions.
117 changes: 117 additions & 0 deletions tests/unit/compiler/venom/test_literals_codesize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import pytest

from vyper.utils import evm_not
from vyper.venom.analysis import IRAnalysesCache
from vyper.venom.basicblock import IRLiteral
from vyper.venom.context import IRContext
from vyper.venom.passes import ReduceLiteralsCodesize


def _calc_push_size(val: int):
s = hex(val).removeprefix("0x")
if len(s) % 2 != 0: # justify to multiple of 2
s = "0" + s
return 1 + len(s)


should_invert = [2**256 - 1] + [((1 << i) - 1) << (256 - i) for i in range(121, 257)]


@pytest.mark.parametrize("orig_value", should_invert)
def test_literal_codesize_ff_inversion(orig_value):
    """
    Literals with a long prefix of 0xff bytes (e.g. 0xfffffffffffabcd)
    should be rewritten to `not <complement>` (e.g. `not 0x5432`).
    """
    ctx = IRContext()
    func = ctx.create_function("_global")
    block = func.get_basic_block()

    block.append_instruction("store", IRLiteral(orig_value))
    block.append_instruction("stop")

    ReduceLiteralsCodesize(IRAnalysesCache(func), func).run_pass()

    first = block.instructions[0]
    assert first.opcode == "not"
    operand = first.operands[0]
    assert evm_not(operand.value) == orig_value
    # the rewrite must shrink the bytecode even after paying for the
    # extra NOT instruction byte
    assert _calc_push_size(operand.value) + 1 < _calc_push_size(orig_value)


should_not_invert = [1, 0xFE << 248 | (2**248 - 1)] + [
    ((2**255 - 1) >> i) << i for i in range(24)
]


@pytest.mark.parametrize("orig_value", should_not_invert)
def test_literal_codesize_no_inversion(orig_value):
    """
    Funky cases where applying the NOT transform would grow the bytecode;
    the literal must be left untouched.
    """
    ctx = IRContext()
    func = ctx.create_function("_global")
    block = func.get_basic_block()

    block.append_instruction("store", IRLiteral(orig_value))
    block.append_instruction("stop")

    ReduceLiteralsCodesize(IRAnalysesCache(func), func).run_pass()

    first = block.instructions[0]
    assert first.opcode == "store"
    assert first.operands[0].value == orig_value


should_shl = (
    [1 << i for i in range(24, 255)]
    + [((1 << i) - 1) << (256 - i) for i in range(1, 121)]
    + [((2**255 - 1) >> i) << i for i in range(24, 254)]
)


@pytest.mark.parametrize("orig_value", should_shl)
def test_literal_codesize_shl(orig_value):
    """
    Literals with many trailing zero bits (e.g. 0xabcd00000000) should be
    rewritten to a shift, e.g. `shl 32 0xabcd`.
    """
    ctx = IRContext()
    func = ctx.create_function("_global")
    block = func.get_basic_block()

    block.append_instruction("store", IRLiteral(orig_value))
    block.append_instruction("stop")

    ReduceLiteralsCodesize(IRAnalysesCache(func), func).run_pass()

    first = block.instructions[0]
    assert first.opcode == "shl"
    base, shift = first.operands
    assert base.value << shift.value == orig_value

    # the rewrite must shrink the bytecode even after paying for the
    # extra PUSH and SHL instruction bytes
    assert _calc_push_size(base.value) + _calc_push_size(shift.value) + 1 < _calc_push_size(
        orig_value
    )


should_not_shl = [1 << i for i in range(24)] + [
    0x0,
    (((2 ** (256 - 2)) - 1) << (2 * 8)) ^ (2**255),
]


@pytest.mark.parametrize("orig_value", should_not_shl)
def test_literal_codesize_no_shl(orig_value):
    """
    Funky cases where applying the SHL transform would grow the bytecode;
    the literal must be left untouched.
    """
    ctx = IRContext()
    func = ctx.create_function("_global")
    block = func.get_basic_block()

    block.append_instruction("store", IRLiteral(orig_value))
    block.append_instruction("stop")

    ReduceLiteralsCodesize(IRAnalysesCache(func), func).run_pass()

    first = block.instructions[0]
    assert first.opcode == "store"
    assert first.operands[0].value == orig_value
5 changes: 5 additions & 0 deletions vyper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,11 @@ def evm_twos_complement(x: int) -> int:
return ((2**256 - 1) ^ x) + 1


def evm_not(val: int) -> int:
    """Return the EVM ``NOT`` of *val*: its bitwise complement as a uint256."""
    assert 0 <= val <= SizeLimits.MAX_UINT256, "Value out of bounds"
    return val ^ SizeLimits.MAX_UINT256


# EVM div semantics as a python function
def evm_div(x, y):
if y == 0:
Expand Down
5 changes: 5 additions & 0 deletions vyper/venom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
MakeSSA,
Mem2Var,
MemMergePass,
ReduceLiteralsCodesize,
RemoveUnusedVariablesPass,
SimplifyCFGPass,
StoreElimination,
Expand Down Expand Up @@ -74,6 +75,10 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None:
RemoveUnusedVariablesPass(ac, fn).run_pass()

StoreExpansionPass(ac, fn).run_pass()

if optimize == OptimizationLevel.CODESIZE:
ReduceLiteralsCodesize(ac, fn).run_pass()

DFTPass(ac, fn).run_pass()


Expand Down
1 change: 1 addition & 0 deletions vyper/venom/passes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .branch_optimization import BranchOptimizationPass
from .dft import DFTPass
from .float_allocas import FloatAllocas
from .literals_codesize import ReduceLiteralsCodesize
from .lower_dload import LowerDloadPass
from .make_ssa import MakeSSA
from .mem2var import Mem2Var
Expand Down
58 changes: 58 additions & 0 deletions vyper/venom/passes/literals_codesize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from vyper.utils import evm_not
from vyper.venom.basicblock import IRLiteral
from vyper.venom.passes.base_pass import IRPass

# not takes 1 byte, so it makes sense to use it when we can save at least
# 1 byte
NOT_THRESHOLD = 1

# shl takes 3 bytes, so it makes sense to use it when we can save at least
# 3 bytes
SHL_THRESHOLD = 3


class ReduceLiteralsCodesize(IRPass):
    """Strength-reduce large literal `store` operands to shorter EVM computations.

    Two rewrites are attempted, keeping whichever saves more bytes:
    - ``not``: literals with many leading 0xff bytes become the NOT of
      their (shorter) complement
    - ``shl``: literals with many trailing zero bits become a small
      literal shifted left
    """

    def run_pass(self):
        for bb in self.function.get_basic_blocks():
            self._process_bb(bb)

    def _process_bb(self, bb):
        for inst in bb.instructions:
            if inst.opcode != "store":
                continue

            (operand,) = inst.operands
            if not isinstance(operand, IRLiteral):
                continue

            # normalize into the uint256 range
            val = operand.value % (2**256)

            # bits saved by emitting NOT of the complement, after paying
            # NOT_THRESHOLD byte(s) for the NOT instruction itself
            inverted = evm_not(val)
            not_benefit = (len(hex(val)) // 2 - len(hex(inverted)) // 2 - NOT_THRESHOLD) * 8

            # bits saved by shifting out trailing zeros, after paying
            # SHL_THRESHOLD bytes for the extra PUSH + SHL
            bits = bin(val)[2:]
            trailing = len(bits) - bits.rfind("1")
            shl_benefit = trailing - SHL_THRESHOLD * 8

            if not_benefit <= 0 and shl_benefit <= 0:
                # neither transform shrinks the bytecode here
                continue

            if not_benefit >= shl_benefit:
                assert not_benefit > 0  # implied by previous conditions
                # transform things like 0xffff...01 to (not 0xfe)
                inst.opcode = "not"
                operand.value = inverted
            else:
                assert shl_benefit > 0  # implied by previous conditions
                # transform things like 0x123400....000 to 0x1234 << ...
                shift = trailing - 1
                # sanity: val has exactly `shift` trailing zero bits
                assert (val >> shift) << shift == val, val
                assert (val >> shift) & 1 == 1, val

                inst.opcode = "shl"
                inst.operands = [IRLiteral(val >> shift), IRLiteral(shift)]
8 changes: 2 additions & 6 deletions vyper/venom/passes/sccp/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
SizeLimits,
evm_div,
evm_mod,
evm_not,
evm_pow,
signed_to_unsigned,
unsigned_to_signed,
Expand Down Expand Up @@ -95,11 +96,6 @@ def _evm_sar(shift_len: int, value: int) -> int:
return value >> shift_len


def _evm_not(value: int) -> int:
    """Return the EVM ``NOT`` of *value*: its bitwise complement as a uint256."""
    assert 0 <= value <= SizeLimits.MAX_UINT256, "Value out of bounds"
    return value ^ SizeLimits.MAX_UINT256


ARITHMETIC_OPS: dict[str, Callable[[list[IROperand]], int]] = {
"add": _wrap_binop(operator.add),
"sub": _wrap_binop(operator.sub),
Expand All @@ -122,7 +118,7 @@ def _evm_not(value: int) -> int:
"or": _wrap_binop(operator.or_),
"and": _wrap_binop(operator.and_),
"xor": _wrap_binop(operator.xor),
"not": _wrap_unop(_evm_not),
"not": _wrap_unop(evm_not),
"signextend": _wrap_binop(_evm_signextend),
"iszero": _wrap_unop(_evm_iszero),
"shr": _wrap_binop(_evm_shr),
Expand Down

0 comments on commit a56d79d

Please sign in to comment.