diff --git a/tests/unit/compiler/venom/test_literals_codesize.py b/tests/unit/compiler/venom/test_literals_codesize.py
new file mode 100644
index 0000000000..4de4d9de64
--- /dev/null
+++ b/tests/unit/compiler/venom/test_literals_codesize.py
@@ -0,0 +1,117 @@
+import pytest
+
+from vyper.utils import evm_not
+from vyper.venom.analysis import IRAnalysesCache
+from vyper.venom.basicblock import IRLiteral
+from vyper.venom.context import IRContext
+from vyper.venom.passes import ReduceLiteralsCodesize
+
+
+def _calc_push_size(val: int):
+    s = hex(val).removeprefix("0x")
+    if len(s) % 2 != 0:  # justify to multiple of 2
+        s = "0" + s
+    return 1 + len(s)
+
+
+should_invert = [2**256 - 1] + [((2**i) - 1) << (256 - i) for i in range(121, 256 + 1)]
+
+
+@pytest.mark.parametrize("orig_value", should_invert)
+def test_literal_codesize_ff_inversion(orig_value):
+    """
+    Test that literals like 0xfffffffffffabcd get inverted to `not 0x5432`
+    """
+    ctx = IRContext()
+    fn = ctx.create_function("_global")
+    bb = fn.get_basic_block()
+
+    bb.append_instruction("store", IRLiteral(orig_value))
+    bb.append_instruction("stop")
+    ac = IRAnalysesCache(fn)
+    ReduceLiteralsCodesize(ac, fn).run_pass()
+
+    inst0 = bb.instructions[0]
+    assert inst0.opcode == "not"
+    op0 = inst0.operands[0]
+    assert evm_not(op0.value) == orig_value
+    # check the optimization actually improved codesize, after accounting
+    # for the addl NOT instruction
+    assert _calc_push_size(op0.value) + 1 < _calc_push_size(orig_value)
+
+
+should_not_invert = [1, 0xFE << 248 | (2**248 - 1)] + [
+    ((2**255 - 1) >> i) << i for i in range(0, 3 * 8)
+]
+
+
+@pytest.mark.parametrize("orig_value", should_not_invert)
+def test_literal_codesize_no_inversion(orig_value):
+    """
+    Check funky cases where inversion would result in bytecode increase
+    """
+    ctx = IRContext()
+    fn = ctx.create_function("_global")
+    bb = fn.get_basic_block()
+
+    bb.append_instruction("store", IRLiteral(orig_value))
+    bb.append_instruction("stop")
+    ac = IRAnalysesCache(fn)
+    ReduceLiteralsCodesize(ac, fn).run_pass()
+
+    assert bb.instructions[0].opcode == "store"
+    assert bb.instructions[0].operands[0].value == orig_value
+
+
+should_shl = (
+    [2**i for i in range(3 * 8, 255)]
+    + [((2**i) - 1) << (256 - i) for i in range(1, 121)]
+    + [((2**255 - 1) >> i) << i for i in range(3 * 8, 254)]
+)
+
+
+@pytest.mark.parametrize("orig_value", should_shl)
+def test_literal_codesize_shl(orig_value):
+    """
+    Test that literals like 0xabcd00000000 get transformed to `shl 32 0xabcd`
+    """
+    ctx = IRContext()
+    fn = ctx.create_function("_global")
+    bb = fn.get_basic_block()
+
+    bb.append_instruction("store", IRLiteral(orig_value))
+    bb.append_instruction("stop")
+    ac = IRAnalysesCache(fn)
+    ReduceLiteralsCodesize(ac, fn).run_pass()
+
+    assert bb.instructions[0].opcode == "shl"
+    op0, op1 = bb.instructions[0].operands
+    assert op0.value << op1.value == orig_value
+
+    # check the optimization actually improved codesize, after accounting
+    # for the addl PUSH and SHL instructions
+    assert _calc_push_size(op0.value) + _calc_push_size(op1.value) + 1 < _calc_push_size(orig_value)
+
+
+should_not_shl = [1 << i for i in range(0, 3 * 8)] + [
+    0x0,
+    (((2 ** (256 - 2)) - 1) << (2 * 8)) ^ (2**255),
+]
+
+
+@pytest.mark.parametrize("orig_value", should_not_shl)
+def test_literal_codesize_no_shl(orig_value):
+    """
+    Check funky cases where shl transformation would result in bytecode increase
+    """
+    ctx = IRContext()
+    fn = ctx.create_function("_global")
+    bb = fn.get_basic_block()
+
+    bb.append_instruction("store", IRLiteral(orig_value))
+    bb.append_instruction("stop")
+    ac = IRAnalysesCache(fn)
+    ReduceLiteralsCodesize(ac, fn).run_pass()
+
+    assert bb.instructions[0].opcode == "store"
+    assert bb.instructions[0].operands[0].value == orig_value
diff --git a/vyper/utils.py b/vyper/utils.py
index 9b5bfbef62..db50626713 100644
--- a/vyper/utils.py
+++ b/vyper/utils.py
@@ -391,6 +391,11 @@ def evm_twos_complement(x: int) -> int:
     return ((2**256 - 1) ^ x) + 1
 
 
+def evm_not(val: int) -> int:
+    assert 0 <= val <= SizeLimits.MAX_UINT256, "Value out of bounds"
+    return SizeLimits.MAX_UINT256 ^ val
+
+
 # EVM div semantics as a python function
 def evm_div(x, y):
     if y == 0:
diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py
index 89a4534a4c..d1eb4189c3 100644
--- a/vyper/venom/__init__.py
+++ b/vyper/venom/__init__.py
@@ -19,6 +19,7 @@
     MakeSSA,
     Mem2Var,
     MemMergePass,
+    ReduceLiteralsCodesize,
     RemoveUnusedVariablesPass,
     SimplifyCFGPass,
     StoreElimination,
@@ -74,6 +75,10 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None:
     RemoveUnusedVariablesPass(ac, fn).run_pass()
 
     StoreExpansionPass(ac, fn).run_pass()
+
+    if optimize == OptimizationLevel.CODESIZE:
+        ReduceLiteralsCodesize(ac, fn).run_pass()
+
     DFTPass(ac, fn).run_pass()
 
 
diff --git a/vyper/venom/passes/__init__.py b/vyper/venom/passes/__init__.py
index fe1e387c56..b49791ee24 100644
--- a/vyper/venom/passes/__init__.py
+++ b/vyper/venom/passes/__init__.py
@@ -2,6 +2,7 @@
 from .branch_optimization import BranchOptimizationPass
 from .dft import DFTPass
 from .float_allocas import FloatAllocas
+from .literals_codesize import ReduceLiteralsCodesize
 from .lower_dload import LowerDloadPass
 from .make_ssa import MakeSSA
 from .mem2var import Mem2Var
diff --git a/vyper/venom/passes/literals_codesize.py b/vyper/venom/passes/literals_codesize.py
new file mode 100644
index 0000000000..daf195dfd4
--- /dev/null
+++ b/vyper/venom/passes/literals_codesize.py
@@ -0,0 +1,58 @@
+from vyper.utils import evm_not
+from vyper.venom.basicblock import IRLiteral
+from vyper.venom.passes.base_pass import IRPass
+
+# not takes 1 byte, so it makes sense to use it when we can save at least
+# 1 byte
+NOT_THRESHOLD = 1
+
+# shl takes 3 bytes, so it makes sense to use it when we can save at least
+# 3 bytes
+SHL_THRESHOLD = 3
+
+
+class ReduceLiteralsCodesize(IRPass):
+    def run_pass(self):
+        for bb in self.function.get_basic_blocks():
+            self._process_bb(bb)
+
+    def _process_bb(self, bb):
+        for inst in bb.instructions:
+            if inst.opcode != "store":
+                continue
+
+            (op,) = inst.operands
+            if not isinstance(op, IRLiteral):
+                continue
+
+            val = op.value % (2**256)
+
+            # calculate amount of bits saved by not optimization
+            not_benefit = ((len(hex(val)) // 2 - len(hex(evm_not(val))) // 2) - NOT_THRESHOLD) * 8
+
+            # calculate amount of bits saved by shl optimization
+            binz = bin(val)[2:]
+            ix = len(binz) - binz.rfind("1")
+            shl_benefit = ix - SHL_THRESHOLD * 8
+
+            if not_benefit <= 0 and shl_benefit <= 0:
+                # no optimization can be done here
+                continue
+
+            if not_benefit >= shl_benefit:
+                assert not_benefit > 0  # implied by previous conditions
+                # transform things like 0xffff...01 to (not 0xfe)
+                inst.opcode = "not"
+                op.value = evm_not(val)
+                continue
+            else:
+                assert shl_benefit > 0  # implied by previous conditions
+                # transform things like 0x123400....000 to 0x1234 << ...
+                ix -= 1
+                # sanity check
+                assert (val >> ix) << ix == val, val
+                assert (val >> ix) & 1 == 1, val
+
+                inst.opcode = "shl"
+                inst.operands = [IRLiteral(val >> ix), IRLiteral(ix)]
+                continue
diff --git a/vyper/venom/passes/sccp/eval.py b/vyper/venom/passes/sccp/eval.py
index b5786bb304..99f0ba70d9 100644
--- a/vyper/venom/passes/sccp/eval.py
+++ b/vyper/venom/passes/sccp/eval.py
@@ -5,6 +5,7 @@
     SizeLimits,
     evm_div,
     evm_mod,
+    evm_not,
     evm_pow,
     signed_to_unsigned,
     unsigned_to_signed,
@@ -95,11 +96,6 @@ def _evm_sar(shift_len: int, value: int) -> int:
     return value >> shift_len
 
 
-def _evm_not(value: int) -> int:
-    assert 0 <= value <= SizeLimits.MAX_UINT256, "Value out of bounds"
-    return SizeLimits.MAX_UINT256 ^ value
-
-
 ARITHMETIC_OPS: dict[str, Callable[[list[IROperand]], int]] = {
     "add": _wrap_binop(operator.add),
     "sub": _wrap_binop(operator.sub),
@@ -122,7 +118,7 @@ def _evm_not(value: int) -> int:
     "or": _wrap_binop(operator.or_),
     "and": _wrap_binop(operator.and_),
     "xor": _wrap_binop(operator.xor),
-    "not": _wrap_unop(_evm_not),
+    "not": _wrap_unop(evm_not),
     "signextend": _wrap_binop(_evm_signextend),
     "iszero": _wrap_unop(_evm_iszero),
     "shr": _wrap_binop(_evm_shr),
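
Side note on the thresholds (not part of the diff above): a PUSHn instruction occupies 1 + n bytes of code, NOT occupies 1 byte, and SHL additionally needs its shift amount pushed (PUSH1 + SHL = 3 bytes for shifts under 256), which is where NOT_THRESHOLD and SHL_THRESHOLD come from. A minimal standalone sketch of this byte accounting, assuming nothing beyond the Python standard library (push_size and val are illustrative names, not part of the patch):

    def push_size(val: int) -> int:
        # one byte for the PUSH opcode, plus one byte per pair of hex digits
        nibbles = len(hex(val)[2:])
        return 1 + (nibbles + 1) // 2

    # NOT case: 0xffff...edcb needs a PUSH32 (33 bytes) as-is, but only
    # PUSH2 0x1234 plus NOT (3 + 1 = 4 bytes) in inverted form
    val = (2**256 - 1) ^ 0x1234
    assert push_size(val) == 33
    assert push_size((2**256 - 1) ^ val) + 1 == 4

    # SHL case: 0x1234 << 240 also needs a PUSH32 as-is, but only
    # PUSH2 0x1234, PUSH1 240, SHL (3 + 2 + 1 = 6 bytes) in shifted form
    val = 0x1234 << 240
    assert push_size(val) == 33
    assert push_size(0x1234) + push_size(240) + 1 == 6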