Skip to content

Commit

Permalink
feat: implement new IR for vyper (venom IR) (vyperlang#3659)
Browse files Browse the repository at this point in the history
this commit implements a new IR for the vyper compiler. most of the
implementation is self-contained in the `./vyper/venom/` directory.

Venom IR is LLVM-"inspired", although we do not use LLVM on account of:

1) not wanting to introduce a large external dependency
2) no EVM backend exists for LLVM, so we would have to write one
   ourselves. see prior work at https://github.com/etclabscore/evm_llvm.
   fundamentally, LLVM is architected to target register machines; an
   EVM backend could conceivably be implemented, but it would always
   feel "bolted" on.
3) integration with LLVM would invariably be very complex
4) one advantage of using LLVM is getting multiple backends "for free",
   but in our case, none of the backends we are interested in
   (particularly EVM) have LLVM implementations.

that being said, Venom is close enough to LLVM that it would seem fairly
straightforward to pass "in-and-out" of LLVM, converting to LLVM to take
advantage of its optimization passes and/or analysis utilities, and then
converting back to Venom for final EVM emission, if that becomes
desirable down the line. it could even be provided as an "extra" -- if LLVM
is installed on the system and enabled for the build, pass to LLVM for
extra optimization, but otherwise keep the compiler self-contained.

for more details about the design and architecture of Venom IR, see
`./vyper/venom/README.md`.

note that this commit specifically focuses on the architecture, design
and implementation of Venom. that is, more focus was spent on
architecting the Venom compiler itself. the Vyper frontend does not emit
Venom natively yet; instead, Venom emission is implemented as a translation step
from the current s-expr based IR to Venom. the translation is not
feature-complete, and may have bugs. that being said, vyper compilation
via Venom is experimentally available by passing the
`--experimental-codegen` flag to vyper on the CLI. incrementally
refactoring the codegen to use Venom instead of the earlier s-expr IR
will be the next area of focus of development.

---------

Co-authored-by: Charles Cooper <[email protected]>
  • Loading branch information
harkal and charles-cooper authored Dec 1, 2023
1 parent 9a982bd commit cbac5ab
Show file tree
Hide file tree
Showing 28 changed files with 2,994 additions and 30 deletions.
28 changes: 28 additions & 0 deletions tests/compiler/venom/test_duplicate_operands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from vyper.compiler.settings import OptimizationLevel
from vyper.venom import generate_assembly_experimental
from vyper.venom.basicblock import IRLiteral
from vyper.venom.function import IRFunction


def test_duplicate_operands():
    """
    Test code generation when the same variable is used as an operand
    more than once.

    The venom code:
    %1 = 10
    %2 = add %1, %1
    %3 = mul %1, %2
    stop
    Should compile to:
    [PUSH1, 10, DUP1, DUP1, DUP1, ADD, MUL, STOP, REVERT]
    """
    ctx = IRFunction()

    op = ctx.append_instruction("store", [IRLiteral(10)])
    # named `total` (not `sum`) so the builtin is not shadowed
    total = ctx.append_instruction("add", [op, op])
    ctx.append_instruction("mul", [total, op])
    # NOTE(review): the trailing `False` presumably tells append_instruction
    # not to produce an output variable -- confirm against IRFunction.
    ctx.append_instruction("stop", [], False)

    asm = generate_assembly_experimental(ctx, OptimizationLevel.CODESIZE)

    assert asm == ["PUSH1", 10, "DUP1", "DUP1", "DUP1", "ADD", "MUL", "STOP", "REVERT"]
96 changes: 96 additions & 0 deletions tests/compiler/venom/test_multi_entry_block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from vyper.venom.analysis import calculate_cfg
from vyper.venom.basicblock import IRLiteral
from vyper.venom.function import IRBasicBlock, IRFunction, IRLabel
from vyper.venom.passes.normalization import NormalizationPass


def test_multi_entry_block_1():
    """
    Build a function in which the `finish` block is a jump target of
    several blocks, run NormalizationPass on it, and check both the
    `normalized` flag and the blocks feeding into `finish` afterwards.
    """
    ctx = IRFunction()

    finish_label = IRLabel("finish")
    target_label = IRLabel("target")
    # NOTE(review): unlike the two labels above, this one is given `ctx`
    # as a second argument -- confirm that this is what IRLabel expects.
    block_1_label = IRLabel("block_1", ctx)

    # instructions appended before any block is explicitly added
    # (presumably the "global" block, going by the assertions below)
    ten = ctx.append_instruction("store", [IRLiteral(10)])
    acc_entry = ctx.append_instruction("add", [ten, ten])
    ctx.append_instruction("jnz", [acc_entry, finish_label, block_1_label], False)

    # block_1: jumps to either `finish` or `target`
    ctx.append_basic_block(IRBasicBlock(block_1_label, ctx))
    acc_b1 = ctx.append_instruction("add", [acc_entry, ten])
    ten_b1 = ctx.append_instruction("store", [IRLiteral(10)])
    ctx.append_instruction("mstore", [acc_b1, ten_b1], False)
    ctx.append_instruction("jnz", [acc_b1, finish_label, target_label], False)

    # target: jumps unconditionally to `finish`
    ctx.append_basic_block(IRBasicBlock(target_label, ctx))
    ctx.append_instruction("mul", [acc_b1, acc_b1])
    ctx.append_instruction("jmp", [finish_label], False)

    # finish: terminates
    ctx.append_basic_block(IRBasicBlock(finish_label, ctx))
    ctx.append_instruction("stop", [], False)

    calculate_cfg(ctx)
    assert not ctx.normalized, "CFG should not be normalized"

    NormalizationPass.run_pass(ctx)

    assert ctx.normalized, "CFG should be normalized"

    # after the pass, check the first three incoming blocks of `finish`, in order
    finish_bb = ctx.get_basic_block(finish_label.value)
    predecessors = list(finish_bb.cfg_in.keys())
    expected_names = ("target", "finish_split_global", "finish_split_block_1")
    for position, name in enumerate(expected_names):
        assert predecessors[position].label.value == name, f"Should contain {name}"


# more complicated one
def test_multi_entry_block_2():
    """
    Same idea as test_multi_entry_block_1, with an extra `block_2` that
    also jumps to `finish` and `target`: run NormalizationPass and check
    the `normalized` flag and the blocks feeding into `finish`.
    """
    ctx = IRFunction()

    finish_label = IRLabel("finish")
    target_label = IRLabel("target")
    # NOTE(review): these two labels are given `ctx` as a second argument,
    # unlike the two above -- confirm that this is what IRLabel expects.
    block_1_label = IRLabel("block_1", ctx)
    block_2_label = IRLabel("block_2", ctx)

    # instructions appended before any block is explicitly added
    # (presumably the "global" block, going by the assertions below)
    ten = ctx.append_instruction("store", [IRLiteral(10)])
    acc_entry = ctx.append_instruction("add", [ten, ten])
    ctx.append_instruction("jnz", [acc_entry, finish_label, block_1_label], False)

    # block_1: jumps to either `target` or `finish`
    ctx.append_basic_block(IRBasicBlock(block_1_label, ctx))
    acc_b1 = ctx.append_instruction("add", [acc_entry, ten])
    ten_b1 = ctx.append_instruction("store", [IRLiteral(10)])
    ctx.append_instruction("mstore", [acc_b1, ten_b1], False)
    ctx.append_instruction("jnz", [acc_b1, target_label, finish_label], False)

    # block_2: no jump in this test names block_2_label as a target
    ctx.append_basic_block(IRBasicBlock(block_2_label, ctx))
    acc_b2 = ctx.append_instruction("add", [acc_b1, ten_b1])
    ten_b2 = ctx.append_instruction("store", [IRLiteral(10)])
    ctx.append_instruction("mstore", [acc_b2, ten_b2], False)
    # switch the order of the labels, for fun
    ctx.append_instruction("jnz", [acc_b2, finish_label, target_label], False)

    # target: jumps unconditionally to `finish`
    ctx.append_basic_block(IRBasicBlock(target_label, ctx))
    ctx.append_instruction("mul", [acc_b2, acc_b2])
    ctx.append_instruction("jmp", [finish_label], False)

    # finish: terminates
    ctx.append_basic_block(IRBasicBlock(finish_label, ctx))
    ctx.append_instruction("stop", [], False)

    calculate_cfg(ctx)
    assert not ctx.normalized, "CFG should not be normalized"

    NormalizationPass.run_pass(ctx)

    assert ctx.normalized, "CFG should be normalized"

    # after the pass, check the first three incoming blocks of `finish`, in order
    finish_bb = ctx.get_basic_block(finish_label.value)
    predecessors = list(finish_bb.cfg_in.keys())
    expected_names = ("target", "finish_split_global", "finish_split_block_1")
    for position, name in enumerate(expected_names):
        assert predecessors[position].label.value == name, f"Should contain {name}"
5 changes: 5 additions & 0 deletions tests/compiler/venom/test_stack_at_external_return.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def test_stack_at_external_return():
    """
    Placeholder test: it contains no assertions yet and always passes.

    TODO: USE BOA TO GENERATE THIS TEST
    """
8 changes: 8 additions & 0 deletions vyper/cli/vyper_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ def _parse_args(argv):
"-p", help="Set the root path for contract imports", default=".", dest="root_folder"
)
parser.add_argument("-o", help="Set the output path", dest="output_path")
parser.add_argument(
"--experimental-codegen",
help="The compiler use the new IR codegen. This is an experimental feature.",
action="store_true",
)

args = parser.parse_args(argv)

Expand Down Expand Up @@ -188,6 +193,7 @@ def _parse_args(argv):
settings,
args.storage_layout,
args.no_bytecode_metadata,
args.experimental_codegen,
)

if args.output_path:
Expand Down Expand Up @@ -225,6 +231,7 @@ def compile_files(
settings: Optional[Settings] = None,
storage_layout_paths: list[str] = None,
no_bytecode_metadata: bool = False,
experimental_codegen: bool = False,
) -> dict:
root_path = Path(root_folder).resolve()
if not root_path.exists():
Expand Down Expand Up @@ -275,6 +282,7 @@ def compile_files(
storage_layout_override=storage_layout_override,
show_gas_estimates=show_gas_estimates,
no_bytecode_metadata=no_bytecode_metadata,
experimental_codegen=experimental_codegen,
)

ret[file_path] = output
Expand Down
4 changes: 4 additions & 0 deletions vyper/codegen/function_definitions/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,5 +162,9 @@ def generate_ir_for_function(
# (note: internal functions do not need to adjust gas estimate since
mem_expansion_cost = calc_mem_gas(func_t._ir_info.frame_info.mem_used) # type: ignore
ret.common_ir.add_gas_estimate += mem_expansion_cost # type: ignore
ret.common_ir.passthrough_metadata["func_t"] = func_t # type: ignore
ret.common_ir.passthrough_metadata["frame_info"] = frame_info # type: ignore
else:
ret.func_ir.passthrough_metadata["frame_info"] = frame_info # type: ignore

return ret
4 changes: 3 additions & 1 deletion vyper/codegen/function_definitions/internal_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,6 @@ def generate_ir_for_internal_function(
["seq"] + nonreentrant_post + [["exit_to", "return_pc"]],
]

return IRnode.from_list(["seq", body, cleanup_routine])
ir_node = IRnode.from_list(["seq", body, cleanup_routine])
ir_node.passthrough_metadata["func_t"] = func_t
return ir_node
16 changes: 16 additions & 0 deletions vyper/codegen/ir_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ class IRnode:
valency: int
args: List["IRnode"]
value: Union[str, int]
is_self_call: bool
passthrough_metadata: dict[str, Any]
func_ir: Any
common_ir: Any

def __init__(
self,
Expand All @@ -184,6 +188,8 @@ def __init__(
mutable: bool = True,
add_gas_estimate: int = 0,
encoding: Encoding = Encoding.VYPER,
is_self_call: bool = False,
passthrough_metadata: dict[str, Any] = None,
):
if args is None:
args = []
Expand All @@ -201,6 +207,10 @@ def __init__(
self.add_gas_estimate = add_gas_estimate
self.encoding = encoding
self.as_hex = AS_HEX_DEFAULT
self.is_self_call = is_self_call
self.passthrough_metadata = passthrough_metadata or {}
self.func_ir = None
self.common_ir = None

assert self.value is not None, "None is not allowed as IRnode value"

Expand Down Expand Up @@ -585,6 +595,8 @@ def from_list(
error_msg: Optional[str] = None,
mutable: bool = True,
add_gas_estimate: int = 0,
is_self_call: bool = False,
passthrough_metadata: dict[str, Any] = None,
encoding: Encoding = Encoding.VYPER,
) -> "IRnode":
if isinstance(typ, str):
Expand Down Expand Up @@ -617,6 +629,8 @@ def from_list(
source_pos=source_pos,
encoding=encoding,
error_msg=error_msg,
is_self_call=is_self_call,
passthrough_metadata=passthrough_metadata,
)
else:
return cls(
Expand All @@ -630,4 +644,6 @@ def from_list(
add_gas_estimate=add_gas_estimate,
encoding=encoding,
error_msg=error_msg,
is_self_call=is_self_call,
passthrough_metadata=passthrough_metadata,
)
4 changes: 3 additions & 1 deletion vyper/codegen/return_.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def finalize(fill_return_buffer):
cleanup_loops = "cleanup_repeat" if context.forvars else "seq"
# NOTE: because stack analysis is incomplete, cleanup_repeat must
# come after fill_return_buffer otherwise the stack will break
return IRnode.from_list(["seq", fill_return_buffer, cleanup_loops, jump_to_exit])
jump_to_exit_ir = IRnode.from_list(jump_to_exit)
jump_to_exit_ir.passthrough_metadata["func_t"] = func_t
return IRnode.from_list(["seq", fill_return_buffer, cleanup_loops, jump_to_exit_ir])

if context.return_type is None:
if context.is_internal:
Expand Down
2 changes: 2 additions & 0 deletions vyper/codegen/self_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,6 @@ def ir_for_self_call(stmt_expr, context):
add_gas_estimate=func_t._ir_info.gas_estimate,
)
o.is_self_call = True
o.passthrough_metadata["func_t"] = func_t
o.passthrough_metadata["args_ir"] = args_ir
return o
2 changes: 2 additions & 0 deletions vyper/compiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def compile_code(
no_bytecode_metadata: bool = False,
show_gas_estimates: bool = False,
exc_handler: Optional[Callable] = None,
experimental_codegen: bool = False,
) -> dict:
"""
Generate consumable compiler output(s) from a single contract source code.
Expand Down Expand Up @@ -104,6 +105,7 @@ def compile_code(
storage_layout_override,
show_gas_estimates,
no_bytecode_metadata,
experimental_codegen,
)

ret = {}
Expand Down
28 changes: 24 additions & 4 deletions vyper/compiler/phases.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from vyper.semantics import set_data_positions, validate_semantics
from vyper.semantics.types.function import ContractFunctionT
from vyper.typing import StorageLayout
from vyper.venom import generate_assembly_experimental, generate_ir

DEFAULT_CONTRACT_NAME = PurePath("VyperContract.vy")

Expand Down Expand Up @@ -60,6 +61,7 @@ def __init__(
storage_layout: StorageLayout = None,
show_gas_estimates: bool = False,
no_bytecode_metadata: bool = False,
experimental_codegen: bool = False,
) -> None:
"""
Initialization method.
Expand All @@ -78,14 +80,18 @@ def __init__(
Show gas estimates for abi and ir output modes
no_bytecode_metadata: bool, optional
Do not add metadata to bytecode. Defaults to False
experimental_codegen: bool, optional
Use experimental codegen. Defaults to False
"""
# to force experimental codegen, uncomment:
# experimental_codegen = True
self.contract_path = contract_path
self.source_code = source_code
self.source_id = source_id
self.storage_layout_override = storage_layout
self.show_gas_estimates = show_gas_estimates
self.no_bytecode_metadata = no_bytecode_metadata

self.experimental_codegen = experimental_codegen
self.settings = settings or Settings()
self.input_bundle = input_bundle or FilesystemInputBundle([Path(".")])

Expand Down Expand Up @@ -160,7 +166,11 @@ def global_ctx(self) -> GlobalContext:
@cached_property
def _ir_output(self):
# fetch both deployment and runtime IR
return generate_ir_nodes(self.global_ctx, self.settings.optimize)
nodes = generate_ir_nodes(self.global_ctx, self.settings.optimize)
if self.experimental_codegen:
return [generate_ir(nodes[0]), generate_ir(nodes[1])]
else:
return nodes

@property
def ir_nodes(self) -> IRnode:
Expand All @@ -183,11 +193,21 @@ def function_signatures(self) -> dict[str, ContractFunctionT]:

@cached_property
def assembly(self) -> list:
return generate_assembly(self.ir_nodes, self.settings.optimize)
if self.experimental_codegen:
return generate_assembly_experimental(
self.ir_nodes, self.settings.optimize # type: ignore
)
else:
return generate_assembly(self.ir_nodes, self.settings.optimize)

@cached_property
def assembly_runtime(self) -> list:
return generate_assembly(self.ir_runtime, self.settings.optimize)
if self.experimental_codegen:
return generate_assembly_experimental(
self.ir_runtime, self.settings.optimize # type: ignore
)
else:
return generate_assembly(self.ir_runtime, self.settings.optimize)

@cached_property
def bytecode(self) -> bytes:
Expand Down
Loading

0 comments on commit cbac5ab

Please sign in to comment.