From fadd4de32be4f328b8ab17590d7fae262d0150fb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 20 Dec 2024 14:48:51 -0500 Subject: [PATCH] feat[venom]: update text format for data section (#4414) this commit updates the parser and grammar with new syntax for the data section. it also adds cases for raw bytes (which are generated by `--optimize codesize`) in the data section, which are required to round-trip venom produced in `--optimize codesize` mode. new format: ``` data readonly { dbsection foo { db @label1 db x"abcd" } } ``` the new "readonly" modifier doesn't have disambiguating power yet, but it will make it easier to add "writable" data sections to the grammar in the future if we decide to add that. misc/refactor: - remove xfail from previously failing test cases. - refactor internal representation of data section to not use `IRInstruction` - add `_as_asm_symbol` for easier lowering of labels - tweak `IRFunction.__repr__()` to add the function name at close (helpful for debugging). - tweak `IRBasicBlock.__repr__()` to add basic block and function name at close to help with readability --------- Co-authored-by: Harry Kalogirou --- tests/functional/venom/parser/test_parsing.py | 86 +++++++++---------- tests/functional/venom/test_venom_repr.py | 6 -- tests/venom_utils.py | 7 +- vyper/venom/basicblock.py | 4 + vyper/venom/context.py | 50 +++++++++-- vyper/venom/function.py | 3 +- vyper/venom/ir_node_to_venom.py | 9 +- vyper/venom/parser.py | 47 +++++++--- vyper/venom/passes/normalization.py | 7 +- vyper/venom/venom_to_assembly.py | 49 ++++++----- 10 files changed, 164 insertions(+), 104 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index f1fc59cf40..f18a51fe76 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -1,6 +1,6 @@ from tests.venom_utils import assert_ctx_eq -from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable -from vyper.venom.context import IRContext +from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRLiteral, IRVariable +from vyper.venom.context import DataItem, DataSection, IRContext from vyper.venom.function import IRFunction from vyper.venom.parser import parse_venom @@ -11,8 +11,6 @@ def test_single_bb(): main: stop } - - [data] """ parsed_ctx = parse_venom(source) @@ -38,8 +36,6 @@ def test_multi_bb_single_fn(): has_callvalue: revert 0, 0 } - - [data] """ parsed_ctx = parse_venom(source) @@ -61,8 +57,6 @@ def test_multi_bb_single_fn(): has_callvalue_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) has_callvalue_bb.append_instruction("stop") - start_fn.last_variable = 4 - assert_ctx_eq(parsed_ctx, expected_ctx) @@ -74,15 +68,16 @@ def test_data_section(): stop } - [data] - dbname @selector_buckets - db @selector_bucket_0 - db @fallback - db @selector_bucket_2 - db @selector_bucket_3 - db @fallback - db @selector_bucket_5 - db @selector_bucket_6 + data readonly { + dbsection selector_buckets: + db @selector_bucket_0 + db @fallback + db @selector_bucket_2 + db @selector_bucket_3 + db @fallback + db @selector_bucket_5 + db @selector_bucket_6 + } """ ) @@ -91,14 +86,18 @@ def test_data_section(): entry_fn.get_basic_block("entry").append_instruction("stop") expected_ctx.data_segment = [ - IRInstruction("dbname", [IRLabel("selector_buckets")]), - IRInstruction("db", [IRLabel("selector_bucket_0")]), - IRInstruction("db", [IRLabel("fallback")]), - IRInstruction("db", [IRLabel("selector_bucket_2")]), - IRInstruction("db", [IRLabel("selector_bucket_3")]), - IRInstruction("db", [IRLabel("fallback")]), - IRInstruction("db", [IRLabel("selector_bucket_5")]), - IRInstruction("db", [IRLabel("selector_bucket_6")]), + DataSection( + IRLabel("selector_buckets"), + [ + DataItem(IRLabel("selector_bucket_0")), + DataItem(IRLabel("fallback")), + DataItem(IRLabel("selector_bucket_2")), + DataItem(IRLabel("selector_bucket_3")), + DataItem(IRLabel("fallback")), + DataItem(IRLabel("selector_bucket_5")), + DataItem(IRLabel("selector_bucket_6")), + ], + ) ] assert_ctx_eq(parsed_ctx, expected_ctx) @@ -126,8 +125,6 @@ def test_multi_function(): has_value: revert 0, 0 } - - [data] """ ) @@ -157,8 +154,6 @@ def test_multi_function(): value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) value_bb.append_instruction("stop") - check_fn.last_variable = 2 - assert_ctx_eq(parsed_ctx, expected_ctx) @@ -185,13 +180,14 @@ def test_multi_function_and_data(): revert 0, 0 } - [data] - dbname @selector_buckets - db @selector_bucket_0 - db @fallback - db @selector_bucket_2 - db @selector_bucket_3 - db @selector_bucket_6 + data readonly { + dbsection selector_buckets: + db @selector_bucket_0 + db @fallback + db @selector_bucket_2 + db @selector_bucket_3 + db @selector_bucket_6 + } """ ) @@ -221,15 +217,17 @@ def test_multi_function_and_data(): value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) value_bb.append_instruction("stop") - check_fn.last_variable = 2 - expected_ctx.data_segment = [ - IRInstruction("dbname", [IRLabel("selector_buckets")]), - IRInstruction("db", [IRLabel("selector_bucket_0")]), - IRInstruction("db", [IRLabel("fallback")]), - IRInstruction("db", [IRLabel("selector_bucket_2")]), - IRInstruction("db", [IRLabel("selector_bucket_3")]), - IRInstruction("db", [IRLabel("selector_bucket_6")]), + DataSection( + IRLabel("selector_buckets"), + [ + DataItem(IRLabel("selector_bucket_0")), + DataItem(IRLabel("fallback")), + DataItem(IRLabel("selector_bucket_2")), + DataItem(IRLabel("selector_bucket_3")), + DataItem(IRLabel("selector_bucket_6")), + ], + ) ] assert_ctx_eq(parsed_ctx, expected_ctx) diff --git a/tests/functional/venom/test_venom_repr.py b/tests/functional/venom/test_venom_repr.py index caa315dbbb..c25ce381d8 100644 --- a/tests/functional/venom/test_venom_repr.py +++ b/tests/functional/venom/test_venom_repr.py @@ -4,7 +4,6 @@ from tests.venom_utils import assert_ctx_eq, parse_venom from vyper.compiler import compile_code -from vyper.compiler.settings import OptimizationLevel from vyper.venom.context import IRContext """ @@ -18,11 +17,6 @@ def get_example_vy_filenames(): @pytest.mark.parametrize("vy_filename", get_example_vy_filenames()) def test_round_trip(vy_filename, optimize, request): - if optimize == OptimizationLevel.CODESIZE: - # codesize optimization issues things like `db b"\x12\x34"` which we - # don't handle. - request.node.add_marker(pytest.mark.xfail(strict=False, reason="unimplemented in parser")) - path = f"examples/{vy_filename}" with open(path) as f: vyper_source = f.read() diff --git a/tests/venom_utils.py b/tests/venom_utils.py index a67df6c275..85298ccb87 100644 --- a/tests/venom_utils.py +++ b/tests/venom_utils.py @@ -36,14 +36,11 @@ def assert_fn_eq(fn1: IRFunction, fn2: IRFunction): def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext): - assert len(ctx1.functions) == len(ctx2.functions) for label1, fn1 in ctx1.functions.items(): assert label1 in ctx2.functions assert_fn_eq(fn1, ctx2.functions[label1]) + assert len(ctx1.functions) == len(ctx2.functions) # check entry function is the same assert next(iter(ctx1.functions.keys())) == next(iter(ctx2.functions.keys())) - - assert len(ctx1.data_segment) == len(ctx2.data_segment) - for d1, d2 in zip(ctx1.data_segment, ctx2.data_segment): - assert instructions_eq(d1, d2), f"data: [{d1}] != [{d2}]" + assert ctx1.data_segment == ctx2.data_segment, ctx2.data_segment diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 4b8eec2263..b0f0b00341 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -661,4 +661,8 @@ def __repr__(self) -> str: s += f" OUT={[bb.label for bb in self.cfg_out]} => {self.out_vars}\n" for instruction in self.instructions: s += f" {str(instruction).strip()}\n" + if len(self.instructions) > 30: + s += f" ; {self.label}\n" + if len(self.instructions) > 30 or self.parent.num_basic_blocks > 5: + s += f" ; ({self.parent.name})\n\n" return s diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 391da3e189..0c5cbc379c 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -1,14 +1,40 @@ +import textwrap +from dataclasses import dataclass, field from typing import Optional -from vyper.venom.basicblock import IRInstruction, IRLabel, IROperand +from vyper.venom.basicblock import IRLabel from vyper.venom.function import IRFunction +@dataclass +class DataItem: + data: IRLabel | bytes # can be raw data or bytes + + def __str__(self): + if isinstance(self.data, IRLabel): + return f"@{self.data}" + else: + assert isinstance(self.data, bytes) + return f'x"{self.data.hex()}"' + + +@dataclass +class DataSection: + label: IRLabel + data_items: list[DataItem] = field(default_factory=list) + + def __str__(self): + ret = [f"dbsection {self.label.value}:"] + for item in self.data_items: + ret.append(f" db {item}") + return "\n".join(ret) + + class IRContext: functions: dict[IRLabel, IRFunction] ctor_mem_size: Optional[int] immutables_len: Optional[int] - data_segment: list[IRInstruction] + data_segment: list[DataSection] last_label: int def __init__(self) -> None: @@ -47,11 +73,16 @@ def chain_basic_blocks(self) -> None: for fn in self.functions.values(): fn.chain_basic_blocks() - def append_data(self, opcode: str, args: list[IROperand]) -> None: + def append_data_section(self, name: IRLabel) -> None: + self.data_segment.append(DataSection(name)) + + def append_data_item(self, data: IRLabel | bytes) -> None: """ - Append data + Append data to current data section """ - self.data_segment.append(IRInstruction(opcode, args)) # type: ignore + assert len(self.data_segment) > 0 + data_section = self.data_segment[-1] + data_section.data_items.append(DataItem(data)) def as_graph(self) -> str: s = ["digraph G {"] @@ -64,12 +95,13 @@ def as_graph(self) -> str: def __repr__(self) -> str: s = [] for fn in self.functions.values(): - s.append(fn.__repr__()) + s.append(IRFunction.__repr__(fn)) s.append("\n") if len(self.data_segment) > 0: - s.append("\n[data]") - for inst in self.data_segment: - s.append(f" {inst}") + s.append("data readonly {") + for data_section in self.data_segment: + s.append(textwrap.indent(DataSection.__str__(data_section), " ")) + s.append("}") return "\n".join(s) diff --git a/vyper/venom/function.py b/vyper/venom/function.py index 5709f65be4..f02da77fe3 100644 --- a/vyper/venom/function.py +++ b/vyper/venom/function.py @@ -228,4 +228,5 @@ def __repr__(self) -> str: bb_str = textwrap.indent(str(bb), " ") ret += f"{bb_str}\n" ret = ret.strip() + "\n}" - return ret.strip() + ret += f" ; close function {self.name}" + return ret diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 4dcc5ee4e6..f46457b77f 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -367,14 +367,15 @@ def _convert_ir_bb(fn, ir, symbols): elif ir.value == "symbol": return IRLabel(ir.args[0].value, True) elif ir.value == "data": - label = IRLabel(ir.args[0].value) - ctx.append_data("dbname", [label]) + label = IRLabel(ir.args[0].value, True) + ctx.append_data_section(label) for c in ir.args[1:]: if isinstance(c.value, bytes): - ctx.append_data("db", [c.value]) # type: ignore + ctx.append_data_item(c.value) elif isinstance(c, IRnode): data = _convert_ir_bb(fn, c, symbols) - ctx.append_data("db", [data]) # type: ignore + assert isinstance(data, IRLabel) # help mypy + ctx.append_data_item(data) elif ir.value == "label": label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 9ab223179e..5ccc29b7a4 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -10,12 +10,13 @@ IROperand, IRVariable, ) -from vyper.venom.context import IRContext +from vyper.venom.context import DataItem, DataSection, IRContext from vyper.venom.function import IRFunction VENOM_GRAMMAR = """ %import common.CNAME %import common.DIGIT + %import common.HEXDIGIT %import common.LETTER %import common.WS %import common.INT @@ -25,13 +26,15 @@ # Allow multiple comment styles COMMENT: ";" /[^\\n]*/ | "//" /[^\\n]*/ | "#" /[^\\n]*/ - start: function* data_section? + start: function* data_segment? # TODO: consider making entry block implicit, e.g. # `"{" instruction+ block* "}"` function: "function" LABEL_IDENT "{" block* "}" - data_section: "[data]" instruction* + data_segment: "data" "readonly" "{" data_section* "}" + data_section: "dbsection" LABEL_IDENT ":" data_item+ + data_item: "db" (HEXSTR | LABEL) block: LABEL_IDENT ":" "\\n" statement* @@ -53,7 +56,9 @@ LABEL_IDENT: (NAME | ESCAPED_STRING) LABEL: "@" LABEL_IDENT + DOUBLE_QUOTE: "\\"" NAME: (DIGIT|LETTER|"_")+ + HEXSTR: "x" DOUBLE_QUOTE (HEXDIGIT|"_")+ DOUBLE_QUOTE %ignore WS %ignore COMMENT @@ -101,17 +106,21 @@ def _unescape(s: str): return s -class _DataSegment: - def __init__(self, instructions): - self.instructions = instructions +class _TypedItem: + def __init__(self, children): + self.children = children + + +class _DataSegment(_TypedItem): + pass class VenomTransformer(Transformer): def start(self, children) -> IRContext: ctx = IRContext() - data_section = [] - if isinstance(children[-1], _DataSegment): - data_section = children.pop().instructions + if len(children) > 0 and isinstance(children[-1], _DataSegment): + ctx.data_segment = children.pop().children + funcs = children for fn_name, blocks in funcs: fn = ctx.create_function(fn_name) @@ -130,8 +139,6 @@ def start(self, children) -> IRContext: _set_last_var(fn) _set_last_label(ctx) - ctx.data_segment = data_section - return ctx def function(self, children) -> tuple[str, list[tuple[str, list[IRInstruction]]]]: @@ -141,9 +148,25 @@ def function(self, children) -> tuple[str, list[tuple[str, list[IRInstruction]]] def statement(self, children): return children[0] - def data_section(self, children): + def data_segment(self, children): return _DataSegment(children) + def data_section(self, children): + label = IRLabel(children[0], True) + data_items = children[1:] + assert all(isinstance(item, DataItem) for item in data_items) + return DataSection(label, data_items) + + def data_item(self, children): + item = children[0] + if isinstance(item, IRLabel): + return DataItem(item) + assert item.startswith('x"') + assert item.endswith('"') + item = item.removeprefix('x"').removesuffix('"') + item = item.replace("_", "") + return DataItem(bytes.fromhex(item)) + def block(self, children) -> tuple[str, list[IRInstruction]]: label, *instructions = children return label, instructions diff --git a/vyper/venom/passes/normalization.py b/vyper/venom/passes/normalization.py index 7ca242c74e..37ba1023c9 100644 --- a/vyper/venom/passes/normalization.py +++ b/vyper/venom/passes/normalization.py @@ -45,9 +45,10 @@ def _insert_split_basicblock(self, bb: IRBasicBlock, in_bb: IRBasicBlock) -> IRB inst.operands[i] = split_bb.label # Update the labels in the data segment - for inst in fn.ctx.data_segment: - if inst.opcode == "db" and inst.operands[0] == bb.label: - inst.operands[0] = split_bb.label + for data_section in fn.ctx.data_segment: + for item in data_section.data_items: + if item.data == bb.label: + item.data = split_bb.label return split_bb diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index e136932f51..048555a221 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -122,6 +122,11 @@ def apply_line_numbers(inst: IRInstruction, asm) -> list[str]: return ret # type: ignore +def _as_asm_symbol(label: IRLabel) -> str: + # Lower an IRLabel to an assembly symbol + return f"_sym_{label.value}" + + # TODO: "assembly" gets into the recursion due to how the original # IR was structured recursively in regards with the deploy instruction. # There, recursing into the deploy instruction was by design, and @@ -183,19 +188,19 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: asm.extend(_REVERT_POSTAMBLE) # Append data segment - data_segments: dict = dict() - for inst in ctx.data_segment: - if inst.opcode == "dbname": - label = inst.operands[0] - data_segments[label] = [DataHeader(f"_sym_{label.value}")] - elif inst.opcode == "db": - data = inst.operands[0] + for data_section in ctx.data_segment: + label = data_section.label + asm_data_section: list[Any] = [] + asm_data_section.append(DataHeader(_as_asm_symbol(label))) + for item in data_section.data_items: + data = item.data if isinstance(data, IRLabel): - data_segments[label].append(f"_sym_{data.value}") + asm_data_section.append(_as_asm_symbol(data)) else: - data_segments[label].append(data) + assert isinstance(data, bytes) + asm_data_section.append(data) - asm.extend(list(data_segments.values())) + asm.append(asm_data_section) if no_optimize is False: optimize_assembly(top_asm) @@ -260,7 +265,7 @@ def _emit_input_operands( # invoke emits the actual instruction itself so we don't need # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": - assembly.append(f"_sym_{op.value}") + assembly.append(_as_asm_symbol(op)) stack.push(op) continue @@ -294,7 +299,7 @@ def _generate_evm_for_basicblock_r( asm = [] # assembly entry point into the block - asm.append(f"_sym_{basicblock.label.value}") + asm.append(_as_asm_symbol(basicblock.label)) asm.append("JUMPDEST") if len(basicblock.cfg_in) == 1: @@ -409,7 +414,9 @@ def _generate_evm_for_instruction( return apply_line_numbers(inst, assembly) if opcode == "offset": - assembly.extend(["_OFST", f"_sym_{inst.operands[1].value}", inst.operands[0].value]) + ofst, label = inst.operands + assert isinstance(label, IRLabel) # help mypy + assembly.extend(["_OFST", _as_asm_symbol(label), ofst.value]) assert isinstance(inst.output, IROperand), "Offset must have output" stack.push(inst.output) return apply_line_numbers(inst, assembly) @@ -471,24 +478,26 @@ def _generate_evm_for_instruction( pass elif opcode == "store": pass + elif opcode in ["codecopy", "dloadbytes"]: + assembly.append("CODECOPY") elif opcode == "dbname": pass elif opcode == "jnz": # jump if not zero - if_nonzero_label = inst.operands[1] - if_zero_label = inst.operands[2] - assembly.append(f"_sym_{if_nonzero_label.value}") + if_nonzero_label, if_zero_label = inst.get_label_operands() + assembly.append(_as_asm_symbol(if_nonzero_label)) assembly.append("JUMPI") # make sure the if_zero_label will be optimized out # assert if_zero_label == next(iter(inst.parent.cfg_out)).label - assembly.append(f"_sym_{if_zero_label.value}") + assembly.append(_as_asm_symbol(if_zero_label)) assembly.append("JUMP") elif opcode == "jmp": - assert isinstance(inst.operands[0], IRLabel) - assembly.append(f"_sym_{inst.operands[0].value}") + (target,) = inst.operands + assert isinstance(target, IRLabel) + assembly.append(_as_asm_symbol(target)) assembly.append("JUMP") elif opcode == "djmp": assert isinstance( @@ -503,7 +512,7 @@ def _generate_evm_for_instruction( assembly.extend( [ f"_sym_label_ret_{self.label_counter}", - f"_sym_{target.value}", + _as_asm_symbol(target), "JUMP", f"_sym_label_ret_{self.label_counter}", "JUMPDEST",