Skip to content

Commit

Permalink
feat[venom]: update text format for data section (vyperlang#4414)
Browse files Browse the repository at this point in the history
this commit updates the parser and grammar with new syntax for the data
section. it also adds cases for raw bytes (which are generated by
`--optimize codesize`) in the data section, which are required to
round-trip venom produced in `--optimize codesize` mode.

new format:
```
data readonly {
    dbsection foo {
        db @label1
        db x"abcd"
    }
}
```

the new "readonly" modifier doesn't have disambiguating power yet, but
it will make it easier to add "writable" data sections to the grammar in
the future if we decide to add that.

misc/refactor:
- remove xfail from previously failing test cases.
- refactor internal representation of data section to not use
  `IRInstruction`
- add `_as_asm_symbol` for easier lowering of labels
- tweak `IRFunction.__repr__()` to add the function name at close
  (helpful for debugging).
- tweak `IRBasicBlock.__repr__()` to add basic block and function name
  at close to help with readability

---------

Co-authored-by: Harry Kalogirou <[email protected]>
  • Loading branch information
charles-cooper and harkal authored Dec 20, 2024
1 parent a56d79d commit fadd4de
Show file tree
Hide file tree
Showing 10 changed files with 164 additions and 104 deletions.
86 changes: 42 additions & 44 deletions tests/functional/venom/parser/test_parsing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from tests.venom_utils import assert_ctx_eq
from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
from vyper.venom.context import IRContext
from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRLiteral, IRVariable
from vyper.venom.context import DataItem, DataSection, IRContext
from vyper.venom.function import IRFunction
from vyper.venom.parser import parse_venom

Expand All @@ -11,8 +11,6 @@ def test_single_bb():
main:
stop
}
[data]
"""

parsed_ctx = parse_venom(source)
Expand All @@ -38,8 +36,6 @@ def test_multi_bb_single_fn():
has_callvalue:
revert 0, 0
}
[data]
"""

parsed_ctx = parse_venom(source)
Expand All @@ -61,8 +57,6 @@ def test_multi_bb_single_fn():
has_callvalue_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0))
has_callvalue_bb.append_instruction("stop")

start_fn.last_variable = 4

assert_ctx_eq(parsed_ctx, expected_ctx)


Expand All @@ -74,15 +68,16 @@ def test_data_section():
stop
}
[data]
dbname @selector_buckets
db @selector_bucket_0
db @fallback
db @selector_bucket_2
db @selector_bucket_3
db @fallback
db @selector_bucket_5
db @selector_bucket_6
data readonly {
dbsection selector_buckets:
db @selector_bucket_0
db @fallback
db @selector_bucket_2
db @selector_bucket_3
db @fallback
db @selector_bucket_5
db @selector_bucket_6
}
"""
)

Expand All @@ -91,14 +86,18 @@ def test_data_section():
entry_fn.get_basic_block("entry").append_instruction("stop")

expected_ctx.data_segment = [
IRInstruction("dbname", [IRLabel("selector_buckets")]),
IRInstruction("db", [IRLabel("selector_bucket_0")]),
IRInstruction("db", [IRLabel("fallback")]),
IRInstruction("db", [IRLabel("selector_bucket_2")]),
IRInstruction("db", [IRLabel("selector_bucket_3")]),
IRInstruction("db", [IRLabel("fallback")]),
IRInstruction("db", [IRLabel("selector_bucket_5")]),
IRInstruction("db", [IRLabel("selector_bucket_6")]),
DataSection(
IRLabel("selector_buckets"),
[
DataItem(IRLabel("selector_bucket_0")),
DataItem(IRLabel("fallback")),
DataItem(IRLabel("selector_bucket_2")),
DataItem(IRLabel("selector_bucket_3")),
DataItem(IRLabel("fallback")),
DataItem(IRLabel("selector_bucket_5")),
DataItem(IRLabel("selector_bucket_6")),
],
)
]

assert_ctx_eq(parsed_ctx, expected_ctx)
Expand Down Expand Up @@ -126,8 +125,6 @@ def test_multi_function():
has_value:
revert 0, 0
}
[data]
"""
)

Expand Down Expand Up @@ -157,8 +154,6 @@ def test_multi_function():
value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0))
value_bb.append_instruction("stop")

check_fn.last_variable = 2

assert_ctx_eq(parsed_ctx, expected_ctx)


Expand All @@ -185,13 +180,14 @@ def test_multi_function_and_data():
revert 0, 0
}
[data]
dbname @selector_buckets
db @selector_bucket_0
db @fallback
db @selector_bucket_2
db @selector_bucket_3
db @selector_bucket_6
data readonly {
dbsection selector_buckets:
db @selector_bucket_0
db @fallback
db @selector_bucket_2
db @selector_bucket_3
db @selector_bucket_6
}
"""
)

Expand Down Expand Up @@ -221,15 +217,17 @@ def test_multi_function_and_data():
value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0))
value_bb.append_instruction("stop")

check_fn.last_variable = 2

expected_ctx.data_segment = [
IRInstruction("dbname", [IRLabel("selector_buckets")]),
IRInstruction("db", [IRLabel("selector_bucket_0")]),
IRInstruction("db", [IRLabel("fallback")]),
IRInstruction("db", [IRLabel("selector_bucket_2")]),
IRInstruction("db", [IRLabel("selector_bucket_3")]),
IRInstruction("db", [IRLabel("selector_bucket_6")]),
DataSection(
IRLabel("selector_buckets"),
[
DataItem(IRLabel("selector_bucket_0")),
DataItem(IRLabel("fallback")),
DataItem(IRLabel("selector_bucket_2")),
DataItem(IRLabel("selector_bucket_3")),
DataItem(IRLabel("selector_bucket_6")),
],
)
]

assert_ctx_eq(parsed_ctx, expected_ctx)
6 changes: 0 additions & 6 deletions tests/functional/venom/test_venom_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from tests.venom_utils import assert_ctx_eq, parse_venom
from vyper.compiler import compile_code
from vyper.compiler.settings import OptimizationLevel
from vyper.venom.context import IRContext

"""
Expand All @@ -18,11 +17,6 @@ def get_example_vy_filenames():

@pytest.mark.parametrize("vy_filename", get_example_vy_filenames())
def test_round_trip(vy_filename, optimize, request):
if optimize == OptimizationLevel.CODESIZE:
# codesize optimization issues things like `db b"\x12\x34"` which we
# don't handle.
request.node.add_marker(pytest.mark.xfail(strict=False, reason="unimplemented in parser"))

path = f"examples/{vy_filename}"
with open(path) as f:
vyper_source = f.read()
Expand Down
7 changes: 2 additions & 5 deletions tests/venom_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,11 @@ def assert_fn_eq(fn1: IRFunction, fn2: IRFunction):


def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext):
assert len(ctx1.functions) == len(ctx2.functions)
for label1, fn1 in ctx1.functions.items():
assert label1 in ctx2.functions
assert_fn_eq(fn1, ctx2.functions[label1])
assert len(ctx1.functions) == len(ctx2.functions)

# check entry function is the same
assert next(iter(ctx1.functions.keys())) == next(iter(ctx2.functions.keys()))

assert len(ctx1.data_segment) == len(ctx2.data_segment)
for d1, d2 in zip(ctx1.data_segment, ctx2.data_segment):
assert instructions_eq(d1, d2), f"data: [{d1}] != [{d2}]"
assert ctx1.data_segment == ctx2.data_segment, ctx2.data_segment
4 changes: 4 additions & 0 deletions vyper/venom/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,4 +661,8 @@ def __repr__(self) -> str:
s += f" OUT={[bb.label for bb in self.cfg_out]} => {self.out_vars}\n"
for instruction in self.instructions:
s += f" {str(instruction).strip()}\n"
if len(self.instructions) > 30:
s += f" ; {self.label}\n"
if len(self.instructions) > 30 or self.parent.num_basic_blocks > 5:
s += f" ; ({self.parent.name})\n\n"
return s
50 changes: 41 additions & 9 deletions vyper/venom/context.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,40 @@
import textwrap
from dataclasses import dataclass, field
from typing import Optional

from vyper.venom.basicblock import IRInstruction, IRLabel, IROperand
from vyper.venom.basicblock import IRLabel
from vyper.venom.function import IRFunction


@dataclass
class DataItem:
data: IRLabel | bytes # can be raw data or bytes

def __str__(self):
if isinstance(self.data, IRLabel):
return f"@{self.data}"
else:
assert isinstance(self.data, bytes)
return f'x"{self.data.hex()}"'


@dataclass
class DataSection:
label: IRLabel
data_items: list[DataItem] = field(default_factory=list)

def __str__(self):
ret = [f"dbsection {self.label.value}:"]
for item in self.data_items:
ret.append(f" db {item}")
return "\n".join(ret)


class IRContext:
functions: dict[IRLabel, IRFunction]
ctor_mem_size: Optional[int]
immutables_len: Optional[int]
data_segment: list[IRInstruction]
data_segment: list[DataSection]
last_label: int

def __init__(self) -> None:
Expand Down Expand Up @@ -47,11 +73,16 @@ def chain_basic_blocks(self) -> None:
for fn in self.functions.values():
fn.chain_basic_blocks()

def append_data(self, opcode: str, args: list[IROperand]) -> None:
def append_data_section(self, name: IRLabel) -> None:
self.data_segment.append(DataSection(name))

def append_data_item(self, data: IRLabel | bytes) -> None:
"""
Append data
Append data to current data section
"""
self.data_segment.append(IRInstruction(opcode, args)) # type: ignore
assert len(self.data_segment) > 0
data_section = self.data_segment[-1]
data_section.data_items.append(DataItem(data))

def as_graph(self) -> str:
s = ["digraph G {"]
Expand All @@ -64,12 +95,13 @@ def as_graph(self) -> str:
def __repr__(self) -> str:
s = []
for fn in self.functions.values():
s.append(fn.__repr__())
s.append(IRFunction.__repr__(fn))
s.append("\n")

if len(self.data_segment) > 0:
s.append("\n[data]")
for inst in self.data_segment:
s.append(f" {inst}")
s.append("data readonly {")
for data_section in self.data_segment:
s.append(textwrap.indent(DataSection.__str__(data_section), " "))
s.append("}")

return "\n".join(s)
3 changes: 2 additions & 1 deletion vyper/venom/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,5 @@ def __repr__(self) -> str:
bb_str = textwrap.indent(str(bb), " ")
ret += f"{bb_str}\n"
ret = ret.strip() + "\n}"
return ret.strip()
ret += f" ; close function {self.name}"
return ret
9 changes: 5 additions & 4 deletions vyper/venom/ir_node_to_venom.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,14 +367,15 @@ def _convert_ir_bb(fn, ir, symbols):
elif ir.value == "symbol":
return IRLabel(ir.args[0].value, True)
elif ir.value == "data":
label = IRLabel(ir.args[0].value)
ctx.append_data("dbname", [label])
label = IRLabel(ir.args[0].value, True)
ctx.append_data_section(label)
for c in ir.args[1:]:
if isinstance(c.value, bytes):
ctx.append_data("db", [c.value]) # type: ignore
ctx.append_data_item(c.value)
elif isinstance(c, IRnode):
data = _convert_ir_bb(fn, c, symbols)
ctx.append_data("db", [data]) # type: ignore
assert isinstance(data, IRLabel) # help mypy
ctx.append_data_item(data)
elif ir.value == "label":
label = IRLabel(ir.args[0].value, True)
bb = fn.get_basic_block()
Expand Down
Loading

0 comments on commit fadd4de

Please sign in to comment.