Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix[lang]: fix encoding of string literals #3091

Merged
Merged
45 changes: 28 additions & 17 deletions tests/functional/syntax/test_string.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from vyper import compiler
from vyper.exceptions import StructureException
from vyper.exceptions import InvalidLiteral, StructureException

valid_list = [
"""
@@ -11,25 +11,13 @@ def foo() -> String[10]:
""",
"""
@external
def foo():
x: String[11] = "¡très bien!"
""",
"""
@external
def foo() -> bool:
x: String[15] = "¡très bien!"
x: String[15] = "tres bien!"
y: String[15] = "test"
return x != y
""",
"""
@external
def foo() -> bool:
x: String[15] = "¡très bien!"
y: String[12] = "test"
return x != y
""",
"""
@external
def test() -> String[100]:
return "hello world!"
""",
@@ -46,13 +34,36 @@ def test_string_success(good_code):
"""
@external
def foo():
# invalid type annotation - should be String[N]
a: String = "abc"
""",
StructureException,
)
),
(
"""
@external
@view
def compile_hash() -> bytes32:
# GH issue #3088 - ord("è") == 232
return keccak256("è")
""",
InvalidLiteral,
),
(
"""
@external
def foo() -> bool:
# ord("¡") == 161
x: String[15] = "¡très bien!"
y: String[12] = "test"
return x != y
""",
InvalidLiteral,
),
]


@pytest.mark.parametrize("bad_code,exc", invalid_list)
def test_string_fail(assert_compile_failed, get_contract, bad_code, exc):
assert_compile_failed(lambda: get_contract(bad_code), exc)
def test_string_fail(get_contract, bad_code, exc):
with pytest.raises(exc):
compiler.compile_code(bad_code)
5 changes: 4 additions & 1 deletion vyper/ast/nodes.py
Original file line number Diff line number Diff line change
@@ -873,7 +873,10 @@ class Str(Constant):

def validate(self):
for c in self.value:
if ord(c) >= 256:
# in utf-8, bytes in the 128 and up range deviate from latin1 and
# can be control bytes, allowing multi-byte characters.
# reject them here.
if ord(c) >= 128:
raise InvalidLiteral(f"'{c}' is not an allowed string literal character", self)


29 changes: 10 additions & 19 deletions vyper/codegen/expr.py
Original file line number Diff line number Diff line change
@@ -61,13 +61,7 @@
from vyper.semantics.types.bytestrings import _BytestringT
from vyper.semantics.types.function import ContractFunctionT, MemberFunctionT
from vyper.semantics.types.shortcuts import BYTES32_T, UINT256_T
from vyper.utils import (
DECIMAL_DIVISOR,
bytes_to_int,
is_checksum_encoded,
string_to_bytes,
vyper_warn,
)
from vyper.utils import DECIMAL_DIVISOR, bytes_to_int, is_checksum_encoded, vyper_warn

ENVIRONMENT_VARIABLES = {"block", "msg", "tx", "chain"}

@@ -135,24 +129,21 @@

# String literals
def parse_Str(self):
bytez, bytez_length = string_to_bytes(self.expr.value)
typ = StringT(bytez_length)
return self._make_bytelike(typ, bytez, bytez_length)
bytez = self.expr.value.encode("utf-8")
return self._make_bytelike(StringT, bytez)

# Byte literals
def parse_Bytes(self):
return self._parse_bytes()
return self._make_bytelike(BytesT, self.expr.value)

Check warning on line 137 in vyper/codegen/expr.py

Codecov / codecov/patch

vyper/codegen/expr.py#L137

Added line #L137 was not covered by tests

def parse_HexBytes(self):
return self._parse_bytes()

def _parse_bytes(self):
bytez = self.expr.value
bytez_length = len(self.expr.value)
typ = BytesT(bytez_length)
return self._make_bytelike(typ, bytez, bytez_length)
# HexBytes already has value as bytes
assert isinstance(self.expr.value, bytes)
return self._make_bytelike(BytesT, self.expr.value)

Check warning on line 142 in vyper/codegen/expr.py

Codecov / codecov/patch

vyper/codegen/expr.py#L141-L142

Added lines #L141 - L142 were not covered by tests

def _make_bytelike(self, btype, bytez, bytez_length):
def _make_bytelike(self, typeclass, bytez):
bytez_length = len(bytez)
btype = typeclass(bytez_length)
placeholder = self.context.new_internal_variable(btype)
seq = []
seq.append(["mstore", placeholder, bytez_length])
13 changes: 1 addition & 12 deletions vyper/utils.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@
import warnings
from typing import Generic, List, TypeVar, Union

from vyper.exceptions import CompilerPanic, DecimalOverrideException, InvalidLiteral, VyperException
from vyper.exceptions import CompilerPanic, DecimalOverrideException, VyperException

Check notice

Code scanning / CodeQL

Cyclic import Note

Import of module
vyper.exceptions
begins an import cycle.

_T = TypeVar("_T")

@@ -310,17 +310,6 @@
return int(d.to_integral_exact(decimal.ROUND_DOWN))


# Converts string to bytes
def string_to_bytes(str):
    """Convert a string literal to bytes, one byte per character.

    Each character is mapped to the single byte whose value equals the
    character's code point. This agrees with UTF-8 only for code points
    below 128; characters in the 128-255 range are emitted as one byte
    here, whereas UTF-8 would encode them as multiple bytes.

    Args:
        str: The text to convert. The parameter name shadows the builtin
            and is kept only so existing keyword callers keep working.

    Returns:
        A ``(bytez, bytez_length)`` tuple: the encoded bytes and their length.

    Raises:
        InvalidLiteral: If any character has a code point of 256 or more,
            since it cannot be represented in a single byte.
    """
    code_points = []
    for c in str:
        code_point = ord(c)
        if code_point >= 256:
            raise InvalidLiteral(f"Cannot insert special character {c} into byte array")
        code_points.append(code_point)
    # Build the bytes object once instead of concatenating per character,
    # which copied the whole accumulated buffer on every iteration.
    bytez = bytes(code_points)
    return bytez, len(bytez)


# Converts a provided hex string to an integer
def hex_to_int(inp):
if inp[:2] == "0x":