Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Opaque values #1913

Merged
merged 5 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/src/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,7 @@ pub fn subst<C: Cache>(
// loop. Although avoidable, this requires some care and is not currently needed.
| v @ Term::Fun(..)
| v @ Term::Lbl(_)
| v @ Term::Opaque(_)
| v @ Term::SealingKey(_)
| v @ Term::Enum(_)
| v @ Term::Import(_)
Expand Down
8 changes: 8 additions & 0 deletions core/src/eval/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3439,6 +3439,14 @@ fn eq<C: Cache>(
eq_pos: pos_op,
term: RichTerm::new(Term::Fun(i, rt), pos2),
}),
(Term::Opaque(v), _) => Err(EvalError::EqError {
eq_pos: pos_op,
term: RichTerm::new(Term::Opaque(v), pos1),
}),
(_, Term::Opaque(v)) => Err(EvalError::EqError {
eq_pos: pos_op,
term: RichTerm::new(Term::Opaque(v), pos2),
}),
(_, _) => Ok(EqResult::Bool(false)),
}
}
Expand Down
47 changes: 46 additions & 1 deletion core/src/eval/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use crate::term::make as mk_term;
use crate::term::Number;
use crate::term::{BinaryOp, StrChunk, UnaryOp};
use crate::transform::import_resolution::strict::resolve_imports;
use crate::{mk_app, mk_fun};
use crate::{mk_app, mk_fun, mk_record};
use assert_matches::assert_matches;
use codespan::Files;

/// Evaluate a term without import support.
Expand All @@ -18,6 +19,13 @@ fn eval_no_import(t: RichTerm) -> Result<Term, EvalError> {
.map(Term::from)
}

/// Run the full evaluation of a term on a fresh virtual machine, without import support.
///
/// The machine is built with a `DummyResolver` and `std::io::sink()`, so anything written to
/// that writer is discarded. The outcome of `eval_full` is converted into a bare `Term`; an
/// evaluation error is returned unchanged.
fn eval_full_no_import(t: RichTerm) -> Result<Term, EvalError> {
    let evaluated =
        VirtualMachine::<_, CacheImpl>::new(DummyResolver {}, std::io::sink()).eval_full(t)?;
    Ok(Term::from(evaluated))
}

fn parse(s: &str) -> Option<RichTerm> {
let id = Files::new().add("<test>", String::from(s));

Expand Down Expand Up @@ -347,3 +355,40 @@ fn substitution() {
.to_string()
);
}

#[test]
fn opaque() {
    // The merge of a record holding a number with a record holding an opaque value.
    let merged = mk_term::op2(
        BinaryOp::Merge(Label::default().into()),
        mk_record!(("a", RichTerm::from(Term::Num(Number::from(1))))),
        mk_record!(("b", RichTerm::from(Term::Opaque(42)))),
    );

    // A term containing an opaque value evaluates like any other term: the result is a record
    // and the opaque payload sits untouched in field `b`.
    let record = match eval_no_import(merged.clone()).unwrap() {
        Term::Record(record) => record,
        _ => panic!(),
    };
    let key = LocIdent::from(Ident::new("b"));
    let value = record.fields.get(&key).unwrap().value.as_ref().unwrap();
    assert_matches!(value.as_ref(), Term::Opaque(42));

    // Testing two opaque values for equality is an evaluation error.
    let equality = mk_term::op2(
        BinaryOp::Eq(),
        RichTerm::from(Term::Opaque(43)),
        RichTerm::from(Term::Opaque(42)),
    );
    assert_matches!(eval_no_import(equality), Err(EvalError::EqError { .. }));

    // Merging a term with itself would need to compare the opaque values in field `b`, which
    // isn't allowed, so the merge is rejected even though both sides carry the same payload.
    let self_merge = mk_term::op2(
        BinaryOp::Merge(Label::default().into()),
        merged.clone(),
        merged,
    );
    assert_matches!(
        eval_full_no_import(self_merge),
        Err(EvalError::MergeIncompatibleArgs { .. })
    );
}
1 change: 1 addition & 0 deletions core/src/parser/uniterm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,7 @@ impl FixTypeVars for Type {
| TypeF::Number
| TypeF::Bool
| TypeF::String
| TypeF::Opaque
| TypeF::Symbol
| TypeF::Flat(_)
// We don't fix type variables inside a dictionary contract. A dictionary contract
Expand Down
2 changes: 2 additions & 0 deletions core/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@ where
.nest(2)
]
.group(),
Opaque(_) => allocator.text("%<opaque>"),
SealingKey(sym) => allocator.text(format!("%<sealing key: {sym}>")),
Sealed(_i, _rt, _lbl) => allocator.text("%<sealed>"),
Annotated(annot, rt) => allocator.atom(rt).append(annot.pretty(allocator)),
Expand Down Expand Up @@ -1110,6 +1111,7 @@ where
]
}
.group(),
Opaque => allocator.text("Opaque"),
Symbol => allocator.text("Symbol"),
Flat(t) => t.pretty(allocator),
Var(var) => allocator.as_string(var),
Expand Down
1 change: 1 addition & 0 deletions core/src/stdlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pub mod internals {

generate_accessor!(num);
generate_accessor!(bool);
generate_accessor!(opaque);
generate_accessor!(string);
generate_accessor!(fail);

Expand Down
12 changes: 12 additions & 0 deletions core/src/term/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,13 @@ pub enum Term {
///
/// This is a temporary solution, and will be removed in the future.
Closure(CacheIndex),

#[serde(skip)]
/// An opaque value that cannot be constructed within Nickel code.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be worth insisting on the fact that they cannot be constructed, and also that they should never be observable from within Nickel code (compared or distinguished by any means).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made sure they can't be compared for equality; is there some other kind of observability that needs to be handled?

I was imagining that these values could be copied around (and possibly manipulated by functions provided by the nickel embedder). So you'd be able to write a contract like { username | String, token | ForeignId }, and then by applying this contract you could observe the presence or absence of the token.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I said observable, but the right technical term is probably separable, which means "distinguishable".

That is, for every context C (that you can think of just as a function here), for any pair of foreign keys k1 and k2, if C k1 k2 ~> v (the context evaluates to the value v), then for all other foreign keys k and l, C k l ~> v' with v ~ v' (I won't define the precise meaning of ~, but let's say it's an observational equivalence). Maybe we need to extend that to arbitrarily long finite lists of keys.

Put differently, no result of an expression can depend on the actual values of the keys. They must be all interchangeable without affecting the semantics. We need to include error messages as well (so my above specification is incomplete), because that's another way for a malicious user to get the value of the key indirectly - but you properly don't print the content of the key in the pretty printer, which is all good.

Copy link

@aiverson aiverson May 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comparing by equality between foreignIds is actually specifically fine and useful. Even being able to use them as keys in maps is fine and useful as long as that doesn't reveal the content of the opaque reference directly or indirectly. (though there is some subtlety of exactly which equality is being checked, since there could be two copies of the same reference, two different references to the same local data, or two different references to promises which will eventually become the same)
It's being able to forge them or inspect the backing data that's the problem, and as long as there's no way to make a new opaque reference (from inside the language; making them from foreign functions into the trusted platform is fine) that is satisfied, and no way to look up what's behind the foreign reference. Being able to check the arbitrary number in the foreign reference that indexes the backing table isn't even a security issue as long as the table itself is impossible to access, but it is good practice to forbid that since any kind of behavioral reliance on the specific values of those numbers is fragile and should be prevented, and if the numbers are viewable in the first place someone accidentally allowing forgery is more likely.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using them as map keys is much more invasive. Currently, record fields in Nickel are all strings, which you can list with stdlib functions. So opaque values would need special casing here.

Regarding equality, this is easy to add - I just wonder if there are other use-cases where you would want to not even have equality. In any case, I would propose to start with the most restrictive design possible and move forward with this PR, and then see case by case what we would add and why, if that sounds good to you all.

///
/// This can be used by programs that embed Nickel, as they can inject these opaque
/// values into the AST.
Opaque(u64),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if we spell u64 somewhere else, but I wonder if we should use a type alias instead to make it easy to switch to something else (i.e. define Opaque(OpaquePayload) with type OpaquePayload = u64 or something).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might even be fairly straightforward to make the interpreter generic over these if desired; I don't actually need that for my usecases so it might not be worth it, but it might allow generalizing to other systems nicely.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that would be a bit painful, since it would involve carrying around an extra type parameter everywhere that Term is used (which is basically everywhere). An easier version would be to have a Box<dyn Any> to allow downcasting.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with Joe: on paper it's just adding a generic, but API-wise, this is painful, because you often have to propagate this type parameter everywhere (every site that uses a term, which is pretty much everywhere). For a less fundamental data structure we could add a parameter and make an alias like pub type Foo = ParametrizedFoo<usize>, but for terms, I'm afraid this will leak into typechecking, transform, etc., which might not be worth it. The Box<dyn Any> doesn't look very nice, but yeah, if it's really needed, this might be a possibility.

}

// PartialEq is mostly used for tests, when it's handy to compare something to an expected result.
Expand Down Expand Up @@ -872,6 +879,7 @@ impl Term {
Term::SealingKey(_) => Some("SealingKey".to_owned()),
Term::Sealed(..) => Some("Sealed".to_owned()),
Term::Annotated(..) => Some("Annotated".to_owned()),
Term::Opaque(_) => Some("Opaque".to_owned()),
Term::Let(..)
| Term::LetPattern(..)
| Term::App(_, _)
Expand Down Expand Up @@ -918,6 +926,7 @@ impl Term {
| Term::EnumVariant {..}
| Term::Record(..)
| Term::Array(..)
| Term::Opaque(_)
| Term::SealingKey(_) => true,
Term::Let(..)
| Term::LetPattern(..)
Expand Down Expand Up @@ -975,6 +984,7 @@ impl Term {
| Term::Str(_)
| Term::Lbl(_)
| Term::Enum(_)
| Term::Opaque(_)
| Term::SealingKey(_) => true,
Term::Let(..)
| Term::LetPattern(..)
Expand Down Expand Up @@ -1017,6 +1027,7 @@ impl Term {
| Term::Array(..)
| Term::Var(..)
| Term::SealingKey(..)
| Term::Opaque(..)
| Term::Op1(UnaryOp::StaticAccess(_), _)
| Term::Op2(BinaryOp::DynAccess(), _, _)
// Those special cases aren't really atoms, but mustn't be parenthesized because they
Expand Down Expand Up @@ -2169,6 +2180,7 @@ impl Traverse<RichTerm> for RichTerm {
| Term::Import(_)
| Term::ResolvedImport(_)
| Term::SealingKey(_)
| Term::Opaque(_)
| Term::ParseError(_)
| Term::RuntimeError(_) => None,
Term::StrChunks(chunks) => chunks.iter().find_map(|ch| {
Expand Down
2 changes: 2 additions & 0 deletions core/src/transform/free_vars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ impl CollectFreeVars for RichTerm {
| Term::Num(_)
| Term::Str(_)
| Term::Lbl(_)
| Term::Opaque(_)
| Term::SealingKey(_)
| Term::Enum(_)
| Term::Import(_)
Expand Down Expand Up @@ -186,6 +187,7 @@ impl CollectFreeVars for Type {
| TypeF::Number
| TypeF::Bool
| TypeF::String
| TypeF::Opaque
| TypeF::Symbol
| TypeF::Var(_)
| TypeF::Wildcard(_) => (),
Expand Down
5 changes: 5 additions & 0 deletions core/src/typ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ pub enum TypeF<Ty, RRows, ERows> {
///
/// See [`crate::term::Term::Sealed`].
Symbol,
/// An opaque value, the type of `Term::Opaque`.
Opaque,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should even worry about having a type for that, given that the user can't do much with opaque values, and can't use this type right now (it's not in the grammar, and it would be a breaking change to add it). However it doesn't cost much, and maybe it could be useful for Rust binaries consuming the library? I'm not sure.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking that the nickel embedder might provide a function like union: ForeignId -> ForeignId -> ForeignId, so it would be nice if we could type-check it

Copy link
Member

@yannham yannham May 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you mean customizing the set of operations possible on opaque values with their own operations? There is still the problem that if the user can't write this type down, it is somehow a second class citizen (and once again, making it possible to spell it out is a breaking change).

All of that being said, having the type internally is like 10 additional lines of code, so let's not argue about it too much. It doesn't really hurt to have this type internally, and see later if it can be turned into something useful. I'm fine with keeping it as it is for now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A second idea to make it not a breaking change is to wait for the let-type RFC, and make it possible to define primitive types - think %Opaque% (just so it doesn't clash with user-land). Then, once we can export types from a record, we could define stdlib namespaced types, like type Opaque = %Opaque% in std.foreign or something.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps a good way to do this would be to have a newtype construct to allow creating a distinct type family that is backed by an existing type, so that we can have this core Opaque/ForeignId type, then give names and possibly generics to various kinds of foreign data. That allows making more specific function signatures that work on foreign data too.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, in fact the let type or type = would precisely act like a newtype. But the point stands that you need to be able to refer to a primitive Opaque type at some point, and just adding it would break backward compatibility (maybe someone called their contract Opaque already). So the idea would be to have the primitive type use an obscure internal syntax - as other primops work in nickel already - and the stdlib would just export type Opaque = %BuiltinOpaqueType%.

/// A type created from a user-defined contract.
Flat(RichTerm),
/// A function.
Expand Down Expand Up @@ -543,6 +545,7 @@ impl<Ty, RRows, ERows> TypeF<Ty, RRows, ERows> {
TypeF::Number => Ok(TypeF::Number),
TypeF::Bool => Ok(TypeF::Bool),
TypeF::String => Ok(TypeF::String),
TypeF::Opaque => Ok(TypeF::Opaque),
TypeF::Symbol => Ok(TypeF::Symbol),
TypeF::Flat(t) => Ok(TypeF::Flat(t)),
TypeF::Arrow(dom, codom) => Ok(TypeF::Arrow(f(dom, state)?, f(codom, state)?)),
Expand Down Expand Up @@ -818,6 +821,7 @@ impl Subcontract for Type {
TypeF::Number => internals::num(),
TypeF::Bool => internals::bool(),
TypeF::String => internals::string(),
TypeF::Opaque => internals::opaque(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this contract can never be instantiated in practice (unless programmatically) . For example, we don't even bother elaborating a contract for Symbol below (which is also arguably not a very useful type). Maybe it would be more useful to have a std.contract.Opaque contract?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That being said, if you keep the type, I think keeping the contract as well makes sense rather than replace it with a panic, even if it's not useful right now.

// Array Dyn is specialized to array_dyn, which is constant time
TypeF::Array(ref ty) if matches!(ty.typ, TypeF::Dyn) => internals::array_dyn(),
TypeF::Array(ref ty) => mk_app!(internals::array(), ty.subcontract(vars, pol, sy)?),
Expand Down Expand Up @@ -1402,6 +1406,7 @@ impl Traverse<Type> for Type {
| TypeF::Number
| TypeF::Bool
| TypeF::String
| TypeF::Opaque
| TypeF::Symbol
| TypeF::Var(_)
| TypeF::Enum(_)
Expand Down
1 change: 1 addition & 0 deletions core/src/typecheck/mk_uniftype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,4 @@ generate_builder!(str, String);
generate_builder!(num, Number);
generate_builder!(bool, Bool);
generate_builder!(sym, Symbol);
generate_builder!(opaque, Opaque);
14 changes: 11 additions & 3 deletions core/src/typecheck/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,12 @@ impl<E: TermEnvironment> VarLevelUpperBound for GenericUnifType<E> {
impl<E: TermEnvironment> VarLevelUpperBound for GenericUnifTypeUnrolling<E> {
fn var_level_upper_bound(&self) -> VarLevel {
match self {
TypeF::Dyn | TypeF::Bool | TypeF::Number | TypeF::String | TypeF::Symbol => {
VarLevel::NO_VAR
}
TypeF::Dyn
| TypeF::Bool
| TypeF::Number
| TypeF::String
| TypeF::Opaque
| TypeF::Symbol => VarLevel::NO_VAR,
TypeF::Arrow(domain, codomain) => max(
domain.var_level_upper_bound(),
codomain.var_level_upper_bound(),
Expand Down Expand Up @@ -1440,6 +1443,7 @@ fn walk<V: TypecheckVisitor>(
| Term::Str(_)
| Term::Lbl(_)
| Term::Enum(_)
| Term::Opaque(_)
| Term::SealingKey(_)
// This function doesn't recursively typecheck imports: this is the responsibility of the
// caller.
Expand Down Expand Up @@ -1628,6 +1632,7 @@ fn walk_type<V: TypecheckVisitor>(
| TypeF::Number
| TypeF::Bool
| TypeF::String
| TypeF::Opaque
| TypeF::Symbol
// Currently, the parser can't generate unbound type variables by construction. Thus we
// don't check here for unbound type variables again.
Expand Down Expand Up @@ -2276,6 +2281,9 @@ fn check<V: TypecheckVisitor>(
}
}

Term::Opaque(_) => ty
.unify(mk_uniftype::opaque(), state, &ctxt)
.map_err(|err| err.into_typecheck_err(state, rt.pos)),
Term::SealingKey(_) => ty
.unify(mk_uniftype::sym(), state, &ctxt)
.map_err(|err| err.into_typecheck_err(state, rt.pos)),
Expand Down
2 changes: 2 additions & 0 deletions core/stdlib/internals.ncl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

"$string" = fun label value => if %typeof% value == 'String then value else %blame% label,

"$opaque" = fun label value => if %typeof% value == 'Opaque then value else %blame% label,

"$fail" = fun label _value => %blame% label,

"$array" = fun element_contract label value =>
Expand Down
Loading