Skip to content

Commit

Permalink
Merge pull request #42 from invenia/rf/constructors
Browse files Browse the repository at this point in the history
Loosen component dict type constraints
  • Loading branch information
rofinn authored Apr 5, 2021
2 parents 2863637 + 8e215cb commit d39b647
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "AxisSets"
uuid = "a1a1544e-ba16-4f6d-8861-e833517b754e"
authors = ["Invenia Technical Computing Corporation"]
version = "0.1.3"
version = "0.1.4"

[deps]
AutoHashEquals = "15f4f7f2-30c1-5605-9d31-71845cf9641f"
Expand Down
3 changes: 0 additions & 3 deletions src/AxisSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ julia> axiskeys(flatten(A, (:obj, :loc)), :objᵡloc)
"""
const DEFAULT_PROD_DELIM = :ᵡ

# Short hand type for complicated union of nested Keyed or NamedDims arrays
const XArray{L, T, N} = Union{NamedDimsArray{L,T,N,<:KeyedArray}, KeyedArray{T,N,<:NamedDimsArray}}

# There are a few places where calling `only` is convenient, even on older Julia releases
if VERSION < v"1.4"
function _only(x)
Expand Down
53 changes: 29 additions & 24 deletions src/dataset.jl
Original file line number Diff line number Diff line change
@@ -1,33 +1,44 @@
"""
KeyedDataset{K, T}
KeyedDataset
A `KeyedDataset` describes an associative collection of component `KeyedArray`s with constraints
on their shared dimensions.
# Fields
- `constraints::OrderedSet{Pattern}` - Constraint [`Pattern`](@ref)s on shared dimensions.
- `data::LittleDict{K, T}` - Flattened key paths as type `K <: Tuple` to each component array of type `T`.
- `data::LittleDict{Tuple, KeyedArray}` - Flattened key paths (as tuples) mapped to their component keyed arrays.
"""
@auto_hash_equals struct KeyedDataset{K<:Tuple, T<:XArray}
@auto_hash_equals struct KeyedDataset
# Our constraints are a collection of pseudo path tuples typically with 1 or
# more `:_` wildcard components
constraints::OrderedSet{Pattern}
# Data is keyed by tuple paths; the pairs constructor wraps or splits non-tuple keys into tuples.
data::LittleDict{K, T}
data::LittleDict{Tuple, KeyedArray}

function KeyedDataset(
constraints::OrderedSet{Pattern},
data::LittleDict{K, T},
check=true,
) where {K<:Tuple, T}
ds = new{K, T}(constraints, data)
data::LittleDict,
check=true
)
ds = new(constraints, data)
check && validate(ds)
return ds
end
end

function KeyedDataset(pairs::Pair{T}...; constraints=Pattern[]) where T<:Tuple
data = LittleDict{T, XArray}(pairs...)
function KeyedDataset(pairs::Pair...; constraints=Pattern[])
# Convert any non-tuple keys to tuples
tupled_pairs = map(pairs) do (k, v)
k isa Tuple && return k => v

if k isa Symbol
Tuple(Symbol.(split(string(k), string(DEFAULT_FLATTEN_DELIM)))) => v
else
(k,) => v
end
end

data = LittleDict(tupled_pairs)

# If no constraints have been specified then we default to (:__, dimname)
constraint_set = if isempty(constraints)
Expand All @@ -42,22 +53,16 @@ function KeyedDataset(pairs::Pair{T}...; constraints=Pattern[]) where T<:Tuple
return result
end

# Taking pairs is the most general constructor as it doesn't make assumptions about the
# data key type.
function KeyedDataset(pairs::Pair{Symbol}...; constraints=Pattern[])
return KeyedDataset(
(
Tuple(Symbol.(split(string(k), string(DEFAULT_FLATTEN_DELIM)))) => v
for (k, v) in pairs
)...;
constraints=constraints,
)
# Utility kwargs and empty constructor.
function KeyedDataset(; constraints=Pattern[], kwargs...)
if isempty(kwargs)
return KeyedDataset(OrderedSet{Pattern}(constraints), LittleDict{Tuple, KeyedArray}())
else
return KeyedDataset(kwargs...; constraints=constraints)
end
end

# Utility kwargs constructor.
KeyedDataset(; constraints=Pattern[], kwargs...) = KeyedDataset(kwargs...; constraints=constraints)

function Base.show(io::IO, ds::KeyedDataset{K, T}) where {K, T}
function Base.show(io::IO, ds::KeyedDataset)
n = length(ds.data)
m = length(ds.constraints)

Expand Down
4 changes: 2 additions & 2 deletions src/flatten.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,12 @@ function flatten(x::Vector{<:Pair}, delim::Symbol)
return [Symbol(join(k, delim)) => v for (k, v) in flatten(x)]
end

function flatten(A::XArray, dims::Tuple, delim=DEFAULT_PROD_DELIM)
function flatten(A::KeyedArray, dims::Tuple, delim=DEFAULT_PROD_DELIM)
new_name = Symbol(join(dims, delim))
flatten(A, dims => new_name, delim)
end

function flatten(A::XArray, dims::Pair{<:Tuple, Symbol}, delim=nothing)
function flatten(A::KeyedArray, dims::Pair{<:Tuple, Symbol}, delim=nothing)
# Lookup our unnamed dimensions to flatten
# We sort the result to ensure that the dimensions to flatten are consecutive
fd = sort!(collect(NamedDims.dim(A, first(dims))))
Expand Down
2 changes: 1 addition & 1 deletion src/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ julia> ds2 = KeyedDataset(
);
julia> collect(keys(merge(ds1, ds2).data))
4-element Vector{Tuple{Symbol}}:
4-element Vector{Tuple}:
(:a,)
(:b,)
(:c,)
Expand Down
6 changes: 3 additions & 3 deletions src/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ julia> ds = KeyedDataset(
);
julia> collect(keys(ds.data))
2-element Vector{Tuple{Symbol}}:
2-element Vector{Tuple}:
(:val1,)
(:val2,)
Expand Down Expand Up @@ -154,12 +154,12 @@ julia> ds = KeyedDataset(
);
julia> collect(keys(ds(:__, :a).data))
2-element Vector{Tuple{Symbol, Symbol}}:
2-element Vector{Tuple}:
(:g1, :a)
(:g2, :a)
julia> collect(keys(ds(:g1, :__).data))
2-element Vector{Tuple{Symbol, Symbol}}:
2-element Vector{Tuple}:
(:g1, :a)
(:g1, :b)
```
Expand Down
32 changes: 32 additions & 0 deletions test/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@
@test_throws ArgumentError validate(ds)
end

@testset "Empty" begin
expected = KeyedDataset(OrderedSet{Pattern}(), LittleDict{Tuple, KeyedArray}())
@test KeyedDataset() == expected

patterns = Pattern[(:train, :_, :target)]
expected = KeyedDataset(OrderedSet(patterns), LittleDict{Tuple, KeyedArray}())
@test KeyedDataset(; constraints=patterns) == expected
end

@testset "KeyedArrays" begin
ds = KeyedDataset(
:val1 => KeyedArray(
Expand Down Expand Up @@ -49,6 +58,29 @@

# Test that we have a data dict entry for each value column
@test issetequal([(:val1,), (:val2,)], keys(ds.data))

@testset "Variable Keys" begin
# Test construction with different key types and lengths
ds = KeyedDataset(
"val1" => KeyedArray(
rand(4, 3, 2);
time=DateTime(2021, 1, 1, 11):Hour(1):DateTime(2021, 1, 1, 14),
loc=1:3,
obj=[:a, :b],
),
(:group1, 2) => KeyedArray(
rand(4, 3, 2) .+ 1.0;
time=DateTime(2021, 1, 1, 11):Hour(1):DateTime(2021, 1, 1, 14),
loc=1:3,
obj=[:a, :b],
),
)

# Test that we successfully extracted the dims
@test issetequal([:time, :loc, :obj], dimnames(ds))
# Test that we have a data dict entry for each value column
@test issetequal([("val1",), (:group1, 2)], keys(ds.data))
end
end

@testset "Flatten" begin
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ using Statistics
using Test
using TimeZones

using AxisSets: Pattern, constraintmap, dimpaths, flatten, validate
using AxisSets: Pattern, KeyedArray, constraintmap, dimpaths, flatten, validate
using Impute: ThresholdError

@testset "AxisSets.jl" begin
Expand Down

2 comments on commit d39b647

@rofinn
Copy link
Member Author

@rofinn rofinn commented on d39b647 Apr 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/33588

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.1.4 -m "<description of version>" d39b647fd0dc27954eb21f8ef047e7112910b103
git push origin v0.1.4

Please sign in to comment.