Skip to content

Commit

Permalink
added load_dataset function
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinuzziFrancesco committed Feb 4, 2024
1 parent 276d91d commit b2c2040
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 6 deletions.
2 changes: 1 addition & 1 deletion ext/SpectralIndicesDataFramesExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ function SpectralIndices.RBF(params::DataFrame)
return result_df
end

function open_dataset(dataset::String)
function SpectralIndices.load_dataset(dataset::String)
datasets = Dict("sentinel" => "S2_10m.json", "spectral" => "spectral.json")

if dataset in keys(datasets)
Expand Down
28 changes: 28 additions & 0 deletions ext/SpectralIndicesYAXArraysExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,32 @@ function SpectralIndices.RBF(params::YAXArray)
return result
end

# YAXArrays-backed method of `SpectralIndices.load_dataset`.
#
# Looks up the JSON file registered for `dataset` ("sentinel" or "spectral"),
# loads it with `SpectralIndices._load_json`, and returns the content as a
# 3-D `YAXArray` with axes `:x`, `:y` and `:bands`.
#
# Arguments
# - `dataset::String`: name of the dataset to load.
#
# Returns
# - A `YAXArray` whose `:x` and `:y` axes are `1:n` ranges matching the first
#   two dimensions of the loaded data, and whose `:bands` axis is labelled
#   "B02", "B03", "B04", "B08".
#
# Errors
# - Calls `error` when `dataset` is not one of the known names.
#
# NOTE(review): the DataFrames extension defines a method with the same
# `(dataset::String)` signature; presumably one overwrites the other when both
# extensions are loaded -- confirm.
function SpectralIndices.load_dataset(dataset::String)
    datasets = Dict("sentinel" => "S2_10m.json", "spectral" => "spectral.json")

    # Guard clause: reject unknown names before attempting to load anything.
    if !haskey(datasets, dataset)
        error("Dataset name not valid. Datasets available: sentinel and spectral")
    end

    ds = SpectralIndices._load_json(datasets[dataset])

    # Each entry of `ds` is a vector of vectors; concatenate the inner vectors
    # as columns so that every entry becomes one matrix.
    matrices = [hcat(ds[i]...) for i in eachindex(ds)]

    # Stack the per-entry matrices along a third dimension.
    data_3d = cat(matrices...; dims=3)

    # Derive the spatial axes from the data instead of assuming a fixed grid
    # size, so the axes always agree with the array that was actually loaded.
    x_dim = Dim{:x}(1:size(data_3d, 1))
    y_dim = Dim{:y}(1:size(data_3d, 2))
    # assumes the third dimension holds exactly these four bands -- TODO confirm
    # for datasets other than "sentinel"
    bands = Dim{:bands}(["B02", "B03", "B04", "B08"])

    return YAXArray((x_dim, y_dim, bands), data_3d)
end

end #module
2 changes: 1 addition & 1 deletion src/SpectralIndices.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ indices = _create_indices()
bands = _create_bands()
constants = _create_constants()

export get_datasets
export get_datasets, load_dataset
export SpectralIndex, indices, compute
export PlatformBand, Band
export Constant
Expand Down
58 changes: 55 additions & 3 deletions src/datasets.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
"""
gets the indices from spyndex
get_datasets(; datasets=["S2_10m.json", "spectral.json"], data_loc=joinpath(dirname(@__FILE__), "..", "data"))
Download predefined datasets from a specified remote location and save them to a local directory.
# Keyword Arguments
- `datasets::Array{String,1}`: A list of dataset filenames to download. Defaults to `["S2_10m.json", "spectral.json"]`.
- `data_loc::String`: The local directory path where the downloaded datasets will be saved. Defaults to a `data` directory located one level up from the script's directory.
# Description
This function iterates over a list of dataset filenames, downloads each one from a predefined remote location, and saves it into the specified local directory. Each filename is appended to a fixed base URL pointing at the `spyndex/data` folder of the `awesome-spectral-indices/spyndex` repository, so only files hosted there can be downloaded. Adjust the function or its usage accordingly if datasets hosted elsewhere are needed.
# Example
```julia
get_datasets() # Downloads the default datasets to the default location
get_datasets(; datasets=["custom_dataset.json"], data_loc="path/to/custom/directory")
```
This is particularly useful for setting up local environments with necessary data files for further processing or analysis.
"""
function get_datasets(;
datasets=["S2_10m.json", "spectral.json"],
Expand All @@ -8,15 +29,46 @@ function get_datasets(;
for ds in datasets
file_dest = joinpath(data_loc, ds)
Downloads.download(
"https://raw.githubusercontent.com/awesome-spectral-indices/spyndex/main/spyndex/data/S2_10m.json",
"https://raw.githubusercontent.com/awesome-spectral-indices/spyndex/main/spyndex/data/" *
ds,
file_dest,
)
end
end

"""
load_dataset(dataset::String) -> YAXArray
load_dataset(dataset::String) -> DataFrame
Load a specified dataset and convert it into either a YAXArray or a DataFrame, depending on the loaded packages.
# Arguments
- `dataset::String`: The name of the dataset to load. Currently supports `"sentinel"` and `"spectral"`.
# Returns
- If YAXArrays is loaded in the namespace, returns a `YAXArray` object containing the loaded dataset, with dimensions labeled as `:x`, `:y`, and `:bands`. The spatial dimensions (`:x` and `:y`) are assumed to have a size of 300 each, and the `:bands` dimension includes ["B02", "B03", "B04", "B08"] bands.
- If DataFrames is loaded in the namespace, returns a `DataFrame` with the dataset loaded into it.
# Errors
Throws an error if the `dataset` argument does not match one of the predefined dataset names.
# Example
```julia
# Load dataset as YAXArray
yax_ds = SpectralIndices.load_dataset("sentinel")
# Load dataset as DataFrame
df_ds = SpectralIndices.load_dataset("spectral")
```
The current implementation expects the JSON files ("S2_10m.json" for "sentinel" and "spectral.json" for "spectral") to follow a specific format: a vector of vectors where each inner vector represents a band's data in a 300x300 spatial grid for the YAXArray version, or a suitable structure that can be directly converted into a DataFrame for the DataFrame version.
These files are already provided with the package, in the `data` folder, for use in examples.
"""
function open_dataset()
function load_dataset()
dfext = Base.get_extension(SpectralIndices, :SpectralIndicesDataFramesExt)
yaxaxext = Base.get_extension(SpectralIndices, :SpectralIndicesYAXArraysExt)

Expand Down
2 changes: 1 addition & 1 deletion test/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ end
@testset "Download Datasets Test" begin
temp_dir = mktempdir() # Temporary directory for testing
try
expected_files = ["test_S2_10m.json", "test_spectral.json"]
expected_files = ["S2_10m.json", "spectral.json"]
get_datasets(; datasets=expected_files, data_loc=temp_dir)
for expected_file in expected_files
@test isfile(joinpath(temp_dir, expected_file))
Expand Down

0 comments on commit b2c2040

Please sign in to comment.