From b2c204062374f426613bff0cac729aba54781ce2 Mon Sep 17 00:00:00 2001 From: MartinuzziFrancesco Date: Sun, 4 Feb 2024 15:29:24 +0100 Subject: [PATCH] added load_dataset function --- ext/SpectralIndicesDataFramesExt.jl | 2 +- ext/SpectralIndicesYAXArraysExt.jl | 28 ++++++++++++++ src/SpectralIndices.jl | 2 +- src/datasets.jl | 58 +++++++++++++++++++++++++++-- test/utils.jl | 2 +- 5 files changed, 86 insertions(+), 6 deletions(-) diff --git a/ext/SpectralIndicesDataFramesExt.jl b/ext/SpectralIndicesDataFramesExt.jl index 703c7b0..50c9b06 100644 --- a/ext/SpectralIndicesDataFramesExt.jl +++ b/ext/SpectralIndicesDataFramesExt.jl @@ -54,7 +54,7 @@ function SpectralIndices.RBF(params::DataFrame) return result_df end -function open_dataset(dataset::String) +function SpectralIndices.load_dataset(dataset::String) datasets = Dict("sentinel" => "S2_10m.json", "spectral" => "spectral.json") if dataset in keys(datasets) diff --git a/ext/SpectralIndicesYAXArraysExt.jl b/ext/SpectralIndicesYAXArraysExt.jl index 08d5e41..9aae818 100644 --- a/ext/SpectralIndicesYAXArraysExt.jl +++ b/ext/SpectralIndicesYAXArraysExt.jl @@ -89,4 +89,32 @@ function SpectralIndices.RBF(params::YAXArray) return result end +function SpectralIndices.load_dataset(dataset::String) + datasets = Dict("sentinel" => "S2_10m.json", "spectral" => "spectral.json") + + if dataset in keys(datasets) + nothing + else + error("Dataset name not valid. Datasets available: sentinel and spectral") + end + + ds = SpectralIndices._load_json(datasets[dataset]) + + # Convert each vector of vectors in `ds` into a matrix + matrices = [hcat(ds[i]...) 
for i in 1:length(ds)] + + # Stack these matrices to form a 3D array + data_3d = cat(matrices...; dims=3) + + # Define dimensions + x_dim = Dim{:x}(1:300) + y_dim = Dim{:y}(1:300) + bands = Dim{:bands}(["B02", "B03", "B04", "B08"]) + + # Create the YAXArray + yax_ds = YAXArray((x_dim, y_dim, bands), data_3d) + + return yax_ds +end + end #module diff --git a/src/SpectralIndices.jl b/src/SpectralIndices.jl index 220cdae..7511989 100644 --- a/src/SpectralIndices.jl +++ b/src/SpectralIndices.jl @@ -22,7 +22,7 @@ indices = _create_indices() bands = _create_bands() constants = _create_constants() -export get_datasets +export get_datasets, load_dataset export SpectralIndex, indices, compute export PlatformBand, Band export Constant diff --git a/src/datasets.jl b/src/datasets.jl index 86db4ad..eab080f 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -1,5 +1,26 @@ """ -gets the indices from spyndex + get_datasets(; datasets=["S2_10m.json", "spectral.json"], data_loc=joinpath(dirname(@__FILE__), "..", "data")) + +Download predefined datasets from a specified remote location and save them to a local directory. + +# Keyword Arguments + + - `datasets::Array{String,1}`: A list of dataset filenames to download. Defaults to `["S2_10m.json", "spectral.json"]`. + - `data_loc::String`: The local directory path where the downloaded datasets will be saved. Defaults to a `data` directory located one level up from the script's directory. + +# Description + +This function iterates over a list of dataset filenames, downloads each dataset from a predefined remote URL, and saves them into a specified local directory. Each file's URL is formed by appending the dataset filename to a fixed base URL (the `data` directory of the spyndex repository), so every entry in `datasets` is expected to name a file hosted there. Adjust the function or its usage accordingly if datasets hosted elsewhere are needed. 
+ +# Example + +```julia +get_datasets() # Downloads the default datasets to the default location + +get_datasets(; datasets=["custom_dataset.json"], data_loc="path/to/custom/directory") +``` + +This is particularly useful for setting up local environments with necessary data files for further processing or analysis. """ function get_datasets(; datasets=["S2_10m.json", "spectral.json"], @@ -8,15 +29,46 @@ function get_datasets(; for ds in datasets file_dest = joinpath(data_loc, ds) Downloads.download( - "https://raw.githubusercontent.com/awesome-spectral-indices/spyndex/main/spyndex/data/S2_10m.json", + "https://raw.githubusercontent.com/awesome-spectral-indices/spyndex/main/spyndex/data/" * + ds, file_dest, ) end end """ + load_dataset(dataset::String) -> YAXArray + load_dataset(dataset::String) -> DataFrame + +Load a specified dataset and convert it into either a YAXArray or a DataFrame, depending on the loaded packages. + +# Arguments + + - `dataset::String`: The name of the dataset to load. Currently supports `"sentinel"` and `"spectral"`. + +# Returns + + - If YAXArrays is loaded in the namespace, returns a `YAXArray` object containing the loaded dataset, with dimensions labeled as `:x`, `:y`, and `:bands`. The spatial dimensions (`:x` and `:y`) are assumed to have a size of 300 each, and the `:bands` dimension includes ["B02", "B03", "B04", "B08"] bands. + - If DataFrames is loaded in the namespace, returns a `DataFrame` with the dataset loaded into it. + +# Errors + +Throws an error if the `dataset` argument does not match one of the predefined dataset names. 
+ +# Example + +```julia +# Load dataset as YAXArray +yax_ds = SpectralIndices.load_dataset("sentinel") + +# Load dataset as DataFrame +df_ds = SpectralIndices.load_dataset("spectral") +``` + +The current implementation expects the JSON files ("S2_10m.json" for "sentinel" and "spectral.json" for "spectral") to follow a specific format: a vector of vectors where each inner vector represents a band's data in a 300x300 spatial grid for the YAXArray version, or a suitable structure that can be directly converted into a DataFrame for the DataFrame version. +Example files are already provided with the package in the `data` folder. """ -function open_dataset() +function load_dataset() dfext = Base.get_extension(SpectralIndices, :SpectralIndicesDataFramesExt) yaxaxext = Base.get_extension(SpectralIndices, :SpectralIndicesYAXArraysExt) diff --git a/test/utils.jl b/test/utils.jl index eaecc42..ccf202f 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -31,7 +31,7 @@ end @testset "Download Datasets Test" begin temp_dir = mktempdir() # Temporary directory for testing try - expected_files = ["test_S2_10m.json", "test_spectral.json"] + expected_files = ["S2_10m.json", "spectral.json"] get_datasets(; datasets=expected_files, data_loc=temp_dir) for expected_file in expected_files @test isfile(joinpath(temp_dir, expected_file))