Skip to content

Commit

Permalink
Merge pull request #27 from awesome-spectral-indices/fm/ds
Browse files Browse the repository at this point in the history
Reading datasets
  • Loading branch information
MartinuzziFrancesco authored Feb 4, 2024
2 parents 0e80e6d + d45370e commit 4e8b46f
Show file tree
Hide file tree
Showing 7 changed files with 362,542 additions and 25 deletions.
362,411 changes: 362,410 additions & 1 deletion data/spectral.json

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions ext/SpectralIndicesDataFramesExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,18 @@ function SpectralIndices.RBF(params::DataFrame)
return result_df
end

# Load one of the bundled example datasets and return it as a `DataFrame`.
#
# `dataset` is the user-facing dataset name; it is translated to a JSON file
# name, read with `SpectralIndices._load_json`, and handed to the `DataFrame`
# constructor. An unknown name raises an error listing the accepted names.
#
# NOTE(review): the YAXArrays extension defines a method with this exact
# signature; presumably only one of them is active when both packages are
# loaded -- confirm the intended dispatch.
function SpectralIndices.load_dataset(dataset::String)
    # User-facing dataset names mapped to the JSON files that store them.
    datasets = Dict("sentinel" => "S2_10m.json", "spectral" => "spectral.json")

    # Guard clause: reject unknown names before any file is read.
    if !haskey(datasets, dataset)
        error("Dataset name not valid. Datasets available: sentinel and spectral")
    end

    # `_load_json` is an internal helper, so it is referenced through the parent
    # module instead of relying on it having been imported by name.
    raw = SpectralIndices._load_json(datasets[dataset])

    return DataFrame(raw)
end

end #module
28 changes: 28 additions & 0 deletions ext/SpectralIndicesYAXArraysExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,32 @@ function SpectralIndices.RBF(params::YAXArray)
return result
end

# Load one of the bundled example datasets and return it as a `YAXArray` with
# dimensions `:x`, `:y` and `:bands`.
#
# `dataset` is the user-facing dataset name; it is translated to a JSON file
# name and read with `SpectralIndices._load_json`. An unknown name raises an
# error listing the accepted names.
#
# NOTE(review): the band labels are fixed to B02/B03/B04/B08, which presumably
# only matches the "sentinel" file -- confirm what "spectral" should return here.
# NOTE(review): the DataFrames extension defines a method with this exact
# signature; confirm the intended dispatch when both packages are loaded.
function SpectralIndices.load_dataset(dataset::String)
    # User-facing dataset names mapped to the JSON files that store them.
    datasets = Dict("sentinel" => "S2_10m.json", "spectral" => "spectral.json")

    # Guard clause: reject unknown names before any file is read.
    if !haskey(datasets, dataset)
        error("Dataset name not valid. Datasets available: sentinel and spectral")
    end

    ds = SpectralIndices._load_json(datasets[dataset])

    # Each entry of `ds` is treated as a vector of vectors; concatenating the
    # inner vectors side by side yields one matrix per entry.
    matrices = [hcat(ds[i]...) for i in eachindex(ds)]

    # Stack the per-entry matrices along a third axis.
    data_3d = cat(matrices...; dims=3)

    # Spatial axes are sized from the data itself rather than a fixed 300x300
    # grid, so the labels always agree with the array that was just built.
    x_dim = Dim{:x}(1:size(data_3d, 1))
    y_dim = Dim{:y}(1:size(data_3d, 2))
    bands = Dim{:bands}(["B02", "B03", "B04", "B08"])

    yax_ds = YAXArray((x_dim, y_dim, bands), data_3d)

    return yax_ds
end

end #module
3 changes: 2 additions & 1 deletion src/SpectralIndices.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include("utils.jl")
include("axioms.jl")
include("compute_index.jl")
include("compute_kernel.jl")
#include("datasets.jl")
include("datasets.jl")

if !isdefined(Base, :get_extension)
include("../ext/SpectralIndicesDataFramesExt.jl")
Expand All @@ -22,6 +22,7 @@ indices = _create_indices()
bands = _create_bands()
constants = _create_constants()

export get_datasets, load_dataset
export SpectralIndex, indices, compute
export PlatformBand, Band
export Constant
Expand Down
95 changes: 74 additions & 21 deletions src/datasets.jl
Original file line number Diff line number Diff line change
@@ -1,29 +1,82 @@
"""
gets the indices from spyndex
get_datasets(; datasets=["S2_10m.json", "spectral.json"], data_loc=joinpath(dirname(@__FILE__), "..", "data"))
Download predefined datasets from a specified remote location and save them to a local directory.
# Keyword Arguments
- `datasets::Array{String,1}`: A list of dataset filenames to download. Defaults to `["S2_10m.json", "spectral.json"]`.
- `data_loc::String`: The local directory path where the downloaded datasets will be saved. Defaults to a `data` directory located one level up from the script's directory.
# Description
This function iterates over a list of dataset filenames, downloads each one from a predefined remote location, and saves it into the specified local directory. The base URL is hardcoded to the `spyndex/data/` folder of the spyndex repository and each filename is appended to it, so only files that exist in that folder can be fetched. A failed download is reported with a warning and does not stop the remaining downloads.
# Example
```julia
get_datasets() # Downloads the default datasets to the default location
get_datasets(; datasets=["custom_dataset.json"], data_loc="path/to/custom/directory")
```
This is particularly useful for setting up local environments with necessary data files for further processing or analysis.
"""
function get_dataset()
filedest_sentinel = joinpath(dirname(@__FILE__), "..", "data", "S2_10m.json")
filedest_spectral = joinpath(dirname(@__FILE__), "..", "data", "spectral.json")
Downloads.download(
"https://raw.githubusercontent.com/awesome-spectral-indices/spyndex/main/spyndex/data/S2_10m.json",
filedest_sentinel,
)
return Downloads.download(
"https://raw.githubusercontent.com/awesome-spectral-indices/spyndex/main/spyndex/data/spectral.json",
filedest_spectral,
)
function get_datasets(;
    datasets=["S2_10m.json", "spectral.json"],
    data_loc=joinpath(dirname(@__FILE__), "..", "data"),
)
    # Every file is fetched from this folder of the spyndex repository; the
    # dataset filename is appended to form the full URL.
    base_url = "https://raw.githubusercontent.com/awesome-spectral-indices/spyndex/main/spyndex/data/"

    for ds in datasets
        file_dest = joinpath(data_loc, ds)
        try
            # Create the destination directory if it does not exist yet, so a
            # fresh `data_loc` works. It sits inside the `try` so that a
            # failure here is reported like any other per-dataset failure.
            mkpath(dirname(file_dest))
            Downloads.download(base_url * ds, file_dest)
        catch e
            # Best effort: report the failure and carry on with the next file.
            @warn "Failed to download dataset $ds: $e"
        end
    end

    return nothing
end

"""
load_dataset(dataset::String) -> YAXArray
load_dataset(dataset::String) -> DataFrame
Load a specified dataset and convert it into either a YAXArray or a DataFrame, depending on the loaded packages.
# Arguments
- `dataset::String`: The name of the dataset to load. Currently supports `"sentinel"` and `"spectral"`.
# Returns
- If YAXArrays is loaded in the namespace, returns a `YAXArray` object containing the loaded dataset, with dimensions labeled as `:x`, `:y`, and `:bands`. The spatial dimensions (`:x` and `:y`) are assumed to have a size of 300 each, and the `:bands` dimension includes ["B02", "B03", "B04", "B08"] bands.
- If DataFrames is loaded in the namespace, returns a `DataFrame` with the dataset loaded into it.
# Errors
Throws an error if the `dataset` argument does not match one of the predefined dataset names.
# Example
```julia
# Load dataset as YAXArray
yax_ds = SpectralIndices.load_dataset("sentinel")
# Load dataset as DataFrame
df_ds = SpectralIndices.load_dataset("spectral")
```
The current implementation expects the JSON files ("S2_10m.json" for "sentinel" and "spectral.json" for "spectral") to follow a specific format: a vector of vectors where each inner vector represents a band's data in a 300x300 spatial grid for the YAXArray version, or a suitable structure that can be directly converted into a DataFrame for the DataFrame version.
The files are already provided for examples in the package in the folder `data`.
"""
function open(dataset::String)
datasets = Dict("sentinel" => "S2_10m.json", "spectral" => "spectral.json")
if dataset in keys(datasets)
nothing
else
error("Dataset name not valid. Datasets available: sentinel and spectral")
end
function load_dataset()
dfext = Base.get_extension(SpectralIndices, :SpectralIndicesDataFramesExt)
yaxaxext = Base.get_extension(SpectralIndices, :SpectralIndicesYAXArraysExt)

return ds = _load_json(datasets[dataset])
#TODO: finish conversion to array or dataset
if isnothing(dfext) && isnothing(yaxaxext)
error("Load a library (DataFrames, YAXArrays) to use this function")
end
end
2 changes: 0 additions & 2 deletions test/qa.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using SpectralIndices
using JuliaFormatter: JuliaFormatter
using JET: JET
using Aqua: Aqua

Aqua.test_all(SpectralIndices; ambiguities=false, deps_compat=(check_extras = false))
@test JuliaFormatter.format(SpectralIndices; verbose=false, overwrite=false)
JET.test_package(SpectralIndices; target_defined_modules=true)
14 changes: 14 additions & 0 deletions test/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,20 @@ using SpectralIndices
end
end

@testset "Download Datasets Test" begin
    # Download into a scratch directory and check that each requested file
    # ends up there.
    scratch = mktempdir()
    wanted = ["S2_10m.json", "spectral.json"]
    try
        get_datasets(; datasets=wanted, data_loc=scratch)
        # One assertion per file so a missing file is reported individually.
        for fname in wanted
            target = joinpath(scratch, fname)
            @test isfile(target)
        end
    finally
        # Always remove the scratch directory, even if an assertion errors.
        rm(scratch; recursive=true)
    end
end

params = Dict("N" => 0.6, "R" => 0.3)
# Test correctness
@test SpectralIndices._check_params(NDVI, params) === nothing
Expand Down

0 comments on commit 4e8b46f

Please sign in to comment.