From 29cce1c6d4e0e6801b201cc00931a3eed8f97c95 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 24 Sep 2024 19:40:13 -0400 Subject: [PATCH] [NDTensors] JLArrays Extension (#1508) --- NDTensors/Project.toml | 10 +++-- NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl | 8 ++-- NDTensors/ext/NDTensorsCUDAExt/set_types.jl | 11 ++--- .../NDTensorsJLArraysExt.jl | 7 +++ NDTensors/ext/NDTensorsJLArraysExt/copyto.jl | 30 +++++++++++++ .../ext/NDTensorsJLArraysExt/indexing.jl | 19 ++++++++ .../ext/NDTensorsJLArraysExt/linearalgebra.jl | 40 +++++++++++++++++ NDTensors/ext/NDTensorsJLArraysExt/mul.jl | 43 +++++++++++++++++++ .../ext/NDTensorsJLArraysExt/permutedims.jl | 24 +++++++++++ NDTensors/ext/NDTensorsMetalExt/set_types.jl | 7 --- .../src/base/abstractarray.jl | 2 + .../TypeParameterAccessors/src/base/array.jl | 2 - .../test/NDTensorsTestUtils/device_list.jl | 23 +++++++--- NDTensors/test/Project.toml | 5 ++- jenkins/Jenkinsfile | 28 +----------- 15 files changed, 201 insertions(+), 58 deletions(-) create mode 100644 NDTensors/ext/NDTensorsJLArraysExt/NDTensorsJLArraysExt.jl create mode 100644 NDTensors/ext/NDTensorsJLArraysExt/copyto.jl create mode 100644 NDTensors/ext/NDTensorsJLArraysExt/indexing.jl create mode 100644 NDTensors/ext/NDTensorsJLArraysExt/linearalgebra.jl create mode 100644 NDTensors/ext/NDTensorsJLArraysExt/mul.jl create mode 100644 NDTensors/ext/NDTensorsJLArraysExt/permutedims.jl diff --git a/NDTensors/Project.toml b/NDTensors/Project.toml index 79b140cee6..120ea70a7a 100644 --- a/NDTensors/Project.toml +++ b/NDTensors/Project.toml @@ -36,6 +36,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb" MappedArrays = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" Metal = "dde4c033-4e86-420c-a63e-0dd931031962" Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" @@ -47,6 
+48,7 @@ NDTensorsAMDGPUExt = ["AMDGPU", "GPUArraysCore"] NDTensorsCUDAExt = ["CUDA", "GPUArraysCore"] NDTensorsGPUArraysCoreExt = "GPUArraysCore" NDTensorsHDF5Ext = "HDF5" +NDTensorsJLArraysExt = ["GPUArraysCore", "JLArrays"] NDTensorsMappedArraysExt = ["MappedArrays"] NDTensorsMetalExt = ["GPUArraysCore", "Metal"] NDTensorsOctavianExt = "Octavian" @@ -70,15 +72,16 @@ GPUArraysCore = "0.1" HDF5 = "0.14, 0.15, 0.16, 0.17" HalfIntegers = "1" InlineStrings = "1" -LinearAlgebra = "1.6" +JLArrays = "0.1" +LinearAlgebra = "<0.0.1, 1.6" MacroTools = "0.5" MappedArrays = "0.4" Metal = "1" Octavian = "0.3" PackageExtensionCompat = "1" -Random = "1.6" +Random = "<0.0.1, 1.6" SimpleTraits = "0.9.4" -SparseArrays = "1.6" +SparseArrays = "<0.0.1, 1.6" SplitApplyCombine = "1.2.2" StaticArrays = "0.12, 1.0" Strided = "2" @@ -95,6 +98,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb" Metal = "dde4c033-4e86-420c-a63e-0dd931031962" Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" TBLIS = "48530278-0828-4a49-9772-0f3830dfa1e9" diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl b/NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl index 59ed52b5d0..759b140926 100644 --- a/NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl +++ b/NDTensors/ext/NDTensorsAMDGPUExt/set_types.jl @@ -1,11 +1,11 @@ # TypeParameterAccessors definitions -using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position +using NDTensors.TypeParameterAccessors: + TypeParameterAccessors, Position, default_type_parameters using NDTensors.GPUArraysCoreExtensions: storagemode using AMDGPU: AMDGPU, ROCArray function TypeParameterAccessors.default_type_parameters(::Type{<:ROCArray}) - return (Float64, 1, AMDGPU.Mem.HIPBuffer) + return (default_type_parameters(AbstractArray)..., AMDGPU.Mem.HIPBuffer) end 
-TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(eltype)) = Position(1) -TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(ndims)) = Position(2) + TypeParameterAccessors.position(::Type{<:ROCArray}, ::typeof(storagemode)) = Position(3) diff --git a/NDTensors/ext/NDTensorsCUDAExt/set_types.jl b/NDTensors/ext/NDTensorsCUDAExt/set_types.jl index 5c3d52a141..bab1149890 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/set_types.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/set_types.jl @@ -1,18 +1,13 @@ # TypeParameterAccessors definitions using CUDA: CUDA, CuArray -using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position +using NDTensors.TypeParameterAccessors: + TypeParameterAccessors, Position, default_type_parameters using NDTensors.GPUArraysCoreExtensions: storagemode -function TypeParameterAccessors.position(::Type{<:CuArray}, ::typeof(eltype)) - return Position(1) -end -function TypeParameterAccessors.position(::Type{<:CuArray}, ::typeof(ndims)) - return Position(2) -end function TypeParameterAccessors.position(::Type{<:CuArray}, ::typeof(storagemode)) return Position(3) end function TypeParameterAccessors.default_type_parameters(::Type{<:CuArray}) - return (Float64, 1, CUDA.Mem.DeviceBuffer) + return (default_type_parameters(AbstractArray)..., CUDA.Mem.DeviceBuffer) end diff --git a/NDTensors/ext/NDTensorsJLArraysExt/NDTensorsJLArraysExt.jl b/NDTensors/ext/NDTensorsJLArraysExt/NDTensorsJLArraysExt.jl new file mode 100644 index 0000000000..8de2f1dcfd --- /dev/null +++ b/NDTensors/ext/NDTensorsJLArraysExt/NDTensorsJLArraysExt.jl @@ -0,0 +1,7 @@ +module NDTensorsJLArraysExt +include("copyto.jl") +include("indexing.jl") +include("linearalgebra.jl") +include("mul.jl") +include("permutedims.jl") +end diff --git a/NDTensors/ext/NDTensorsJLArraysExt/copyto.jl b/NDTensors/ext/NDTensorsJLArraysExt/copyto.jl new file mode 100644 index 0000000000..e0fe1eb99d --- /dev/null +++ b/NDTensors/ext/NDTensorsJLArraysExt/copyto.jl @@ -0,0 +1,30 @@ +using 
JLArrays: JLArray +using NDTensors.Expose: Exposed, expose, unexpose +using LinearAlgebra: Adjoint + +# Same definition as `CuArray`. +function Base.copy(src::Exposed{<:JLArray,<:Base.ReshapedArray}) + return reshape(copy(parent(src)), size(unexpose(src))) +end + +function Base.copy( + src::Exposed{ + <:JLArray,<:SubArray{<:Any,<:Any,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}} + }, +) + return copy(@view copy(expose(parent(src)))[parentindices(unexpose(src))...]) +end + +# Catches a bug in `copyto!` in CUDA backend. +function Base.copyto!(dest::Exposed{<:JLArray}, src::Exposed{<:JLArray,<:SubArray}) + copyto!(dest, expose(copy(src))) + return unexpose(dest) +end + +# Catches a bug in `copyto!` in JLArray backend. +function Base.copyto!( + dest::Exposed{<:JLArray}, src::Exposed{<:JLArray,<:Base.ReshapedArray} +) + copyto!(dest, expose(parent(src))) + return unexpose(dest) +end diff --git a/NDTensors/ext/NDTensorsJLArraysExt/indexing.jl b/NDTensors/ext/NDTensorsJLArraysExt/indexing.jl new file mode 100644 index 0000000000..0f6eeb0469 --- /dev/null +++ b/NDTensors/ext/NDTensorsJLArraysExt/indexing.jl @@ -0,0 +1,19 @@ +using JLArrays: JLArray +using GPUArraysCore: @allowscalar +using NDTensors: NDTensors +using NDTensors.Expose: Exposed, expose, unexpose + +function Base.getindex(E::Exposed{<:JLArray}) + return @allowscalar unexpose(E)[] +end + +function Base.setindex!(E::Exposed{<:JLArray}, x::Number) + @allowscalar unexpose(E)[] = x + return unexpose(E) +end + +function Base.getindex(E::Exposed{<:JLArray,<:Adjoint}, i, j) + return (expose(parent(E))[j, i])' +end + +Base.any(f, E::Exposed{<:JLArray,<:NDTensors.Tensor}) = any(f, data(unexpose(E))) diff --git a/NDTensors/ext/NDTensorsJLArraysExt/linearalgebra.jl b/NDTensors/ext/NDTensorsJLArraysExt/linearalgebra.jl new file mode 100644 index 0000000000..4d594050f1 --- /dev/null +++ b/NDTensors/ext/NDTensorsJLArraysExt/linearalgebra.jl @@ -0,0 +1,40 @@ +using Adapt: adapt +using JLArrays: JLArray, JLMatrix +using 
LinearAlgebra: LinearAlgebra, Hermitian, Symmetric, qr, eigen +using NDTensors: NDTensors +using NDTensors.Expose: Expose, expose, qr, qr_positive, ql, ql_positive +using NDTensors.GPUArraysCoreExtensions: cpu +using NDTensors.TypeParameterAccessors: unwrap_array_type + +## TODO this function exists because of the same issue below. when +## that issue is resolved we can rely on the abstractarray version of +## this operation. +function Expose.qr(A::Exposed{<:JLArray}) + Q, L = qr(unexpose(A)) + return adapt(unwrap_array_type(A), Matrix(Q)), adapt(unwrap_array_type(A), L) +end +## TODO this should work using a JLArray but there is an error converting the Q from its packed QR form +## back into a JLArray see https://github.com/JuliaGPU/GPUArrays.jl/issues/545. To fix call cpu for now +function Expose.qr_positive(A::Exposed{<:JLArray}) + Q, L = qr_positive(expose(cpu(A))) + return adapt(unwrap_array_type(A), copy(Q)), adapt(unwrap_array_type(A), L) +end + +function Expose.ql(A::Exposed{<:JLMatrix}) + Q, L = ql(expose(cpu(A))) + return adapt(unwrap_array_type(A), copy(Q)), adapt(unwrap_array_type(A), L) +end +function Expose.ql_positive(A::Exposed{<:JLMatrix}) + Q, L = ql_positive(expose(cpu(A))) + return adapt(unwrap_array_type(A), copy(Q)), adapt(unwrap_array_type(A), L) +end + +function LinearAlgebra.eigen(A::Exposed{<:JLMatrix,<:Symmetric}) + q, l = (eigen(expose(cpu(A)))) + return adapt.(unwrap_array_type(A), (q, l)) +end + +function LinearAlgebra.eigen(A::Exposed{<:JLMatrix,<:Hermitian}) + q, l = (eigen(expose(Hermitian(cpu(unexpose(A).data))))) + return adapt.(JLArray, (q, l)) +end diff --git a/NDTensors/ext/NDTensorsJLArraysExt/mul.jl b/NDTensors/ext/NDTensorsJLArraysExt/mul.jl new file mode 100644 index 0000000000..5b04e75df6 --- /dev/null +++ b/NDTensors/ext/NDTensorsJLArraysExt/mul.jl @@ -0,0 +1,43 @@ +using JLArrays: JLArray +using LinearAlgebra: LinearAlgebra, mul!, transpose +using NDTensors.Expose: Exposed, expose, unexpose + +function
LinearAlgebra.mul!( + CM::Exposed{<:JLArray,<:LinearAlgebra.Transpose}, + AM::Exposed{<:JLArray}, + BM::Exposed{<:JLArray}, + α, + β, +) + mul!(transpose(CM), transpose(BM), transpose(AM), α, β) + return unexpose(CM) +end + +function LinearAlgebra.mul!( + CM::Exposed{<:JLArray,<:LinearAlgebra.Adjoint}, + AM::Exposed{<:JLArray}, + BM::Exposed{<:JLArray}, + α, + β, +) + mul!(CM', BM', AM', α, β) + return unexpose(CM) +end + +## Fix issue in JLArrays.jl where it cannot distinguish Transpose{Reshape{Adjoint{JLArray}}} +## as a JLArray and calls generic matmul +function LinearAlgebra.mul!( + CM::Exposed{<:JLArray}, + AM::Exposed{<:JLArray}, + BM::Exposed{ + <:JLArray, + <:LinearAlgebra.Transpose{ + <:Any,<:Base.ReshapedArray{<:Any,<:Any,<:LinearAlgebra.Adjoint} + }, + }, + α, + β, +) + mul!(CM, AM, expose(transpose(copy(expose(parent(BM))))), α, β) + return unexpose(CM) +end diff --git a/NDTensors/ext/NDTensorsJLArraysExt/permutedims.jl b/NDTensors/ext/NDTensorsJLArraysExt/permutedims.jl new file mode 100644 index 0000000000..4bbd5833c4 --- /dev/null +++ b/NDTensors/ext/NDTensorsJLArraysExt/permutedims.jl @@ -0,0 +1,24 @@ +using JLArrays: JLArray +using LinearAlgebra: Adjoint +using NDTensors.Expose: Exposed, expose, unexpose + +function Base.permutedims!( + Edest::Exposed{<:JLArray,<:Base.ReshapedArray}, Esrc::Exposed{<:JLArray}, perm +) + Aperm = permutedims(Esrc, perm) + copyto!(expose(parent(Edest)), expose(Aperm)) + return unexpose(Edest) +end + +## Found an issue in CUDA where if Edest is a reshaped{<:Adjoint} +## .= can fail. 
So instead force Esrc into the shape of parent(Edest) +function Base.permutedims!( + Edest::Exposed{<:JLArray,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}}, + Esrc::Exposed{<:JLArray}, + perm, + f, +) + Aperm = reshape(permutedims(Esrc, perm), size(parent(Edest))) + parent(Edest) .= f.(parent(Edest), Aperm) + return unexpose(Edest) +end diff --git a/NDTensors/ext/NDTensorsMetalExt/set_types.jl b/NDTensors/ext/NDTensorsMetalExt/set_types.jl index 72cdbd607b..761fa089d4 100644 --- a/NDTensors/ext/NDTensorsMetalExt/set_types.jl +++ b/NDTensors/ext/NDTensorsMetalExt/set_types.jl @@ -4,13 +4,6 @@ using Metal: Metal, MtlArray using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position using NDTensors.GPUArraysCoreExtensions: storagemode -## TODO remove TypeParameterAccessors when SetParameters is removed -function TypeParameterAccessors.position(::Type{<:MtlArray}, ::typeof(eltype)) - return Position(1) -end -function TypeParameterAccessors.position(::Type{<:MtlArray}, ::typeof(ndims)) - return Position(2) -end function TypeParameterAccessors.position(::Type{<:MtlArray}, ::typeof(storagemode)) return Position(3) end diff --git a/NDTensors/src/lib/TypeParameterAccessors/src/base/abstractarray.jl b/NDTensors/src/lib/TypeParameterAccessors/src/base/abstractarray.jl index 57657f70f4..14286b65cb 100644 --- a/NDTensors/src/lib/TypeParameterAccessors/src/base/abstractarray.jl +++ b/NDTensors/src/lib/TypeParameterAccessors/src/base/abstractarray.jl @@ -76,6 +76,8 @@ end position(type::Type{<:AbstractArray}, ::typeof(eltype)) = Position(1) position(type::Type{<:AbstractArray}, ::typeof(ndims)) = Position(2) +default_type_parameters(::Type{<:AbstractArray}) = (Float64, 1) + for wrapper in [:PermutedDimsArray, :(Base.ReshapedArray), :SubArray] @eval begin position(type::Type{<:$wrapper}, ::typeof(eltype)) = Position(1) diff --git a/NDTensors/src/lib/TypeParameterAccessors/src/base/array.jl b/NDTensors/src/lib/TypeParameterAccessors/src/base/array.jl index 
47770f17f6..247c73ba1f 100644 --- a/NDTensors/src/lib/TypeParameterAccessors/src/base/array.jl +++ b/NDTensors/src/lib/TypeParameterAccessors/src/base/array.jl @@ -1,4 +1,2 @@ position(::Type{<:Array}, ::typeof(eltype)) = Position(1) position(::Type{<:Array}, ::typeof(ndims)) = Position(2) - -default_type_parameters(::Type{<:Array}) = (Float64, 1) diff --git a/NDTensors/test/NDTensorsTestUtils/device_list.jl b/NDTensors/test/NDTensorsTestUtils/device_list.jl index 1220ec42a9..9294956fc2 100644 --- a/NDTensors/test/NDTensorsTestUtils/device_list.jl +++ b/NDTensors/test/NDTensorsTestUtils/device_list.jl @@ -1,30 +1,40 @@ -using NDTensors: NDTensors using Pkg: Pkg +using NDTensors: NDTensors + if "cuda" in ARGS || "all" in ARGS ## Right now adding CUDA during Pkg.test results in a ## compat issues. I am adding it back to test/Project.toml - # Pkg.add("CUDA") - using CUDA + Pkg.add("CUDA") + using CUDA: CUDA end if "rocm" in ARGS || "all" in ARGS ## Warning AMDGPU does not work in Julia versions below 1.8 Pkg.add("AMDGPU") - using AMDGPU + using AMDGPU: AMDGPU end if "metal" in ARGS || "all" in ARGS ## Warning Metal does not work in Julia versions below 1.8 Pkg.add("Metal") - using Metal + using Metal: Metal end if "cutensor" in ARGS || "all" in ARGS + Pkg.add("CUDA") Pkg.add("cuTENSOR") - using CUDA, cuTENSOR + using CUDA: CUDA + using cuTENSOR: cuTENSOR end +using JLArrays: JLArrays, jl + function devices_list(test_args) devs = Vector{Function}(undef, 0) if isempty(test_args) || "base" in test_args push!(devs, NDTensors.cpu) + ## Skip jl on lower versions of Julia for now + ## all linear algebra is failing on Julia 1.6 with JLArrays + if VERSION > v"1.7" + push!(devs, jl) + end end if "cuda" in test_args || "cutensor" in test_args || "all" in test_args @@ -44,5 +54,6 @@ function devices_list(test_args) if "metal" in test_args || "all" in test_args push!(devs, NDTensors.MetalExtensions.mtl) end + return devs end diff --git a/NDTensors/test/Project.toml 
b/NDTensors/test/Project.toml index d2c66c4051..5244d98895 100644 --- a/NDTensors/test/Project.toml +++ b/NDTensors/test/Project.toml @@ -1,7 +1,6 @@ [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4" @@ -9,6 +8,7 @@ EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" ITensors = "9136182c-28ba-11e9-034c-db9fb085ebd5" +JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MappedArrays = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" NDTensors = "23ae76d9-e61a-49c4-8f12-3f1a16adf9cf" @@ -24,10 +24,11 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] -Metal = "1.1.0" cuTENSOR = "2.0" +Metal = "1.1.0" [extras] AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Metal = "dde4c033-4e86-420c-a63e-0dd931031962" cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index c50a7c874c..5ac7f6a62b 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -7,30 +7,6 @@ pipeline { stages { stage('GPU Testing') { parallel { - stage('NDTensorsCUDAExt julia-1.6') { - options { - timeout(time: 45, unit: 'MINUTES') - } - agent { - dockerfile { - label 'gpu&&v100' - filename 'Dockerfile' - dir 'jenkins' - additionalBuildArgs '--build-arg JULIA=1.6' - args '--gpus "device=0"' - } - } - environment { - HOME = pwd(tmp:true) - OMP_NUM_THREADS = 4 - JULIA_NUM_THREADS = 4 - } - steps { - sh ''' - julia -e 'using Pkg; Pkg.Registry.update(); Pkg.update(); Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); 
Pkg.test("NDTensors"; test_args=["cuda"])' - ''' - } - } stage('NDTensorsCUDAExt julia-1.10') { options { timeout(time: 45, unit: 'MINUTES') @@ -51,7 +27,7 @@ pipeline { } steps { sh ''' - julia -e 'using Pkg; Pkg.Registry.update(); Pkg.update(); Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cuda"])' + julia -e 'using Pkg; Pkg.activate(temp=true); Pkg.Registry.update(); Pkg.update(); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cuda"])' ''' } } @@ -75,7 +51,7 @@ pipeline { } steps { sh ''' - julia -e 'using Pkg; Pkg.Registry.update(); Pkg.update(); Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cutensor"])' + julia -e 'using Pkg; Pkg.activate(temp=true); Pkg.Registry.update(); Pkg.update(); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cutensor"])' ''' } }