-
Notifications
You must be signed in to change notification settings - Fork 91
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Half precision support #1257
Half precision support #1257
Changes from all commits
4ed36a8
ddc7c16
501e6c7
957d29c
a0c389c
fe5e491
6c3c12b
a0ee872
5acbf27
b171312
6c17701
cdbf0a0
209c799
75b54fa
48ea338
fdcc066
f0a8a07
f041b4a
3154a04
58784ab
9b2465b
e2a6c9a
51cf597
0a42796
e3b81df
c9fd747
684cadb
60767ed
d65255a
5c0454f
da15916
5f9e3ff
fe45560
c7f0d2a
710e037
34845f3
48afbb5
a51f136
60123dc
8f1e28f
6dbd616
cd270e1
69d5b59
57fc170
18e825f
825f76f
81d63ac
2a6d382
8731fc3
64406f3
baa95f7
4bb8093
c539398
d0e2446
0d777df
56e2af8
6cc26d7
3d15350
c4697a5
377432a
e806a0a
3e49252
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,8 +14,10 @@ | |
|
||
// CUDA TOOLKIT < 11 does not support constexpr in combination with | ||
// thrust::complex, which is why constexpr is only present in later versions | ||
#if defined(__CUDA_ARCH__) && defined(__CUDACC_VER_MAJOR__) && \ | ||
(__CUDACC_VER_MAJOR__ < 11) | ||
// TODO: NVC++ constexpr | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this TODO addressed with the |
||
#if (defined(__CUDA_ARCH__) && defined(__CUDACC_VER_MAJOR__) && \ | ||
(__CUDACC_VER_MAJOR__ < 11)) || \ | ||
(defined(__NVCOMPILER) && GINKGO_ENABLE_HALF) | ||
|
||
#define GKO_ACC_ENABLE_REFERENCE_CONSTEXPR | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,9 @@ | ||
ginkgo_add_typed_benchmark_executables(spmv "YES" spmv.cpp) | ||
# TODO: move to all benchmark
# Only when GINKGO_ENABLE_HALF is set: register one extra executable built
# from spmv.cpp, passing the name "spmv_half" and the macro name
# GKO_BENCHMARK_USE_HALF_PRECISION to the helper.
# NOTE(review): the meaning of the "YES" and "h" arguments is defined by
# ginkgo_add_single_benchmark_executable, which is not visible here - confirm.
if (GINKGO_ENABLE_HALF)
ginkgo_add_single_benchmark_executable(
"spmv_half" "YES" "GKO_BENCHMARK_USE_HALF_PRECISION" "h" spmv.cpp)
endif()
Comment on lines
+2
to
+6
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why didn't you move it to |
||
if(GINKGO_BUILD_MPI) | ||
add_subdirectory(distributed) | ||
endif() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -132,10 +132,7 @@ struct DefaultSystemGenerator { | |
{ | ||
auto res = Vec::create(exec); | ||
res->read(gko::matrix_data<ValueType, itype>( | ||
size, | ||
std::uniform_real_distribution<gko::remove_complex<ValueType>>(-1.0, | ||
1.0), | ||
get_engine())); | ||
size, std::uniform_real_distribution<>(-1.0, 1.0), get_engine())); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will potentially give us conversion warnings in the future, but I'm generally fine with it There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it is handled in the get_rand_value, |
||
return res; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,8 @@ using itype = gko::int32; | |
#if defined(GKO_BENCHMARK_USE_DOUBLE_PRECISION) || \ | ||
defined(GKO_BENCHMARK_USE_SINGLE_PRECISION) || \ | ||
defined(GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) || \ | ||
defined(GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) | ||
defined(GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) || \ | ||
defined(GKO_BENCHMARK_USE_HALF_PRECISION) | ||
// separate ifdefs to catch duplicate definitions | ||
#ifdef GKO_BENCHMARK_USE_DOUBLE_PRECISION | ||
using etype = double; | ||
|
@@ -31,11 +32,44 @@ using etype = std::complex<double>; | |
#ifdef GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION | ||
using etype = std::complex<float>; | ||
#endif | ||
#ifdef GKO_BENCHMARK_USE_HALF_PRECISION | ||
#include <ginkgo/core/base/half.hpp> | ||
using etype = gko::half; | ||
#endif | ||
#else // default to double precision | ||
using etype = double; | ||
#endif | ||
|
||
using rc_etype = gko::remove_complex<etype>; | ||
|
||
|
||
namespace detail { | ||
|
||
|
||
// singly linked list of all our supported precisions | ||
// Primary template: it has no `type` member, so next_precision<T> is only
// well-formed for types that have a specialization.
// NOTE(review): no specialization for gko::half is visible in this view -
// confirm whether one is needed when GKO_BENCHMARK_USE_HALF_PRECISION is set.
template <typename T> | ||
struct next_precision_impl {}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This would probably also be a good time to implement |
||
|
||
// The precision following float is double.
template <> | ||
struct next_precision_impl<float> { | ||
using type = double; | ||
}; | ||
|
||
// The precision following double is float, so float and double map to each
// other.
template <> | ||
struct next_precision_impl<double> { | ||
using type = float; | ||
}; | ||
|
||
|
||
// For std::complex<T>, the next precision is std::complex of the next
// precision of T; this requires next_precision_impl<T> to provide `type`.
template <typename T> | ||
struct next_precision_impl<std::complex<T>> { | ||
using type = std::complex<typename next_precision_impl<T>::type>; | ||
}; | ||
|
||
|
||
} // namespace detail | ||
|
||
// Convenience alias for detail::next_precision_impl<T>::type, e.g.
// next_precision<float> is double and next_precision<double> is float.
template <typename T> | ||
using next_precision = typename detail::next_precision_impl<T>::type; | ||
MarcelKoch marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
#endif // GKO_BENCHMARK_UTILS_TYPES_HPP_ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some ABI/name mangling differences between struct and class in MSVC - are you sure this will be defined as a struct always?
Also for cleaner headers/exports, maybe we should make this conditional on CUDA compilation?