diff --git a/addon/gemmd/attic/bli_gemm_ex.c b/addon/gemmd/attic/bli_gemm_ex.c index 0f40d1cb39..88f0e159cf 100644 --- a/addon/gemmd/attic/bli_gemm_ex.c +++ b/addon/gemmd/attic/bli_gemm_ex.c @@ -69,11 +69,10 @@ void bli_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) diff --git a/addon/gemmd/bao_gemmd.c b/addon/gemmd/bao_gemmd.c index 01185a9d75..35653279a6 100644 --- a/addon/gemmd/bao_gemmd.c +++ b/addon/gemmd/bao_gemmd.c @@ -78,13 +78,12 @@ void bao_gemmd_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); // Check parameters. 
if ( bli_error_checking_is_enabled() ) diff --git a/build/bli_config.h.in b/build/bli_config.h.in index fa6bbbe12e..a46258182b 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -79,6 +79,20 @@ #define BLIS_DISABLE_SBA_POOLS #endif +#if @enable_error_checking@ +#define BLIS_ENABLE_ERROR_CHECKING +#else +#define BLIS_DISABLE_ERROR_CHECKING +#endif + +#if @enable_error_return@ +#define BLIS_ENABLE_ERROR_RETURN +#endif + +#if @enable_error_abort@ +#define BLIS_ENABLE_ERROR_ABORT +#endif + #if @enable_mem_tracing@ #define BLIS_ENABLE_MEM_TRACING #else diff --git a/build/detect/config/config_detect.c b/build/detect/config/config_detect.c index 5f1ea0f420..b501f6b23a 100644 --- a/build/detect/config/config_detect.c +++ b/build/detect/config/config_detect.c @@ -69,8 +69,10 @@ int main( int argc, char** argv ) { - arch_t id = bli_cpuid_query_id(); - const char* s = bli_arch_string( id ); + const char* s; + + arch_t id = bli_cpuid_query_id(); + err_t r_val = bli_arch_string( id, &s ); printf( "%s\n", s ); diff --git a/configure b/configure index 5ff877317f..43d16bd40c 100755 --- a/configure +++ b/configure @@ -209,9 +209,30 @@ print_usage() echo " it no longer needs to call malloc() or free(), even" echo " across many separate level-3 operation invocations." echo " " + echo " --enable-error-checking, --disable-error-checking" + echo " " + echo " Disable (enabled by default) runtime error checking. This" + echo " includes checking for things such as inconsistent object" + echo " properties, memory allocation errors, and configuration" + echo " errors. When enabled, BLIS will report an error via the" + echo " method specified by the --error-handling-mode option." + echo " When disabled, any function that is set up to return an" + echo " error code will return \"success\" unconditionally." + echo " " + echo " --error-handling-mode=[return|abort]" + echo " " + echo " Specify the way that BLIS reacts to errors. 
The 'return'" + echo " mode causes BLIS to return an error code all the way up" + echo " the function stack to the caller, which may then be used" + echo " to query a human-readable error string. The 'abort' mode" + echo " causes BLIS to output the aforementioned error string and" + echo " then call abort(), which facilitates debugging (e.g. via" + echo " a debugger's backtrace feature). By default, the 'abort'" + echo " mode is used." + echo " " echo " --enable-mem-tracing, --disable-mem-tracing" echo " " - echo " Enable (disable by default) output to stdout that traces" + echo " Enable (disabled by default) output to stdout that traces" echo " the allocation and freeing of memory, including the names" echo " of the functions that triggered the allocation/freeing." echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE." @@ -339,8 +360,8 @@ print_usage() echo " these division instructions within the microkernel will" echo " incur a performance penalty, but numerical robustness will" echo " improve for certain cases involving denormal numbers that" - echo " would otherwise result in overflow in the pre-inverted" - echo " values." + echo " would otherwise result in overflow if pre-inversion were" + echo " employed." echo " " echo " --force-version=STRING" echo " " @@ -356,14 +377,14 @@ print_usage() echo " a sanity check to make sure these lists are constituted" echo " as expected." echo " " - echo " --complex-return=gnu|intel" + echo " --complex-return=[gnu|intel]" echo " " echo " Specify the way in which complex numbers are returned" - echo " from Fortran functions, either \"gnu\" (return in" - echo " registers) or \"intel\" (return via hidden argument)." + echo " from Fortran functions, either 'gnu' (return in" + echo " registers) or 'intel' (return via hidden argument)." echo " If not specified and the environment variable FC is set," echo " attempt to determine the return type from the compiler." - echo " Otherwise, the default is \"gnu\"." 
+ echo " Otherwise, the default is 'gnu'." echo " " echo " -q, --quiet Suppress informational output. By default, configure" echo " is verbose. (NOTE: -q is not yet implemented)" @@ -2451,6 +2472,10 @@ main() quiet_flag='' show_config_list='' + # Error-related flags. + enable_error_checking='yes' + error_handling_mode='abort' + # Additional flags. enable_verbose='no' enable_arg_max_hack='no' @@ -2602,6 +2627,15 @@ main() disable-system) enable_system='no' ;; + enable-error-checking) + enable_error_checking='yes' + ;; + disable-error-checking) + enable_error_checking='no' + ;; + error-handling-mode=*) + error_handling_mode=${OPTARG#*=} + ;; enable-threading=*) threading_model=${OPTARG#*=} ;; @@ -3465,7 +3499,7 @@ main() exit 1 fi - # Convert 'yes' and 'no' flags to booleans. + # Check if we are enabling memory pools for large or small blocks. if [ "x${enable_pba_pools}" = "xyes" ]; then echo "${script_name}: internal memory pools for packing blocks are enabled." enable_pba_pools_01=1 @@ -3480,6 +3514,31 @@ main() echo "${script_name}: internal memory pools for small blocks are disabled." enable_sba_pools_01=0 fi + + # Check if we are enabling error checking. + if [ "x${enable_error_checking}" = "xyes" ]; then + echo "${script_name}: error checking is enabled." + enable_error_checking_01=1 + else + echo "${script_name}: error checking is disabled." + enable_error_checking_01=0 + fi + + # Check the error handling mode. + enable_error_return_01=0 + enable_error_abort_01=0 + if [ "x${error_handling_mode}" = "xreturn" ]; then + echo "${script_name}: requesting that error codes be returned to caller." + enable_error_return_01=1 + elif [ "x${error_handling_mode}" = "xabort" ]; then + echo "${script_name}: requesting that errors trigger a message followed by abort()." + enable_error_abort_01=1 + else + echo "${script_name}: *** Unsupported mode of error handling: ${error_handling_mode}." + exit 1 + fi + + # Check if we are enabling memory tracing output. 
if [ "x${enable_mem_tracing}" = "xyes" ]; then echo "${script_name}: memory tracing output is enabled." enable_mem_tracing_01=1 @@ -3487,6 +3546,8 @@ main() echo "${script_name}: memory tracing output is disabled." enable_mem_tracing_01=0 fi + + # Check if we are enabling support for libmemkind. if [ "x${has_memkind}" = "xyes" ]; then if [ "x${enable_memkind}" = "x" ]; then # If no explicit option was given for libmemkind one way or the other, @@ -3514,6 +3575,8 @@ main() enable_memkind="no" enable_memkind_01=0 fi + + # Check if we are enabling #pragma omp simd. if [ "x${pragma_omp_simd}" = "xyes" ]; then echo "${script_name}: compiler appears to support #pragma omp simd." enable_pragma_omp_simd_01=1 @@ -3521,6 +3584,8 @@ main() echo "${script_name}: compiler appears to not support #pragma omp simd." enable_pragma_omp_simd_01=0 fi + + # Check if we are enabling the BLAS/CBLAS compatibility layers. if [ "x${enable_blas}" = "xyes" ]; then echo "${script_name}: the BLAS compatibility layer is enabled." enable_blas_01=1 @@ -3537,6 +3602,8 @@ main() echo "${script_name}: the CBLAS compatibility layer is disabled." enable_cblas_01=0 fi + + # Check if we are enabling mixed datatype support. if [ "x${enable_mixed_dt}" = "xyes" ]; then echo "${script_name}: mixed datatype support is enabled." @@ -3555,6 +3622,8 @@ main() enable_mixed_dt_extra_mem_01=0 enable_mixed_dt_01=0 fi + + # Check if we are enabling skinny/unpacked (sup) matrix handling. if [ "x${enable_sup_handling}" = "xyes" ]; then echo "${script_name}: small matrix handling is enabled." enable_sup_handling_01=1 @@ -3562,6 +3631,8 @@ main() echo "${script_name}: small matrix handling is disabled." enable_sup_handling_01=0 fi + + # Check if we are enabling pre-inversion of diagonal elements for trsm. if [ "x${enable_trsm_preinversion}" = "xyes" ]; then echo "${script_name}: trsm diagonal element pre-inversion is enabled." 
enable_trsm_preinversion_01=1 @@ -3713,7 +3784,7 @@ main() exit 1 fi - echo "${script_name}: configuring complex return type as \"${complex_return}\"." + echo "${script_name}: configuring complex return type as '${complex_return}'." # Variables that may contain forward slashes, such as paths, need extra # escaping when used in sed commands. We insert those extra escape @@ -3896,6 +3967,9 @@ main() | sed -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \ | sed -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \ | sed -e "s/@enable_sba_pools@/${enable_sba_pools_01}/g" \ + | sed -e "s/@enable_error_checking@/${enable_error_checking_01}/g" \ + | sed -e "s/@enable_error_return@/${enable_error_return_01}/g" \ + | sed -e "s/@enable_error_abort@/${enable_error_abort_01}/g" \ | sed -e "s/@enable_mem_tracing@/${enable_mem_tracing_01}/g" \ | sed -e "s/@int_type_size@/${int_type_size}/g" \ | sed -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \ diff --git a/docs/BLISObjectAPI.md b/docs/BLISObjectAPI.md index 5e8ed3d8fb..f618786abc 100644 --- a/docs/BLISObjectAPI.md +++ b/docs/BLISObjectAPI.md @@ -203,9 +203,9 @@ The expert interface contains two additional parameters: a `cntx_t*` and `rntm_t In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_gemm_ex()`. However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. 
Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): ```c -cntx_t* bli_gks_query_cntx( void ); +err_t bli_gks_query_cntx( const cntx_t** cntx ); ``` -When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to select and and return the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally return a pointer to the context appropriate for the targeted configuration. +When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to provide the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally provide a pointer to the context appropriate for the targeted configuration. ## Runtime type @@ -2288,15 +2288,15 @@ char* bli_info_get_version_str( void ); ## Specific configuration -The following routine returns a unique ID of type `arch_t` that identifies the current current active configuration: +The following routine determines a unique ID of type `arch_t` that identifies the current active configuration: ```c -arch_t bli_arch_query_id( void ); +err_t bli_arch_query_id( arch_t* id ); ``` This is most useful when BLIS is configured with multiple configurations. (When linking to multi-configuration builds of BLIS, you don't know for sure which configuration will be used until runtime since the configuration-specific parameters are not loaded until after calling a hueristic to detect the hardware--usually based the `CPUID` instruction.) 
Once the configuration's ID is known, it can be used to query a string that contains the name of the configuration: ```c -char* bli_arch_string( arch_t id ); +err_t bli_arch_string( arch_t id, const char** str ); ``` ## General configuration @@ -2328,11 +2328,11 @@ gint_t bli_info_get_blas_int_type_size( void ); The following routines allow the caller to obtain a string that identifies the implementation type of each microkernel that is currently active (ie: part of the current active configuration, as identified bi `bli_arch_query_id()`). ```c -char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) +err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) ``` Possible implementation (ie: the `ind_t method` argument) types are: diff --git a/docs/BLISTypedAPI.md b/docs/BLISTypedAPI.md index 76d7ef8f63..4c087dbe32 100644 --- a/docs/BLISTypedAPI.md +++ b/docs/BLISTypedAPI.md @@ -154,11 +154,11 @@ The expert interface contains two additional parameters: a `cntx_t*` and `rntm_t ## Context type -In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_dgemm_ex()`. However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. 
Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): +In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_dgemm_ex()`. However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): ```c -cntx_t* bli_gks_query_cntx( void ); +err_t bli_gks_query_cntx( const cntx_t** cntx ); ``` -When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to select and and return the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally return a pointer to the context appropriate for the targeted configuration. +When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to provide the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. 
`haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally provide a pointer to the context appropriate for the targeted configuration. ## Runtime type @@ -1967,15 +1967,15 @@ char* bli_info_get_version_str( void ); ## Specific configuration -The following routine returns a unique ID of type `arch_t` that identifies the current current active configuration: +The following routine determines a unique ID of type `arch_t` that identifies the current active configuration: ```c -arch_t bli_arch_query_id( void ); +err_t bli_arch_query_id( arch_t* id ); ``` This is most useful when BLIS is configured with multiple configurations. (When linking to multi-configuration builds of BLIS, you don't know for sure which configuration will be used until runtime since the configuration-specific parameters are not loaded until after calling a hueristic to detect the hardware--usually based the `CPUID` instruction.) Once the configuration's ID is known, it can be used to query a string that contains the name of the configuration: ```c -char* bli_arch_string( arch_t id ); +err_t bli_arch_string( arch_t id, const char** str ); ``` ## General configuration @@ -2007,11 +2007,11 @@ gint_t bli_info_get_blas_int_type_size( void ); The following routines allow the caller to obtain a string that identifies the implementation type of each microkernel that is currently active (ie: part of the current active configuration, as identified bi `bli_arch_query_id()`). 
```c -char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) -char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) +err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, char** str ) +err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, char** str ) ``` Possible implementation (ie: the `ind_t method` argument) types are: @@ -2029,16 +2029,16 @@ Possible microkernel types (ie: the return values for `bli_info_get_*_ukr_impl_s The following routines allow the caller to obtain a string that identifies the implementation (`ind_t`) that is currently active (ie: implemented and enabled) for each level-3 operation. Possible implementation types are listed in the section above covering [microkernel implemenation query](BLISTypedAPI.md#microkernel-implementation-type-query). 
```c -char* bli_info_get_gemm_impl_string( num_t dt ); -char* bli_info_get_hemm_impl_string( num_t dt ); -char* bli_info_get_herk_impl_string( num_t dt ); -char* bli_info_get_her2k_impl_string( num_t dt ); -char* bli_info_get_symm_impl_string( num_t dt ); -char* bli_info_get_syrk_impl_string( num_t dt ); -char* bli_info_get_syr2k_impl_string( num_t dt ); -char* bli_info_get_trmm_impl_string( num_t dt ); -char* bli_info_get_trmm3_impl_string( num_t dt ); -char* bli_info_get_trsm_impl_string( num_t dt ); +err_t bli_info_get_gemm_impl_string( num_t dt, char** str ); +err_t bli_info_get_hemm_impl_string( num_t dt, char** str ); +err_t bli_info_get_herk_impl_string( num_t dt, char** str ); +err_t bli_info_get_her2k_impl_string( num_t dt, char** str ); +err_t bli_info_get_symm_impl_string( num_t dt, char** str ); +err_t bli_info_get_syrk_impl_string( num_t dt, char** str ); +err_t bli_info_get_syr2k_impl_string( num_t dt, char** str ); +err_t bli_info_get_trmm_impl_string( num_t dt, char** str ); +err_t bli_info_get_trmm3_impl_string( num_t dt, char** str ); +err_t bli_info_get_trsm_impl_string( num_t dt, char** str ); ``` diff --git a/docs/ConfigurationHowTo.md b/docs/ConfigurationHowTo.md index cc12241823..f3ed2d956a 100644 --- a/docs/ConfigurationHowTo.md +++ b/docs/ConfigurationHowTo.md @@ -595,7 +595,7 @@ Adding support for a new umbrella configuration family in BLIS is fairly straigh ``` The `BLIS_FAMILY_INTELAVX` will automatically be defined by the build system whenever the family was targeted by `configure` is `intelavx`. (In general, if the user runs `./configure foobar`, the C preprocessor macro `BLIS_FAMILY_FOOBAR` will be defined.) - * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` returns the correct `arch_t` microarchitecture ID value to the caller. This function is called when the framework is trying to choose which sub-configuration to use at runtime. 
For x86_64 architectures, this is supported via the `CPUID` instruction, as implemented via `bli_cpuid_query_id()`. Thus, you can simply mimic what is done for the `intel64` family by inserting lines such as: + * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` determines the correct `arch_t` microarchitecture ID value for the caller. This function is called when the framework is trying to choose which sub-configuration to use at runtime. For x86_64 architectures, this is supported via the `CPUID` instruction, as implemented via `bli_cpuid_query_id()`. Thus, you can simply mimic what is done for the `intel64` family by inserting lines such as: ```c #ifdef BLIS_FAMILY_INTELAVX id = bli_cpuid_query_id(); @@ -718,13 +718,13 @@ Adding support for a new-subconfiguration to BLIS is similar to adding support f - * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` returns the correct `arch_t` architecture ID value to the caller. `bli_arch_query_id()` is called when the framework is trying to choose which sub-configuration to use at runtime. When adding support for a sub-configuration as a singleton family, this amounts to adding a block of code such as: + * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` determines the correct `arch_t` architecture ID value for the caller. This function is called when the framework is trying to choose which sub-configuration to use at runtime. When adding support for a sub-configuration as a singleton family, this amounts to adding a block of code such as: ```c #ifdef BLIS_FAMILY_KNL id = BLIS_ARCH_KNL; #endif ``` - The `BLIS_FAMILY_KNL` macro is automatically `#defined` by the build system if the `knl` sub-configuration was targeted directly (as a singleton family) at configure-time. Other ID values are returned only if their respective family macros are defined. (Recall that only one family is ever enabled at time.) 
If, however, the `knl` sub-configuration was enabled indirectly via an umbrella family, `bli_arch_query_id()` will return the `arch_t` ID value via the lines similar to the following: + The `BLIS_FAMILY_KNL` macro is automatically `#defined` by the build system if the `knl` sub-configuration was targeted directly (as a singleton family) at configure-time. Other ID values are returned only if their respective family macros are defined. (Recall that only one family is ever enabled at time.) If, however, the `knl` sub-configuration was enabled indirectly via an umbrella family, `bli_arch_query_id()` will provide the `arch_t` ID value via the lines similar to the following: ```c #ifdef BLIS_FAMILY_INTEL64 id = bli_cpuid_query_id(); diff --git a/docs/KernelsHowTo.md b/docs/KernelsHowTo.md index 6e84db8e76..c864becc73 100644 --- a/docs/KernelsHowTo.md +++ b/docs/KernelsHowTo.md @@ -118,29 +118,15 @@ not already available in your current scope, a default context for the hardware for which BLIS was configured (or, in the case of multi-configuration builds, the hardware on which BLIS is currently running) may be queried via: ```c -cntx_t* bli_gks_query_cntx( void ); +err_t bli_gks_query_cntx( const cntx_t** cntx ); ``` -Once this `cntx_t*` pointer is obtained, you may call one of three functions to query any of the computation kernels described in this document: +Once this `cntx_t*` pointer is obtained, you may call the following function to query any of the computation kernels described in this document: ```c -void* bli_cntx_get_l3_nat_ukr_dt +void_fp bli_cntx_get_ukr_dt ( - num_t dt, - l3ukr_t ker_id, - cntx_t* cntx - ); - -void* bli_cntx_get_l1f_ker_dt - ( - num_t dt, - l1fkr_t ker_id, - cntx_t* cntx - ); - -void* bli_cntx_get_l1v_ker_dt - ( - num_t dt, - l1vkr_t ker_id, - cntx_t* cntx + num_t dt, + ukr_t ker_id, + const cntx_t* cntx ); ``` The `dt` and `ker_id` parameters specify the floating-point datatype and the @@ -152,30 +138,26 @@ Valid values for `ker_id` are 
given in the tables below. Also, note that the return values of `bli_cntx_get_l1v_ker_dt` `bli_cntx_get_l1f_ker_dt()`, and `bli_cntx_get_l3_nat_ukr_dt()`, -will be `void*` and must be typecast to typed function pointers before being called. +will be `void_fp` and must be typecast to typed function pointers before being called. As a convenience, BLIS defines function pointer types appropriate for usage in these situations. The function pointer type for each operation is given in the third columns of each table, with the `?` taking the place of one of the supported datatype characters. -| kernel operation | l3ukr_t | function pointer type | +| kernel operation | ukr_t | function pointer type | |:-----------------|:----------------------|:----------------------| | gemm | `BLIS_GEMM` | `?gemm_ukr_ft` | | trsm_l | `BLIS_TRSM_L_UKR` | `?trsm_ukr_ft` | | trsm_u | `BLIS_TRSM_U_UKR` | `?trsm_ukr_ft` | | gemmtrsm_l | `BLIS_GEMMTRSM_L_UKR` | `?gemmtrsm_ukr_ft` | | gemmtrsm_u | `BLIS_GEMMTRSM_U_UKR` | `?gemmtrsm_ukr_ft` | - -| kernel operation | l1fkr_t | function pointer type | -|:-----------------|:----------------------|:----------------------| +| | | | | axpy2v | `BLIS_AXPY2V_KER` | `?axpy2v_ft` | | dotaxpyv | `BLIS_DOTAXPYV_KER` | `?dotaxpyv_ft` | | axpyf | `BLIS_AXPYF_KER` | `?axpyf_ft` | | dotxf | `BLIS_DOTXF_KER` | `?dotxf_ft` | | dotxaxpyf | `BLIS_DOTXAXPYF_KER` | `?dotxaxpyf_ft` | - -| kernel operation | l1vkr_t | function pointer type | -|:-----------------|:----------------------|:----------------------| +| | | | | addv | `BLIS_ADDV_KER` | `?addv_ft` | | amaxv | `BLIS_AMAXV_KER` | `?amaxv_ft` | | axpyv | `BLIS_AXPYV_KER` | `?axpyv_ft` | @@ -256,8 +238,8 @@ void bli_?gemm_ ctype* restrict b1, ctype* restrict beta, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -274,8 +256,8 @@ void bli_?gemm_ukernel ctype* restrict b1, ctype* restrict beta, ctype* restrict c11, inc_t rsc, 
inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` This function simply queries a microkernel function pointer from the context specified by `cntx`. Note that in the case of either method of calling the microkernel, `cntx` must be a valid pointer. (Passing in `NULL` will *not* result in a default context being used.) @@ -373,8 +355,8 @@ void bli_?trsm_l_ ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?trsm_u_ @@ -382,8 +364,8 @@ void bli_?trsm_u_ ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -395,8 +377,8 @@ void bli_?trsm_l_ukernel ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?trsm_u_ukernel @@ -404,8 +386,8 @@ void bli_?trsm_u_ukernel ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -473,8 +455,8 @@ void bli_?gemmtrsm_l_ ctype* restrict b01, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?gemmtrsm_u_ @@ -488,8 +470,8 @@ void bli_?gemmtrsm_u_ ctype* restrict b21, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -507,8 +489,8 @@ void bli_?gemmtrsm_l_ukernel ctype* restrict b01, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); void bli_?gemmtrsm_u_ukernel @@ 
-522,8 +504,8 @@ void bli_?gemmtrsm_u_ukernel ctype* restrict b21, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, - auxinfo_t* restrict data, - cntx_t* restrict cntx + auxinfo_t* data, + cntx_t* cntx ); ``` @@ -655,7 +637,7 @@ void bli_?axpy2v_ ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict z, inc_t incz, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -679,7 +661,7 @@ void bli_?dotaxpyv_ ctype* restrict y, inc_t incy, ctype* restrict rho, ctype* restrict z, inc_t incz, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -703,7 +685,7 @@ void bli_?axpyf_ ctype* restrict a, inc_t inca, inc_t lda, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -727,7 +709,7 @@ void bli_?dotxf_ ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -757,7 +739,7 @@ void bli_?dotxaxpyf_ ctype* restrict beta, ctype* restrict y, inc_t incy, ctype* restrict z, inc_t incz, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -785,7 +767,7 @@ void bli_?addv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -803,7 +785,7 @@ void bli_?amaxv_ dim_t n, ctype* restrict x, inc_t incx, dim_t* restrict index, - cntx_t* restrict cntx + cntx_t* cntx ) ``` Given a vector of length _n_, this kernel returns the zero-based index `index` of the element of vector `x` that contains the largest absolute value (or, in the complex domain, the largest complex modulus). 
@@ -821,7 +803,7 @@ void bli_?axpyv_ ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -842,7 +824,7 @@ void bli_?axpbyv_ ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -861,7 +843,7 @@ void bli_?copyv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -882,7 +864,7 @@ void bli_?dotv_ ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict rho, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -905,7 +887,7 @@ void bli_?dotxv_ ctype* restrict y, inc_t incy, ctype* restrict beta, ctype* restrict rho, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -922,7 +904,7 @@ void bli_?invertv_ ( dim_t n, ctype* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel inverts all elements of an _n_-length vector `x`. 
@@ -937,7 +919,7 @@ void bli_?scalv_ dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -957,7 +939,7 @@ void bli_?scal2v_ ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -976,7 +958,7 @@ void bli_?setv_ dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -995,7 +977,7 @@ void bli_?subv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: @@ -1013,7 +995,7 @@ void bli_?swapv_ dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel swaps corresponding elements of two _n_-length vectors `x` and `y` stored with strides `incx` and `incy`, respectively. 
@@ -1029,7 +1011,7 @@ void bli_?xpbyv_ ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) ``` This kernel performs the following operation: diff --git a/frame/0/bli_l0_check.c b/frame/0/bli_l0_check.c index 02867a22d2..64c9777db9 100644 --- a/frame/0/bli_l0_check.c +++ b/frame/0/bli_l0_check.c @@ -41,13 +41,13 @@ #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_l0_xxsc_check( chi, psi ); \ + return bli_l0_xx_check( chi, psi ); \ } GENFRONT( addsc ) @@ -61,12 +61,12 @@ GENFRONT( subsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi \ ) \ { \ - bli_l0_xsc_check( chi ); \ + return bli_l0_x_check( chi ); \ } GENFRONT( invertsc ) @@ -75,13 +75,13 @@ GENFRONT( invertsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* norm \ ) \ { \ - bli_l0_xx2sc_check( chi, norm ); \ + return bli_l0_xx2_check( chi, norm ); \ } GENFRONT( absqsc ) @@ -89,7 +89,7 @@ GENFRONT( normfsc ) // ----------------------------------------------------------------------------- -void bli_getsc_check +err_t bli_getsc_check ( const obj_t* chi, const double* zeta_r, @@ -101,21 +101,23 @@ void bli_getsc_check // Check object datatypes. //e_val = bli_check_noninteger_object( chi ); - //bli_check_error_code( e_val ); + //bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_setsc_check +err_t bli_setsc_check ( double zeta_r, double zeta_i, @@ -127,21 +129,23 @@ void bli_setsc_check // Check object datatypes. //e_val = bli_check_floating_object( chi ); - //bli_check_error_code( e_val ); + //bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_unzipsc_check +err_t bli_unzipsc_check ( const obj_t* chi, const obj_t* zeta_r, @@ -152,52 +156,54 @@ void bli_unzipsc_check // Check object datatypes. - e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( chi ); + bli_check_return_error_code( e_val ); - e_val = bli_check_real_object( zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_real_object( zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_i ); + bli_check_return_error_code( e_val ); - e_val = bli_check_nonconstant_object( zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_nonconstant_object( zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_nonconstant_object( zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_nonconstant_object( zeta_i ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_object_real_proj_of( chi, zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_i ); - bli_check_error_code( e_val ); + 
e_val = bli_check_object_real_proj_of( chi, zeta_i ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_zipsc_check +err_t bli_zipsc_check ( const obj_t* zeta_r, const obj_t* zeta_i, @@ -208,51 +214,53 @@ void bli_zipsc_check // Check object datatypes. 
- e_val = bli_check_real_object( zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_real_object( zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_real_object( zeta_i ); + bli_check_return_error_code( e_val ); - e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( chi ); + bli_check_return_error_code( e_val ); - e_val = bli_check_nonconstant_object( chi ); - bli_check_error_code( e_val ); + e_val = bli_check_nonconstant_object( chi ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_r ); - bli_check_error_code( e_val ); + e_val = bli_check_object_real_proj_of( chi, zeta_r ); + bli_check_return_error_code( e_val ); - e_val = bli_check_object_real_proj_of( chi, zeta_i ); - bli_check_error_code( e_val ); + e_val = bli_check_object_real_proj_of( chi, zeta_i ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( zeta_r ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( zeta_i ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_l0_xsc_check +err_t bli_l0_x_check ( const obj_t* chi ) @@ -262,23 +270,25 @@ void bli_l0_xsc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_nonconstant_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_l0_xxsc_check +err_t bli_l0_xx_check ( const obj_t* chi, const obj_t* psi @@ -289,32 +299,34 @@ void bli_l0_xxsc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_noninteger_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_nonconstant_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_l0_xx2sc_check +err_t bli_l0_xx2_check ( const obj_t* chi, const obj_t* absq @@ -325,35 +337,37 @@ void bli_l0_xx2sc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_nonconstant_object( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_real_object( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_real_proj_of( chi, absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( absq ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } -void bli_l0_xxbsc_check +err_t bli_l0_xxbool_check ( const obj_t* chi, const obj_t* psi, @@ -365,25 +379,27 @@ void bli_l0_xxbsc_check // Check object datatypes. e_val = bli_check_noninteger_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_noninteger_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_scalar_object( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_scalar_object( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); e_val = bli_check_object_buffer( psi ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } diff --git a/frame/0/bli_l0_check.h b/frame/0/bli_l0_check.h index 1bbb4a7564..a113f6b467 100644 --- a/frame/0/bli_l0_check.h +++ b/frame/0/bli_l0_check.h @@ -40,7 +40,7 @@ #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* psi \ @@ -57,7 +57,7 @@ GENTPROT( subsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi \ ); @@ -68,7 +68,7 @@ GENTPROT( invertsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* absq \ @@ -81,7 +81,7 @@ GENTPROT( normfsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const double* zeta_r, \ @@ -94,7 +94,7 @@ GENTPROT( getsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ double zeta_r, \ double zeta_i, \ @@ -107,7 +107,7 @@ GENTPROT( setsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* chi, \ const obj_t* zeta_r, \ @@ -120,7 +120,7 @@ GENTPROT( unzipsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ -void PASTEMAC(opname,_check) \ +err_t PASTEMAC(opname,_check) \ ( \ const obj_t* zeta_r, \ const obj_t* 
zeta_i, \ @@ -131,26 +131,27 @@ GENTPROT( zipsc ) // ----------------------------------------------------------------------------- -void bli_l0_xsc_check +err_t bli_l0_x_check ( const obj_t* chi ); -void bli_l0_xxsc_check +err_t bli_l0_xx_check ( const obj_t* chi, const obj_t* psi ); -void bli_l0_xx2sc_check +err_t bli_l0_xx2_check ( const obj_t* chi, const obj_t* norm ); -void bli_l0_xxbsc_check +err_t bli_l0_xxbool_check ( const obj_t* chi, const obj_t* psi, const bool* is_eq ); + diff --git a/frame/0/bli_l0_ft.h b/frame/0/bli_l0_ft.h index 01d90cc3bd..dfd420bfe8 100644 --- a/frame/0/bli_l0_ft.h +++ b/frame/0/bli_l0_ft.h @@ -42,7 +42,7 @@ #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ const ctype* chi, \ @@ -58,7 +58,7 @@ INSERT_GENTDEF( subsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ ctype* chi \ @@ -71,7 +71,7 @@ INSERT_GENTDEF( invertsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ const ctype* chi, \ @@ -85,7 +85,7 @@ INSERT_GENTDEF( mulsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype_r* absq \ @@ -98,7 +98,7 @@ INSERT_GENTDEFR( absqsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype_r* norm \ @@ -111,7 +111,7 @@ INSERT_GENTDEFR( normfsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t 
(*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype* psi \ @@ -124,7 +124,7 @@ INSERT_GENTDEF( sqrtsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ double* zeta_r, \ @@ -138,7 +138,7 @@ INSERT_GENTDEF( getsc ) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ double zeta_r, \ double zeta_i, \ @@ -152,7 +152,7 @@ INSERT_GENTDEF( setsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype* chi, \ ctype_r* zeta_r, \ @@ -166,7 +166,7 @@ INSERT_GENTDEFR( unzipsc ) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ -typedef void (*PASTECH2(ch,opname,tsuf)) \ +typedef err_t (*PASTECH2(ch,opname,tsuf)) \ ( \ const ctype_r* zeta_r, \ const ctype_r* zeta_i, \ diff --git a/frame/0/bli_l0_oapi.c b/frame/0/bli_l0_oapi.c index 0bfdbe3b33..e938fee546 100644 --- a/frame/0/bli_l0_oapi.c +++ b/frame/0/bli_l0_oapi.c @@ -41,22 +41,27 @@ #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* absq \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi; \ num_t dt_absq_c = bli_obj_dt_proj_to_complex( absq ); \ \ const void* buf_chi; \ void* buf_absq = bli_obj_buffer_at_off( absq ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, absq ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, absq ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* If chi is a scalar constant, use dt_absq_c to extract the address of the corresponding constant value; otherwise, use the datatype encoded @@ -67,6 +72,7 @@ void PASTEMAC0(opname) \ void* for function arguments 
instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ buf_chi, \ @@ -81,13 +87,13 @@ GENFRONT( normfsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt = bli_obj_dt( psi ); \ \ @@ -95,14 +101,20 @@ void PASTEMAC0(opname) \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, psi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, psi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ + return \ f \ ( \ conjchi, \ @@ -120,26 +132,32 @@ GENFRONT( subsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt = bli_obj_dt( chi ); \ \ conj_t conjchi = bli_obj_conj_status( chi ); \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ + return \ f \ ( \ conjchi, \ @@ -153,26 +171,32 @@ GENFRONT( invertsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt = bli_obj_dt( psi ); \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, psi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, psi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ + return \ f \ ( \ buf_chi, \ @@ -186,14 +210,14 @@ GENFRONT( sqrtsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ double* zeta_r, \ double* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ num_t dt_def = BLIS_DCOMPLEX; \ @@ -203,9 +227,14 @@ void PASTEMAC0(opname) \ value to maximize precision, and since we don't know if the caller needs just the real or the real and imaginary parts. */ \ void* buf_chi = bli_obj_buffer_for_1x1( dt_def, chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* The _check() routine prevents integer types, so we know that chi is either a constant or an actual floating-point type. */ \ @@ -216,6 +245,7 @@ void PASTEMAC0(opname) \ void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_use ); \ \ + return \ f \ ( \ buf_chi, \ @@ -230,26 +260,32 @@ GENFRONT( getsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ double zeta_r, \ double zeta_i, \ const obj_t* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ \ void* buf_chi = bli_obj_buffer_at_off( chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \ + { \ + r_val = PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ zeta_r, \ @@ -264,14 +300,14 @@ GENFRONT( setsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* zeta_r, \ const obj_t* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi; \ num_t dt_zeta_c = bli_obj_dt_proj_to_complex( zeta_r ); \ @@ -280,9 +316,14 @@ void PASTEMAC0(opname) \ \ void* buf_zeta_r = bli_obj_buffer_at_off( zeta_r ); \ void* buf_zeta_i = bli_obj_buffer_at_off( zeta_i ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* If chi is a scalar constant, use dt_zeta_c to extract the address of the corresponding constant value; otherwise, use the datatype encoded @@ -293,6 +334,7 @@ void PASTEMAC0(opname) \ void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ buf_chi, \ @@ -307,14 +349,14 @@ GENFRONT( unzipsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* zeta_r, \ const obj_t* zeta_i, \ const obj_t* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ \ @@ -322,14 +364,20 @@ void PASTEMAC0(opname) \ void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, zeta_i ); \ \ void* buf_chi = bli_obj_buffer_at_off( chi ); \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ + return \ f \ ( \ buf_zeta_i, \ diff --git a/frame/0/bli_l0_oapi.h b/frame/0/bli_l0_oapi.h index a34252cf7c..a9b91d90da 100644 --- a/frame/0/bli_l0_oapi.h +++ b/frame/0/bli_l0_oapi.h @@ -40,7 +40,7 @@ #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* absq \ @@ -53,7 +53,7 @@ GENPROT( normfsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ @@ -69,7 +69,7 @@ GENPROT( subsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi \ ); @@ -80,7 +80,7 @@ GENPROT( invertsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ double* zeta_r, \ @@ -93,7 +93,7 @@ GENPROT( 
getsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ double zeta_r, \ double zeta_i, \ @@ -106,7 +106,7 @@ GENPROT( setsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* zeta_r, \ @@ -119,7 +119,7 @@ GENPROT( unzipsc ) #undef GENPROT #define GENPROT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* zeta_r, \ const obj_t* zeta_i, \ diff --git a/frame/0/bli_l0_tapi.c b/frame/0/bli_l0_tapi.c index e0cdffcf34..2f87753b10 100644 --- a/frame/0/bli_l0_tapi.c +++ b/frame/0/bli_l0_tapi.c @@ -41,19 +41,21 @@ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ const ctype* chi, \ ctype* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ ctype chi_conj; \ \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj, *psi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC( addsc, adds ) @@ -64,19 +66,21 @@ INSERT_GENTFUNC_BASIC( subsc, subs ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ ctype chi_conj; \ \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj ); \ PASTEMAC(ch,copys)( chi_conj, *chi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC( invertsc, inverts ) @@ -85,14 +89,14 @@ INSERT_GENTFUNC_BASIC( invertsc, inverts ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ const ctype* chi, \ ctype* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ if ( 
PASTEMAC(ch,eq0)( *chi ) ) \ { \ @@ -106,6 +110,8 @@ void PASTEMAC(ch,opname) \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj, *psi ); \ } \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC( mulsc, scals ) @@ -114,13 +120,13 @@ INSERT_GENTFUNC_BASIC( mulsc, scals ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* absq \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ ctype_r chi_r; \ ctype_r chi_i; \ @@ -135,6 +141,8 @@ void PASTEMAC(ch,opname) \ PASTEMAC(ch,absq2ris)( chi_r, chi_i, *absq, absq_i ); \ \ ( void )chi_i; \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( absqsc ) @@ -143,16 +151,18 @@ INSERT_GENTFUNCR_BASIC0( absqsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* norm \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ /* norm = sqrt( chi_r * chi_r + chi_i * chi_i ); */ \ PASTEMAC2(ch,chr,abval2s)( *chi, *norm ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( normfsc ) @@ -161,16 +171,18 @@ INSERT_GENTFUNCR_BASIC0( normfsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ /* NOTE: sqrtsc/sqrt2s differs from normfsc/abval2s in the complex domain. 
*/ \ PASTEMAC(ch,sqrt2s)( *chi, *psi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC0( sqrtsc ) @@ -179,16 +191,18 @@ INSERT_GENTFUNC_BASIC0( sqrtsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ double* zeta_r, \ double* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(ch,d,gets)( *chi, *zeta_r, *zeta_i ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC0( getsc ) @@ -197,16 +211,18 @@ INSERT_GENTFUNC_BASIC0( getsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ double zeta_r, \ double zeta_i, \ ctype* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(d,ch,sets)( zeta_r, zeta_i, *chi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC_BASIC0( setsc ) @@ -215,16 +231,18 @@ INSERT_GENTFUNC_BASIC0( setsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* zeta_r, \ ctype_r* zeta_i \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(ch,chr,gets)( *chi, *zeta_r, *zeta_i ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( unzipsc ) @@ -233,43 +251,49 @@ INSERT_GENTFUNCR_BASIC0( unzipsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ -void PASTEMAC(ch,opname) \ +err_t PASTEMAC(ch,opname) \ ( \ const ctype_r* zeta_r, \ const ctype_r* zeta_i, \ ctype* chi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ PASTEMAC2(chr,ch,sets)( *zeta_r, *zeta_i, *chi ); \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNCR_BASIC0( zipsc ) // ----------------------------------------------------------------------------- -void bli_igetsc +err_t bli_igetsc ( const dim_t* chi, double* zeta_r, double* zeta_i ) { - bli_init_once(); + BLIS_INIT_ONCE(); PASTEMAC2(i,d,gets)( *chi, *zeta_r, *zeta_i ); + + return 
BLIS_SUCCESS; } -void bli_isetsc +err_t bli_isetsc ( double zeta_r, double zeta_i, dim_t* chi ) { - bli_init_once(); + BLIS_INIT_ONCE(); PASTEMAC2(d,i,sets)( zeta_r, zeta_i, *chi ); + + return BLIS_SUCCESS; } diff --git a/frame/0/bli_l0_tapi.h b/frame/0/bli_l0_tapi.h index b393034103..854604c3de 100644 --- a/frame/0/bli_l0_tapi.h +++ b/frame/0/bli_l0_tapi.h @@ -40,7 +40,7 @@ #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ const ctype* chi, \ @@ -56,7 +56,7 @@ INSERT_GENTPROT_BASIC0( subsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi \ @@ -68,7 +68,7 @@ INSERT_GENTPROT_BASIC0( invertsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype_r* absq \ @@ -81,7 +81,7 @@ INSERT_GENTPROTR_BASIC0( normfsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ ctype* psi \ @@ -93,7 +93,7 @@ INSERT_GENTPROT_BASIC0( sqrtsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype* chi, \ double* zeta_r, \ @@ -106,7 +106,7 @@ INSERT_GENTPROT_BASIC0( getsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ double zeta_r, \ double zeta_i, \ @@ -119,7 +119,7 @@ INSERT_GENTPROT_BASIC0( setsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( 
\ const ctype* chi, \ ctype_r* zeta_r, \ @@ -132,7 +132,7 @@ INSERT_GENTPROTR_BASIC0( unzipsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC(ch,opname) \ ( \ const ctype_r* zeta_r, \ const ctype_r* zeta_i, \ @@ -143,14 +143,14 @@ INSERT_GENTPROTR_BASIC0( zipsc ) // ----------------------------------------------------------------------------- -BLIS_EXPORT_BLIS void bli_igetsc +BLIS_EXPORT_BLIS err_t bli_igetsc ( const dim_t* chi, double* zeta_r, double* zeta_i ); -BLIS_EXPORT_BLIS void bli_isetsc +BLIS_EXPORT_BLIS err_t bli_isetsc ( double zeta_r, double zeta_i, diff --git a/frame/0/copysc/bli_copysc.c b/frame/0/copysc/bli_copysc.c index c2e01d07b0..769ad0faf5 100644 --- a/frame/0/copysc/bli_copysc.c +++ b/frame/0/copysc/bli_copysc.c @@ -39,7 +39,7 @@ // an operation that can be used to typecast (copy-cast) a scalar // of one datatype to a scalar of another datatype. -typedef void (*FUNCPTR_T) +typedef err_t (*FUNCPTR_T) ( conj_t conjchi, const void* chi, @@ -55,13 +55,13 @@ static FUNCPTR_T GENARRAY2_ALL(ftypes,copysc); #undef GENFRONT #define GENFRONT( opname ) \ \ -void PASTEMAC0(opname) \ +err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ conj_t conjchi = bli_obj_conj_status( chi ); \ \ @@ -72,9 +72,14 @@ void PASTEMAC0(opname) \ void* buf_chi; \ \ FUNCPTR_T f; \ +\ + err_t r_val; \ \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( chi, psi ); \ + { \ + r_val = PASTEMAC(opname,_check)( chi, psi ); \ + bli_check_return_if_failure( r_val ); \ + } \ \ /* If chi is a scalar constant, use dt_psi to extract the address of the corresponding constant value; otherwise, use the datatype encoded @@ -86,11 +91,13 @@ void PASTEMAC0(opname) \ f = ftypes[dt_chi][dt_psi]; \ \ /* Invoke the void pointer-based function. 
*/ \ - f( \ - conjchi, \ - buf_chi, \ - buf_psi \ - ); \ + return \ + f \ + ( \ + conjchi, \ + buf_chi, \ + buf_psi \ + ); \ } GENFRONT( copysc ) @@ -103,14 +110,14 @@ GENFRONT( copysc ) #undef GENTFUNC2 #define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ -void PASTEMAC2(chx,chy,varname) \ +err_t PASTEMAC2(chx,chy,varname) \ ( \ conj_t conjchi, \ const void* chi, \ void* psi \ ) \ { \ - bli_init_once(); \ + BLIS_INIT_ONCE(); \ \ const ctype_x* chi_cast = chi; \ ctype_y* psi_cast = psi; \ @@ -123,6 +130,8 @@ void PASTEMAC2(chx,chy,varname) \ { \ PASTEMAC2(chx,chy,copys)( *chi_cast, *psi_cast ); \ } \ +\ + return BLIS_SUCCESS; \ } INSERT_GENTFUNC2_BASIC0( copysc ) diff --git a/frame/0/copysc/bli_copysc.h b/frame/0/copysc/bli_copysc.h index cd5481e576..33f8816780 100644 --- a/frame/0/copysc/bli_copysc.h +++ b/frame/0/copysc/bli_copysc.h @@ -40,7 +40,7 @@ #undef GENFRONT #define GENFRONT( opname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC0(opname) \ ( \ const obj_t* chi, \ const obj_t* psi \ @@ -55,7 +55,7 @@ GENFRONT( copysc ) #undef GENTPROT2 #define GENTPROT2( ctype_x, ctype_y, chx, chy, varname ) \ \ -BLIS_EXPORT_BLIS void PASTEMAC2(chx,chy,varname) \ +BLIS_EXPORT_BLIS err_t PASTEMAC2(chx,chy,varname) \ ( \ conj_t conjchi, \ const void* chi, \ diff --git a/frame/1/bli_l1v_tapi.c b/frame/1/bli_l1v_tapi.c index 01e3356d5f..abff96f521 100644 --- a/frame/1/bli_l1v_tapi.c +++ b/frame/1/bli_l1v_tapi.c @@ -59,7 +59,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -96,7 +96,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -133,7 +133,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -172,8 +172,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) \ - cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -213,7 +212,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -255,7 +254,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -293,7 +292,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -327,7 +326,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -363,7 +362,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -398,7 +397,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ diff --git a/frame/1d/bli_l1d_tapi.c b/frame/1d/bli_l1d_tapi.c index 60916cd568..465255d51f 100644 --- a/frame/1d/bli_l1d_tapi.c +++ b/frame/1d/bli_l1d_tapi.c @@ -98,7 +98,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -178,7 +178,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -238,7 +238,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. 
*/ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -296,7 +296,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -373,7 +373,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ incx = 2*incx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(chr,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt_r, kerid, cntx ); \ @@ -432,7 +432,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ @@ -510,7 +510,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ diff --git a/frame/1f/bli_l1f_tapi.c b/frame/1f/bli_l1f_tapi.c index 04d100cb30..e6735b5863 100644 --- a/frame/1f/bli_l1f_tapi.c +++ b/frame/1f/bli_l1f_tapi.c @@ -63,7 +63,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -107,7 +107,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -152,7 +152,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -202,7 +202,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ @@ -252,7 +252,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_ukr_dt( dt, kerid, cntx ); \ \ diff --git a/frame/1m/bli_l1m_tapi.c b/frame/1m/bli_l1m_tapi.c index 6b802b9fef..88d73ca64d 100644 --- a/frame/1m/bli_l1m_tapi.c +++ b/frame/1m/bli_l1m_tapi.c @@ -63,7 +63,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ @@ -128,7 +128,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -201,7 +201,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -268,7 +268,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If alpha is zero, then we set the output matrix to zero. This seemingly minor optimization is important because it will clear @@ -359,7 +359,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -406,7 +406,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If beta is zero, then the operation reduces to copym. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ @@ -493,7 +493,7 @@ void PASTEMAC3(chx,chy,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If beta is zero, then the operation reduces to copym. */ \ if ( PASTEMAC(chy,eq0)( *beta ) ) \ diff --git a/frame/1m/packm/bli_packm_cntl.c b/frame/1m/packm/bli_packm_cntl.c index e99ed9cf3d..d9f0d323a4 100644 --- a/frame/1m/packm/bli_packm_cntl.c +++ b/frame/1m/packm/bli_packm_cntl.c @@ -57,7 +57,7 @@ BLIS_EXPORT_BLIS cntl_t* bli_packm_cntl_create_node #endif // Allocate a packm_params_t struct. - params = bli_sba_acquire( rntm, sizeof( packm_params_t ) ); + bli_sba_acquire( rntm, sizeof( packm_params_t ), ( void** )&params ); // Initialize the packm_params_t struct. params->size = sizeof( packm_params_t ); diff --git a/frame/1m/unpackm/bli_unpackm_cntl.c b/frame/1m/unpackm/bli_unpackm_cntl.c index 95d0545bec..f5b7dc31da 100644 --- a/frame/1m/unpackm/bli_unpackm_cntl.c +++ b/frame/1m/unpackm/bli_unpackm_cntl.c @@ -53,6 +53,7 @@ cntl_t* bli_unpackm_cntl_create_node // Allocate an unpackm_params_t struct. params = bli_malloc_intl( sizeof( unpackm_params_t ), &r_val ); + //r_val = bli_sba_acquire( rntm, sizeof( packm_params_t ), ( void** )&params ); // Initialize the unpackm_params_t struct. params->size = sizeof( unpackm_params_t ); diff --git a/frame/2/bli_l2_tapi.c b/frame/2/bli_l2_tapi.c index 4bef7c81a2..65dda73974 100644 --- a/frame/2/bli_l2_tapi.c +++ b/frame/2/bli_l2_tapi.c @@ -70,7 +70,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m_y ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ @@ -147,7 +147,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary.
*/ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -197,7 +197,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ BLIS_TAPI_EX_DECLS \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ @@ -281,7 +281,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ PASTEMAC2(chr,ch,copys)( *alpha, alpha_local ); \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -338,7 +338,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -397,7 +397,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ @@ -458,7 +458,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( m ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* If alpha is zero, set x to zero and return early. 
*/ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ diff --git a/frame/3/bli_l3_oapi_ex.c b/frame/3/bli_l3_oapi_ex.c index 20b0294eb0..4019560469 100644 --- a/frame/3/bli_l3_oapi_ex.c +++ b/frame/3/bli_l3_oapi_ex.c @@ -74,11 +74,11 @@ void PASTEMAC(gemm,BLIS_OAPI_EX_SUF) } } - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -102,7 +102,7 @@ void PASTEMAC(gemm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -128,11 +128,11 @@ void PASTEMAC(gemmt,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. 
num_t dt = bli_obj_dt( c ); @@ -153,7 +153,7 @@ void PASTEMAC(gemmt,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -256,11 +256,11 @@ void PASTEMAC(hemm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -281,7 +281,7 @@ void PASTEMAC(hemm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -306,11 +306,11 @@ void PASTEMAC(symm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. 
rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -331,7 +331,7 @@ void PASTEMAC(symm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -356,11 +356,11 @@ void PASTEMAC(trmm3,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( c ); @@ -381,7 +381,7 @@ void PASTEMAC(trmm3,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -463,11 +463,11 @@ void PASTEMAC(trmm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. 
Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( b ); @@ -487,7 +487,7 @@ void PASTEMAC(trmm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) @@ -510,11 +510,11 @@ void PASTEMAC(trsm,BLIS_OAPI_EX_SUF) { bli_init_once(); - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Default to using native execution. num_t dt = bli_obj_dt( b ); @@ -534,7 +534,7 @@ void PASTEMAC(trsm,BLIS_OAPI_EX_SUF) // If necessary, obtain a valid context from the gks using the induced // method id determined above. - if ( cntx == NULL ) cntx = bli_gks_query_ind_cntx( im ); + bli_gks_query_ind_cntx_if_null( im, &cntx ); // Check the operands. if ( bli_error_checking_is_enabled() ) diff --git a/frame/3/bli_l3_sup.c b/frame/3/bli_l3_sup.c index eedbd9ec51..eece7b744e 100644 --- a/frame/3/bli_l3_sup.c +++ b/frame/3/bli_l3_sup.c @@ -58,7 +58,7 @@ err_t bli_gemmsup // Obtain a valid (native) context from the gks if necessary. 
// NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( &cntx ); \ // Return early if a microkernel preference-induced transposition would // have been performed and shifted the dimensions outside of the space @@ -86,11 +86,11 @@ err_t bli_gemmsup return BLIS_FAILURE; } - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); #if 0 const num_t dt = bli_obj_dt( c ); @@ -156,7 +156,7 @@ err_t bli_gemmtsup // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( &cntx ); \ // Return early if the problem dimensions exceed their sup thresholds. // Notice that we do not bother to check whether the microkernel @@ -171,11 +171,11 @@ err_t bli_gemmtsup return BLIS_FAILURE; } - // Initialize a local runtime with global settings if necessary. Note - // that in the case that a runtime is passed in, we make a local copy. + // Initialize a local runtime. Use the global settings if the caller passed + // in a rntm_t* that is NULL. Otherwise, copy that rntm_t's contents to the + // local rntm_t and use it (instead of the caller's) going forward. 
rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // We've now ruled out the possibility that the sup thresholds are // unsatisfied. diff --git a/frame/3/bli_l3_thrinfo.c b/frame/3/bli_l3_thrinfo.c index f866cfd4c5..04da3a4d32 100644 --- a/frame/3/bli_l3_thrinfo.c +++ b/frame/3/bli_l3_thrinfo.c @@ -64,7 +64,7 @@ void bli_l3_sup_thrinfo_free // ----------------------------------------------------------------------------- -void bli_l3_thrinfo_create_root +err_t bli_l3_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, @@ -88,7 +88,8 @@ void bli_l3_thrinfo_create_root dim_t work_id = gl_comm_id / ( n_threads / xx_way ); // Create the root thrinfo_t node. - *thread = bli_thrinfo_create + return + bli_thrinfo_create ( rntm, gl_comm, @@ -97,13 +98,14 @@ void bli_l3_thrinfo_create_root work_id, TRUE, bszid, - NULL + NULL, + thread ); } // ----------------------------------------------------------------------------- -void bli_l3_sup_thrinfo_create_root +err_t bli_l3_sup_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, @@ -130,7 +132,8 @@ void bli_l3_sup_thrinfo_create_root dim_t work_id = gl_comm_id / ( n_threads / xx_way ); // Create the root thrinfo_t node. 
- *thread = bli_thrinfo_create + return + bli_thrinfo_create ( rntm, gl_comm, @@ -139,7 +142,8 @@ void bli_l3_sup_thrinfo_create_root work_id, TRUE, bszid, - NULL + NULL, + thread ); } diff --git a/frame/3/bli_l3_thrinfo.h b/frame/3/bli_l3_thrinfo.h index 37a3909fd6..bb89875127 100644 --- a/frame/3/bli_l3_thrinfo.h +++ b/frame/3/bli_l3_thrinfo.h @@ -101,7 +101,7 @@ void bli_l3_sup_thrinfo_free // ----------------------------------------------------------------------------- -void bli_l3_thrinfo_create_root +err_t bli_l3_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, @@ -110,7 +110,7 @@ void bli_l3_thrinfo_create_root thrinfo_t** thread ); -void bli_l3_sup_thrinfo_create_root +err_t bli_l3_sup_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, diff --git a/frame/3/gemm/bli_gemm_md.c b/frame/3/gemm/bli_gemm_md.c index 1e23d058e0..7ae8ba2f2b 100644 --- a/frame/3/gemm/bli_gemm_md.c +++ b/frame/3/gemm/bli_gemm_md.c @@ -439,7 +439,8 @@ mddm_t bli_gemm_md_rcc // the target datatype. (The packm_blk_var1_md() function has "built-in" // support for packing to 1r (and 1e) schemas, whereas the // packm_blk_var1() function relies on packm kernels for packing to 1r. - const cntx_t* cntx_1m = bli_gks_query_ind_cntx( BLIS_1M ); + const cntx_t* cntx_1m; + bli_gks_query_ind_cntx( BLIS_1M, &cntx_1m ); const func_t* packm_1m_mr = bli_cntx_get_ukrs( BLIS_PACKM_MRXK_KER, cntx_1m ); const func_t* packm_1m_nr = bli_cntx_get_ukrs( BLIS_PACKM_NRXK_KER, cntx_1m ); diff --git a/frame/base/bli_apool.c b/frame/base/bli_apool.c index a42c7103e5..416cbe3e1b 100644 --- a/frame/base/bli_apool.c +++ b/frame/base/bli_apool.c @@ -34,7 +34,7 @@ #include "blis.h" -void bli_apool_init +err_t bli_apool_init ( apool_t* apool ) @@ -83,6 +83,11 @@ void bli_apool_init // ------------------------------------------------------------------------- + // Start off with a zeroed-out apool pool_t structure. 
+ // NOTE: This is especially important because it zeroes out the .block_ptrs + // field, which bli_apool_finalize() uses to decide whether to return early. + bli_pool_clear( pool ); + // Make sure that block_ptrs_len is at least num_blocks. block_ptrs_len = bli_max( block_ptrs_len, num_blocks ); @@ -94,25 +99,13 @@ void bli_apool_init // Allocate the block_ptrs array. array_t** block_ptrs = - bli_malloc_intl( block_ptrs_len * sizeof( array_t* ), &r_val ); + bli_calloc_intl( block_ptrs_len * sizeof( array_t* ), &r_val ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_init(): allocating %d array_t.\n", ( int )num_blocks ); fflush( stdout ); #endif - // Allocate and initialize each entry in the block_ptrs array. - for ( dim_t i = 0; i < num_blocks; ++i ) - { - // Pass in num_elem so the function knows how many elements to - // initially have in each array_t. - bli_apool_alloc_block - ( - num_elem, - &(block_ptrs[i]) - ); - } - // NOTE: The semantics of top_index approximate a stack, where a "full" // stack (no blocks checked out) is one where top_index == 0 and an empty // stack (all blocks checked out) one where top_index == num_blocks. @@ -124,9 +117,16 @@ void bli_apool_init // number line in which blocks are checked out from lowest to highest, // and additional blocks are added at the higher end. + // If the allocation failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + // Initialize the pool_t structure. // NOTE: We don't use the malloc_fp and free_fp fields at the apool_t // level. Nevertheless, we set them to NULL. + // NOTE: Given that the calloc() succeeded, we must set these fields so + // that if any of the below calls to bli_apool_alloc_block() fail, there + // will be enough information in the structure to allow bli_apool_finalize() + // to de-allocate what was allocated. 
bli_pool_set_block_ptrs( block_ptrs, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len, pool ); bli_pool_set_top_index( 0, pool ); @@ -135,12 +135,29 @@ void bli_apool_init bli_pool_set_align_size( align_size, pool ); bli_pool_set_malloc_fp( NULL, pool ); bli_pool_set_free_fp( NULL, pool ); + + // Allocate and initialize each entry in the block_ptrs array. + for ( dim_t i = 0; i < num_blocks; ++i ) + { + // Pass in num_elem so the function knows how many elements to + // initially have in each array_t. + r_val = bli_apool_alloc_block + ( + num_elem, + &(block_ptrs[i]) + ); + + // If the allocation failed, finalize the apool and return the error. + bli_check_callthen_return_if_failure( bli_apool_finalize( apool ), r_val ); + } + + return BLIS_SUCCESS; } -void bli_apool_alloc_block +err_t bli_apool_alloc_block ( siz_t num_elem, - array_t** array_p + array_t** array ) { err_t r_val; @@ -153,16 +170,20 @@ void bli_apool_alloc_block printf( "bli_apool_alloc_block(): allocating array_t: " ); #endif - // Allocate the array_t via the bli_fmalloc_align() wrapper, which performs - // alignment logic and opaquely saves the original pointer so that it can - // be recovered when it's time to free the block. - array_t* array = bli_malloc_intl( block_size, &r_val ); + // Allocate the array structure. We use calloc() so that all fields are + // initialized to zero, or NULL. + *array = bli_calloc_intl( block_size, &r_val ); + + // If the allocation failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Initialize an array_t struct within the newly allocated memory region. - bli_array_init( num_elem, sizeof( pool_t* ), array ); + r_val = bli_array_init( num_elem, sizeof( pool_t* ), *array ); + + // If the allocation failed, return the error code immediately. + bli_check_return_if_failure( r_val ); - // Save the pointer in the caller's array_t*. 
- *array_p = array; + return BLIS_SUCCESS; } void bli_apool_free_block @@ -170,55 +191,64 @@ void bli_apool_free_block array_t* array ) { + // Return early if the pointer to the array_t is NULL. + if ( array == NULL ) return; + const siz_t num_elem = bli_array_num_elem( array ); pool_t** buf = bli_array_buf( array ); - // Step through the array and finalize each pool_t. - for ( dim_t i = 0; i < num_elem; ++i ) + // Skip iterating over the buffer if it was never allocated. + if ( buf != NULL ) { - pool_t* pool = buf[ i ]; - - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_apool_free_block(): freeing pool_t %d within array_t.\n", - ( int )i ); - fflush( stdout ); - #endif - - // Finalize and free the current pool_t, if it was created/allocated. - if ( pool != NULL ) + // Step through the array and finalize each pool_t. + for ( dim_t i = 0; i < num_elem; ++i ) { - // Finalize the pool. - bli_pool_finalize( pool ); + pool_t* pool = buf[ i ]; #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_apool_free_block(): pool_t %d: ", ( int )i ); + printf( "bli_apool_free_block(): freeing pool_t %d within array_t.\n", + ( int )i ); + fflush( stdout ); #endif - // Free the pool_t struct. - bli_free_intl( pool ); + // Finalize and free the current pool_t, if it was created/allocated. + if ( pool != NULL ) + { + // Finalize the pool. + bli_pool_finalize( pool ); + + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_apool_free_block(): pool_t %d: ", ( int )i ); + #endif + + // Free the pool_t struct. + bli_free_intl( pool ); + } } - } - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_apool_free_block(): " ); - #endif + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_apool_free_block(): " ); + #endif - // Free the array buffer. - bli_array_finalize( array ); + // Free the array buffer. + bli_array_finalize( array ); + } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_free_block(): freeing array_t: " ); #endif - // Free the array. + // Free the array structure. 
bli_free_intl( array ); } -void bli_apool_finalize +err_t bli_apool_finalize ( apool_t* apool ) { + err_t r_val; + // NOTE: Since the apool_t's mutex is now initialized statically, we no // longer need to explicitly destroy it. @@ -236,14 +266,23 @@ void bli_apool_finalize // Query the block_ptrs array. array_t** block_ptrs = bli_pool_block_ptrs( pool ); + // Return early if the block_ptrs array is NULL. This would typically + // indicate that the pool structure was cleared but never initialized. + if ( block_ptrs == NULL ) return BLIS_SUCCESS; + // Query the total number of blocks currently allocated. siz_t num_blocks = bli_pool_num_blocks( pool ); // Query the top_index of the pool. siz_t top_index = bli_pool_top_index( pool ); - // Sanity check: The top_index should be zero. - if ( top_index != 0 ) bli_abort(); + // Sanity check: The top_index should be zero. If it's not, then at + // least one block is still checked out to a thread. + if ( bli_error_checking_is_enabled() ) + { + r_val = bli_check_outstanding_mem_pool_blocks( top_index ); + bli_check_return_if_failure( r_val ); + } // Free the individual blocks (each an array_t) currently in the pool. for ( dim_t i = 0; i < num_blocks; ++i ) @@ -264,14 +303,22 @@ void bli_apool_finalize // Free the block_ptrs array. bli_free_intl( block_ptrs ); + + // Clear the pool structure. + bli_pool_clear( pool ); + + return BLIS_SUCCESS; } -array_t* bli_apool_checkout_array +err_t bli_apool_checkout_array ( - siz_t n_threads, - apool_t* apool + siz_t n_threads, + const array_t** array, + apool_t* apool ) { + err_t r_val; + // Acquire the apool_t's mutex. bli_apool_lock( apool ); @@ -290,7 +337,10 @@ array_t* bli_apool_checkout_array fflush( stdout ); #endif - bli_apool_grow( 1, apool ); + r_val = bli_apool_grow( 1, apool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); } // At this point, at least one array_t is guaranteed to be available.
@@ -311,7 +361,7 @@ array_t* bli_apool_checkout_array #endif // Select the array_t* at top_index to return to the caller. - array_t* array = block_ptrs[ top_index ]; + array_t* array_p = block_ptrs[ top_index ]; // Increment the pool's top_index. bli_pool_set_top_index( top_index + 1, pool ); @@ -323,10 +373,12 @@ array_t* bli_apool_checkout_array // Resize the array_t according to the number of threads specified by the // caller. (We need one element in the array_t per thread.) - bli_array_resize( n_threads, array ); + bli_array_resize( n_threads, array_p ); - // Return the selected array_t*. - return array; + // Set the array pointer to the selected array_t*. + *array = array_p; + + return BLIS_SUCCESS; } void bli_apool_checkin_array @@ -372,10 +424,11 @@ void bli_apool_checkin_array bli_apool_unlock( apool ); } -pool_t* bli_apool_array_elem +err_t bli_apool_array_elem ( siz_t index, - array_t* array + array_t* array, + pool_t** pool ) { err_t r_val; @@ -389,12 +442,12 @@ pool_t* bli_apool_array_elem // stores in the array_t are pool_t*, that means that the function is // actually returning the address of a pool_t*, or pool_t**, hence the // dereferencing below. - pool_t** pool_p = bli_array_elem( index, array ); - pool_t* pool = *pool_p; + pool_t** pool_pp = bli_array_elem( index, array ); + pool_t* pool_p = *pool_pp; // If the element is NULL, then it means a pool_t has not yet been created // and allocated for the given index (thread id). - if ( pool == NULL ) + if ( pool_p == NULL ) { // Settle on the parameters to use when initializing the pool_t for // the current index within the array_t. @@ -429,10 +482,13 @@ pool_t* bli_apool_array_elem #endif // Allocate the pool_t. - pool = bli_malloc_intl( sizeof( pool_t ), &r_val ); + pool_p = bli_malloc_intl( sizeof( pool_t ), &r_val ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Initialize the pool_t. 
- bli_pool_init + r_val = bli_pool_init ( num_blocks, block_ptrs_len, @@ -441,25 +497,31 @@ pool_t* bli_apool_array_elem offset_size, malloc_fp, free_fp, - pool + pool_p ); + // If the previous function failed, free the pool_t we just allocated + // and return the error. + bli_check_callthen_return_if_failure( bli_free_intl( pool_p ), r_val ); + // Update the array element with the address to the new pool_t. // NOTE: We pass in the address of the pool_t* since the bli_array // API is generalized for arbitrarily-sized elements, and therefore - // it must always take the address of the data, rather than the - // value (which it can only do if the elem size were fixed). - bli_array_set_elem( &pool, index, array ); + // it must always take the address of the data, rather than the value + // (which it would only be able to do if the elem size were fixed). + bli_array_set_elem( &pool_p, index, array ); } // The array element is now guaranteed to refer to an allocated and // initialized pool_t. - // Return the array element. - return pool; + // Set the pool pointer to the newly allocated and initialized pool_t. + *pool = pool_p; + + return BLIS_SUCCESS; } -void bli_apool_grow +err_t bli_apool_grow ( siz_t num_blocks_add, apool_t* apool @@ -468,7 +530,7 @@ void bli_apool_grow err_t r_val; // If the requested increase is zero, return early. - if ( num_blocks_add == 0 ) return; + if ( num_blocks_add == 0 ) return BLIS_SUCCESS; // Query the underlying pool_t from the apool_t. pool_t* pool = bli_apool_pool( apool ); @@ -507,7 +569,10 @@ void bli_apool_grow // Allocate a new block_ptrs array. array_t** block_ptrs_new = - bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ), &r_val ); + bli_calloc_intl( block_ptrs_len_new * sizeof( array_t* ), &r_val ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Query the top_index of the pool. 
const siz_t top_index = bli_pool_top_index( pool ); @@ -547,19 +612,27 @@ void bli_apool_grow fflush( stdout ); #endif + dim_t i; + // Allocate the requested additional blocks in the resized array. - for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i ) + for ( i = num_blocks_cur; i < num_blocks_new; ++i ) { - bli_apool_alloc_block + r_val = bli_apool_alloc_block ( num_elem, &(block_ptrs[i]) ); + + // If the previous function failed, update the number of blocks in the + // pool to reflect the number that were added and then return the error. + bli_check_callthen_return_if_failure( bli_pool_set_num_blocks( i, pool ), r_val ); } // Update the pool_t struct with the new number of allocated blocks. // Notice that top_index remains unchanged, as do the block_size and // align_size fields. bli_pool_set_num_blocks( num_blocks_new, pool ); + + return BLIS_SUCCESS; } diff --git a/frame/base/bli_apool.h b/frame/base/bli_apool.h index d06f79207b..052d8a45d2 100644 --- a/frame/base/bli_apool.h +++ b/frame/base/bli_apool.h @@ -92,19 +92,30 @@ BLIS_INLINE void bli_apool_set_def_array_len( siz_t def_array_len, apool_t* pool // ----------------------------------------------------------------------------- -void bli_apool_init +err_t bli_apool_init ( apool_t* apool ); -void bli_apool_finalize +err_t bli_apool_finalize ( apool_t* apool ); -array_t* bli_apool_checkout_array +err_t bli_apool_alloc_block ( - siz_t n_threads, - apool_t* apool + siz_t num_elem, + array_t** array + ); +void bli_apool_free_block + ( + array_t* array + ); + +err_t bli_apool_checkout_array + ( + siz_t n_threads, + const array_t** array, + apool_t* apool ); void bli_apool_checkin_array ( @@ -112,28 +123,18 @@ void bli_apool_checkin_array apool_t* apool ); -pool_t* bli_apool_array_elem +err_t bli_apool_array_elem ( siz_t index, - array_t* array + array_t* array, + pool_t** pool ); -void bli_apool_grow +err_t bli_apool_grow ( siz_t num_blocks_add, apool_t* apool ); -void bli_apool_alloc_block - ( - siz_t 
num_elem, - array_t** array_p - ); -void bli_apool_free_block - ( - array_t* array - ); - - #endif diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index 48b50a7748..1084c43b0e 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -67,32 +67,74 @@ // The arch_t id for the currently running hardware. We initialize to -1, // which will be overwritten upon calling bli_arch_set_id(). -static arch_t id = -1; +static arch_t the_id = -1; -arch_t bli_arch_query_id( void ) +err_t bli_arch_query_id( arch_t* id ) { +#ifndef BLIS_CONFIGURETIME_CPUID + + // Make sure the arch_t id has been set. + err_t r_val = bli_arch_set_id_once(); + bli_check_return_if_failure( r_val ); + +#else + + // configure's cpuid driver doesn't look at error codes, so we don't + // bother returning it. bli_arch_set_id_once(); - // Simply return the id that was previously cached. - return id; +#endif + + // Set the arch_t pointer. + *id = the_id; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -// A pthread structure used in pthread_once(). pthread_once() is guaranteed to -// execute exactly once among all threads that pass in this control object. -static bli_pthread_once_t once_id = BLIS_PTHREAD_ONCE_INIT; +static bli_pthread_mutex_t arch_id_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; -void bli_arch_set_id_once( void ) +err_t bli_arch_set_id_once( void ) { #ifndef BLIS_CONFIGURETIME_CPUID - bli_pthread_once( &once_id, bli_arch_set_id ); + + err_t r_val = BLIS_SUCCESS; + + // If the arch_t id hasn't already been set, continue to the + // mutex acquisition step. + if ( the_id == -1 ) + { + // Acquire the mutex protecting initialization. + bli_pthread_mutex_lock( &arch_id_mutex ); + + // Check the arch_t id again now that we've obtained the lock. + if ( the_id == -1 ) + { + r_val = bli_arch_set_id(); + } + + // Release the mutex protecting initialization. 
+ bli_pthread_mutex_unlock( &arch_id_mutex ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + } + +#else + + // configure's cpuid driver doesn't look at error codes, so we don't + // bother returning it. + bli_arch_set_id(); + #endif + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_arch_set_id( void ) +err_t bli_arch_set_id( void ) { // Check the environment variable BLIS_ARCH_DEBUG to see if the user // requested that we echo the result of the subconfiguration selection. @@ -104,38 +146,10 @@ void bli_arch_set_id( void ) dim_t req_id = bli_env_get_var( "BLIS_ARCH_TYPE", -1 ); #ifndef BLIS_CONFIGURETIME_CPUID - if ( req_id != -1 ) - { - // BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable. - - // If req_id was set to an invalid arch_t value (ie: outside the range - // [0,BLIS_NUM_ARCHS-1]), output an error message and abort. - if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_valid_arch_id( req_id ); - bli_check_error_code( e_val ); - } - - // At this point, we know that req_id is in the valid range, but we - // don't yet know if it refers to a context that was actually - // initialized. Query the address of an internal context data structure - // corresponding to req_id. This pointer will be NULL if the associated - // subconfig is not available. - const cntx_t* const * req_cntx = bli_gks_lookup_id( req_id ); - - // This function checks the context pointer and aborts with a useful - // error message if the pointer is found to be NULL. - if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_initialized_gks_cntx( req_cntx ); - bli_check_error_code( e_val ); - } - - // Finally, we can be confident that req_id (1) is in range and (2) - // refers to a context that has been initialized. - id = req_id; - } - else + // If req_id is -1, it indicates that BLIS_ARCH_TYPE was NOT set. 
This means + // we must query the hardware for the id (or read whatever value was hard- + // coded at configure-time). + if ( req_id == -1 ) #endif { // BLIS_ARCH_TYPE was unset. Proceed with normal subconfiguration @@ -147,104 +161,144 @@ void bli_arch_set_id( void ) defined BLIS_FAMILY_X86_64 || \ defined BLIS_FAMILY_ARM64 || \ defined BLIS_FAMILY_ARM32 - id = bli_cpuid_query_id(); + req_id = bli_cpuid_query_id(); #endif // Intel microarchitectures. #ifdef BLIS_FAMILY_SKX - id = BLIS_ARCH_SKX; + req_id = BLIS_ARCH_SKX; #endif #ifdef BLIS_FAMILY_KNL - id = BLIS_ARCH_KNL; + req_id = BLIS_ARCH_KNL; #endif #ifdef BLIS_FAMILY_KNC - id = BLIS_ARCH_KNC; + req_id = BLIS_ARCH_KNC; #endif #ifdef BLIS_FAMILY_HASWELL - id = BLIS_ARCH_HASWELL; + req_id = BLIS_ARCH_HASWELL; #endif #ifdef BLIS_FAMILY_SANDYBRIDGE - id = BLIS_ARCH_SANDYBRIDGE; + req_id = BLIS_ARCH_SANDYBRIDGE; #endif #ifdef BLIS_FAMILY_PENRYN - id = BLIS_ARCH_PENRYN; + req_id = BLIS_ARCH_PENRYN; #endif // AMD microarchitectures. #ifdef BLIS_FAMILY_ZEN3 - id = BLIS_ARCH_ZEN3; + req_id = BLIS_ARCH_ZEN3; #endif #ifdef BLIS_FAMILY_ZEN2 - id = BLIS_ARCH_ZEN2; + req_id = BLIS_ARCH_ZEN2; #endif #ifdef BLIS_FAMILY_ZEN - id = BLIS_ARCH_ZEN; + req_id = BLIS_ARCH_ZEN; #endif #ifdef BLIS_FAMILY_EXCAVATOR - id = BLIS_ARCH_EXCAVATOR; + req_id = BLIS_ARCH_EXCAVATOR; #endif #ifdef BLIS_FAMILY_STEAMROLLER - id = BLIS_ARCH_STEAMROLLER; + req_id = BLIS_ARCH_STEAMROLLER; #endif #ifdef BLIS_FAMILY_PILEDRIVER - id = BLIS_ARCH_PILEDRIVER; + req_id = BLIS_ARCH_PILEDRIVER; #endif #ifdef BLIS_FAMILY_BULLDOZER - id = BLIS_ARCH_BULLDOZER; + req_id = BLIS_ARCH_BULLDOZER; #endif // ARM microarchitectures. 
#ifdef BLIS_FAMILY_ARMSVE - id = BLIS_ARCH_ARMSVE; + req_id = BLIS_ARCH_ARMSVE; #endif #ifdef BLIS_FAMILY_A64FX - id = BLIS_ARCH_A64FX; + req_id = BLIS_ARCH_A64FX; #endif #ifdef BLIS_FAMILY_FIRESTORM - id = BLIS_ARCH_FIRESTORM; + req_id = BLIS_ARCH_FIRESTORM; #endif #ifdef BLIS_FAMILY_THUNDERX2 - id = BLIS_ARCH_THUNDERX2; + req_id = BLIS_ARCH_THUNDERX2; #endif #ifdef BLIS_FAMILY_CORTEXA57 - id = BLIS_ARCH_CORTEXA57; + req_id = BLIS_ARCH_CORTEXA57; #endif #ifdef BLIS_FAMILY_CORTEXA53 - id = BLIS_ARCH_CORTEXA53; + req_id = BLIS_ARCH_CORTEXA53; #endif #ifdef BLIS_FAMILY_CORTEXA15 - id = BLIS_ARCH_CORTEXA15; + req_id = BLIS_ARCH_CORTEXA15; #endif #ifdef BLIS_FAMILY_CORTEXA9 - id = BLIS_ARCH_CORTEXA9; + req_id = BLIS_ARCH_CORTEXA9; #endif // IBM microarchitectures. #ifdef BLIS_FAMILY_POWER10 - id = BLIS_ARCH_POWER10; + req_id = BLIS_ARCH_POWER10; #endif #ifdef BLIS_FAMILY_POWER9 - id = BLIS_ARCH_POWER9; + req_id = BLIS_ARCH_POWER9; #endif #ifdef BLIS_FAMILY_POWER7 - id = BLIS_ARCH_POWER7; + req_id = BLIS_ARCH_POWER7; #endif #ifdef BLIS_FAMILY_BGQ - id = BLIS_ARCH_BGQ; + req_id = BLIS_ARCH_BGQ; #endif // Generic microarchitecture. #ifdef BLIS_FAMILY_GENERIC - id = BLIS_ARCH_GENERIC; + req_id = BLIS_ARCH_GENERIC; #endif } - if ( bli_arch_get_logging() ) - fprintf( stderr, "libblis: selecting sub-configuration '%s'.\n", - bli_arch_string( id ) ); +#ifndef BLIS_CONFIGURETIME_CPUID + // If req_id was set to an invalid arch_t value (ie: outside the range + // [0,BLIS_NUM_ARCHS-1]), output an error message and abort. + if ( bli_error_checking_is_enabled() ) + { + err_t e_val = bli_check_valid_arch_id( req_id ); + bli_check_return_error_code( e_val ); + } + + // At this point, we know that req_id is in the valid range, but we + // don't yet know if it refers to a context that was actually + // initialized. Query the address of an internal context data structure + // corresponding to req_id. This pointer will be NULL if the associated + // subconfig is not available. 
+ const cntx_t* const * req_cntx = bli_gks_lookup_id( req_id ); + + // This function checks the context pointer and aborts with a useful + // error message if the pointer is found to be NULL. + if ( bli_error_checking_is_enabled() ) + { + err_t e_val = bli_check_initialized_gks_cntx( req_cntx ); + bli_check_return_error_code( e_val ); + } +#endif - //printf( "blis_arch_query_id(): id = %u\n", id ); + // Finally, we can be confident that req_id (1) is in range and (2) + // refers to a context that has been initialized. So we cache it to + // the local static variable. + the_id = req_id; + + //printf( "blis_arch_query_id(): the_id = %u\n", id ); //exit(1); + + if ( bli_arch_get_logging() ) + { + // Query the string associated with the detected/chosen arch_t id. + // Note that we don't need to check the error code returned by + // bli_arch_string() since we've already confirmed that the arch_t + // id is valid. + const char* str; + bli_arch_string( the_id, &str ); + + fprintf( stderr, "libblis: selecting sub-configuration '%s'.\n", str ); + } + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -287,9 +341,20 @@ static const char* config_name[ BLIS_NUM_ARCHS ] = "generic" }; -const char* bli_arch_string( arch_t id ) +err_t bli_arch_string( arch_t id, const char** str ) { - return config_name[ id ]; +#ifndef BLIS_CONFIGURETIME_CPUID + err_t r_val = BLIS_SUCCESS; + + r_val = bli_check_valid_arch_id( id ); + bli_check_return_error_code( r_val ); +#endif + + // If the caller passed in a valid id, index into the string array and + // "return" the appropriate pointer. 
+ *str = config_name[ id ]; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_arch.h b/frame/base/bli_arch.h index 08af7ae79d..dfb84b0666 100644 --- a/frame/base/bli_arch.h +++ b/frame/base/bli_arch.h @@ -35,16 +35,16 @@ #ifndef BLIS_ARCH_H #define BLIS_ARCH_H -BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void ); +BLIS_EXPORT_BLIS err_t bli_arch_query_id( arch_t* id ); -void bli_arch_set_id_once( void ); -void bli_arch_set_id( void ); +err_t bli_arch_set_id_once( void ); +err_t bli_arch_set_id( void ); -BLIS_EXPORT_BLIS const char* bli_arch_string( arch_t id ); +BLIS_EXPORT_BLIS err_t bli_arch_string( arch_t id, const char** str ); -void bli_arch_set_logging( bool dolog ); -bool bli_arch_get_logging( void ); -void bli_arch_log( const char*, ... ); +void bli_arch_set_logging( bool dolog ); +bool bli_arch_get_logging( void ); +void bli_arch_log( const char*, ... ); #endif diff --git a/frame/base/bli_array.c b/frame/base/bli_array.c index ea47a0024c..716eb0c6ce 100644 --- a/frame/base/bli_array.c +++ b/frame/base/bli_array.c @@ -36,7 +36,7 @@ //#define BLIS_ENABLE_MEM_TRACING -void bli_array_init +err_t bli_array_init ( siz_t num_elem, siz_t elem_size, @@ -45,6 +45,9 @@ void bli_array_init { err_t r_val; + // Start off with a zeroed-out array_t structure. + bli_array_clear( array ); + #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_array_init(): allocating array [%d * %d]: ", ( int )num_elem, ( int )elem_size ); @@ -53,22 +56,29 @@ void bli_array_init // Compute the total size (in bytes) of the array. const size_t array_size = num_elem * elem_size; - // Allocate the array buffer. - void* buf = bli_malloc_intl( array_size, &r_val ); + // Allocate the array buffer. We use calloc() so that all elements are + // initialized to zero, or NULL. This allows us to deallocate only those + // blocks that were allocated in the event of a failure. 
+ void* buf = bli_calloc_intl( array_size, &r_val ); - // Initialize the array elements to zero. THIS IS IMPORANT because - // consumer threads will use the NULL-ness of the array elements to - // determine if the corresponding block (data structure) needs to be - // created/allocated and initialized. - memset( buf, 0, array_size ); + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Initialize the array_t structure. bli_array_set_buf( buf, array ); bli_array_set_num_elem( num_elem, array ); bli_array_set_elem_size( elem_size, array ); + + // Initialize the array elements to zero. THIS IS IMPORTANT because + // consumer threads will use the NULL-ness of the array elements to + // determine if the corresponding block (data structure) needs to be + // created/allocated and initialized. + //memset( buf, 0, array_size ); + + return BLIS_SUCCESS; } -void bli_array_resize +err_t bli_array_resize ( siz_t num_elem_new, array_t* array @@ -81,7 +91,7 @@ void bli_array_resize // If the new requested size (number of elements) is less than or equal to // the current size, no action is needed; return early. - if ( num_elem_new <= num_elem_prev ) return; + if ( num_elem_new <= num_elem_prev ) return BLIS_SUCCESS; // At this point, we know that num_elem_prev < num_elem_new, which means // we need to proceed with the resizing. @@ -104,6 +114,9 @@ void bli_array_resize // Allocate a new array buffer. char* buf_new = bli_malloc_intl( array_size_new, &r_val ); + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + // Copy the previous array contents to the new array. memcpy( buf_new, buf_prev, array_size_prev ); @@ -125,9 +138,11 @@ void bli_array_resize // NOTE: The array elem_size field does not need updating.
bli_array_set_buf( buf_new, array ); bli_array_set_num_elem( num_elem_new, array ); + + return BLIS_SUCCESS; } -void bli_array_finalize +err_t bli_array_finalize ( array_t* array ) @@ -142,6 +157,8 @@ void bli_array_finalize // Free the buffer. bli_free_intl( buf ); + + return BLIS_SUCCESS; } void* bli_array_elem @@ -151,10 +168,10 @@ void* bli_array_elem ) { // Query the number of elements in the array. - const siz_t num_elem = bli_array_num_elem( array ); + //const siz_t num_elem = bli_array_num_elem( array ); // Sanity check: disallow access beyond the bounds of the array. - if ( num_elem <= index ) bli_abort(); + //if ( num_elem <= index ) bli_abort(); // Query the size of each element in the array. const siz_t elem_size = bli_array_elem_size( array ); @@ -183,31 +200,18 @@ void bli_array_set_elem // Query the buffer from the array as a char*. char* buf = bli_array_buf( array ); -// memcpy() is the only safe way to copy data of unknown type -#if 0 - if ( elem_size == sizeof( void* ) ) - { - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_array_set_elem(): elem_size is %d; setting index %d.\n", - ( int )elem_size, ( int )index ); - fflush( stdout ); - #endif - - // Special case: Handle elem_size = sizeof( void* ) without calling - // memcpy(). - void** buf_vvp = ( void** )buf; - void** elem_vvp = ( void** )elem; - - buf_vvp[ index ] = *elem_vvp; - } - else - { -#endif - // General case: Copy the elem_size bytes from elem to buf at the - // element index specified by index. - memcpy( &buf[ index * elem_size ], elem, ( size_t )elem_size ); -#if 0 - } -#endif + // Copy the elem_size bytes from elem to buf at the element index specified + // by index. 
+ memcpy( &buf[ index * elem_size ], elem, ( size_t )elem_size ); +} + +void bli_array_clear + ( + array_t* array + ) +{ + bli_array_set_buf( NULL, array ); + bli_array_set_num_elem( 0, array ); + bli_array_set_elem_size( 0, array ); } diff --git a/frame/base/bli_array.h b/frame/base/bli_array.h index c1e6ce038a..19912892bf 100644 --- a/frame/base/bli_array.h +++ b/frame/base/bli_array.h @@ -85,18 +85,18 @@ BLIS_INLINE void bli_array_set_elem_size( siz_t elem_size, array_t* array ) \ // ----------------------------------------------------------------------------- -void bli_array_init +err_t bli_array_init ( siz_t num_elem, siz_t elem_size, array_t* array ); -void bli_array_resize +err_t bli_array_resize ( siz_t num_elem_new, array_t* array ); -void bli_array_finalize +err_t bli_array_finalize ( array_t* array ); @@ -113,5 +113,10 @@ void bli_array_set_elem array_t* array ); +void bli_array_clear + ( + array_t* array + ); + #endif diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index 16c418b49e..2a3a109581 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -37,6 +37,7 @@ // -- General stuff ------------------------------------------------------------ +#if 1 err_t bli_check_error_code_helper( gint_t code, const char* file, guint_t line ) { if ( code == BLIS_SUCCESS ) return code; @@ -56,6 +57,7 @@ err_t bli_check_error_code_helper( gint_t code, const char* file, guint_t line ) return code; } +#endif err_t bli_check_valid_error_level( errlev_t level ) { @@ -68,6 +70,17 @@ err_t bli_check_valid_error_level( errlev_t level ) return e_val; } +err_t bli_check_valid_error_mode( errmode_t mode ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( mode != BLIS_ERROR_RETURN && + mode != BLIS_ERROR_ABORT ) + e_val = BLIS_INVALID_ERROR_HANDLING_MODE; + + return e_val; +} + err_t bli_check_null_pointer( const void* ptr ) { err_t e_val = BLIS_SUCCESS; @@ -677,8 +690,32 @@ err_t bli_check_upper_or_lower_object( const obj_t* a ) return e_val; } +// -- Induced 
method-related checks -------------------------------------------- + +err_t bli_check_valid_ind( ind_t im ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( !bli_is_1m( im ) && + !bli_is_nat( im ) ) + e_val = BLIS_INVALID_IND; + + return e_val; +} + // -- Partitioning-related checks ---------------------------------------------- +err_t bli_check_valid_direct( dir_t direct ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( !bli_is_fwd( direct ) && + !bli_is_bwd( direct ) ) + e_val = BLIS_INVALID_DIRECTION; + + return e_val; +} + err_t bli_check_valid_3x1_subpart( subpart_t part ) { err_t e_val = BLIS_SUCCESS; @@ -729,6 +766,42 @@ err_t bli_check_valid_3x3_subpart( subpart_t part ) return e_val; } +err_t bli_check_valid_row_offset( dim_t i, obj_t* a ) +{ + err_t e_val = BLIS_SUCCESS; + + const dim_t m = bli_obj_length( a ); + + if ( i < 0 ) e_val = BLIS_ROW_OFFSET_LESS_THAN_ZERO; + else if ( m <= i ) e_val = BLIS_ROW_OFFSET_EXCEEDS_NUM_ROWS; + + return e_val; +} + +err_t bli_check_valid_col_offset( dim_t j, obj_t* a ) +{ + err_t e_val = BLIS_SUCCESS; + + const dim_t n = bli_obj_width( a ); + + if ( j < 0 ) e_val = BLIS_COL_OFFSET_LESS_THAN_ZERO; + else if ( n <= j ) e_val = BLIS_COL_OFFSET_EXCEEDS_NUM_COLS; + + return e_val; +} + +err_t bli_check_valid_vector_offset( dim_t i, obj_t* x ) +{ + err_t e_val = BLIS_SUCCESS; + + const dim_t n = bli_obj_vector_dim( x ); + + if ( i < 0 ) e_val = BLIS_VECTOR_OFFSET_LESS_THAN_ZERO; + else if ( n <= i ) e_val = BLIS_VECTOR_OFFSET_EXCEEDS_NUM_ELEM; + + return e_val; +} + // -- Control tree-related checks ---------------------------------------------- err_t bli_check_valid_cntl( const void* cntl ) @@ -871,6 +944,18 @@ err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ) return e_val; } +err_t bli_check_outstanding_mem_pool_blocks( siz_t top_index ) +{ + err_t e_val = BLIS_SUCCESS; + + // This function returns an error code if the top_index is not zero. 
+ + if ( top_index != 0 ) + e_val = BLIS_MEM_POOL_BLOCKS_OUTSTANDING; + + return e_val; +} + // -- Object-related errors ---------------------------------------------------- err_t bli_check_object_alias_of( const obj_t* a, const obj_t* b ) @@ -958,3 +1043,16 @@ err_t bli_check_valid_kc_mod_mult( const blksz_t* kc, const blksz_t* kr ) return BLIS_SUCCESS; } +// -- Thread-related errors ---------------------------------------------------- + +err_t bli_check_num_threads_created( dim_t nt_req, dim_t nt_actual ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( nt_req != nt_actual ) + if ( nt_actual != 1 ) + e_val = BLIS_EXPECTED_DIFF_NUM_THREADS; + + return e_val; +} + diff --git a/frame/base/bli_check.h b/frame/base/bli_check.h index f1e2201a7e..07506152b2 100644 --- a/frame/base/bli_check.h +++ b/frame/base/bli_check.h @@ -37,6 +37,7 @@ BLIS_EXPORT_BLIS err_t bli_check_error_code_helper( gint_t code, const char* file, guint_t line ); err_t bli_check_valid_error_level( errlev_t level ); +err_t bli_check_valid_error_mode( errmode_t mode ); err_t bli_check_null_pointer( const void* ptr ); @@ -88,9 +89,15 @@ err_t bli_check_object_struc( const obj_t* a, struc_t struc ); err_t bli_check_upper_or_lower_object( const obj_t* a ); +err_t bli_check_valid_ind( ind_t im ); + +err_t bli_check_valid_direct( dir_t direct ); err_t bli_check_valid_3x1_subpart( subpart_t part ); err_t bli_check_valid_1x3_subpart( subpart_t part ); err_t bli_check_valid_3x3_subpart( subpart_t part ); +err_t bli_check_valid_row_offset( dim_t i, obj_t* a ); +err_t bli_check_valid_col_offset( dim_t j, obj_t* a ); +err_t bli_check_valid_vector_offset( dim_t i, obj_t* x ); err_t bli_check_valid_cntl( const void* cntl ); @@ -106,6 +113,7 @@ err_t bli_check_if_exhausted_pool( const pool_t* pool ); err_t bli_check_sufficient_stack_buf_size( const cntx_t* cntx ); err_t bli_check_alignment_is_power_of_two( size_t align_size ); err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ); +err_t 
bli_check_outstanding_mem_pool_blocks( siz_t top_index ); err_t bli_check_object_alias_of( const obj_t* a, const obj_t* b ); @@ -116,3 +124,5 @@ err_t bli_check_valid_mc_mod_mult( const blksz_t* mc, const blksz_t* mr ); err_t bli_check_valid_nc_mod_mult( const blksz_t* nc, const blksz_t* nr ); err_t bli_check_valid_kc_mod_mult( const blksz_t* kc, const blksz_t* kr ); +err_t bli_check_num_threads_created( dim_t nt_req, dim_t nt_actual ); + diff --git a/frame/base/bli_cntl.c b/frame/base/bli_cntl.c index b22ddbee0b..69a7c43e1c 100644 --- a/frame/base/bli_cntl.c +++ b/frame/base/bli_cntl.c @@ -53,7 +53,7 @@ cntl_t* bli_cntl_create_node #endif // Allocate the cntl_t struct. - cntl = bli_sba_acquire( rntm, sizeof( cntl_t ) ); + bli_sba_acquire( rntm, sizeof( cntl_t ), ( void** )&cntl ); bli_cntl_set_family( family, cntl ); bli_cntl_set_bszid( bszid, cntl ); @@ -273,7 +273,8 @@ cntl_t* bli_cntl_copy // struct. uint64_t params_size = bli_cntl_params_size( cntl ); void* params_orig = bli_cntl_params( cntl ); - void* params_copy = bli_sba_acquire( rntm, ( size_t )params_size ); + void* params_copy; + bli_sba_acquire( rntm, ( size_t )params_size, ( void** )¶ms_copy ); // Copy the original params struct to the new memory region. memcpy( params_copy, params_orig, params_size ); diff --git a/frame/base/bli_error.c b/frame/base/bli_error.c index f4933d9629..9704fe7113 100644 --- a/frame/base/bli_error.c +++ b/frame/base/bli_error.c @@ -39,9 +39,11 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = { [-BLIS_INVALID_ERROR_CHECKING_LEVEL] = "Invalid error checking level.", + [-BLIS_INVALID_ERROR_HANDLING_MODE] = "Invalid error handling mode.", [-BLIS_UNDEFINED_ERROR_CODE] = "Undefined error code.", [-BLIS_NULL_POINTER] = "Encountered unexpected null pointer.", [-BLIS_NOT_YET_IMPLEMENTED] = "Requested functionality not yet implemented.", + [-BLIS_REJECT_EXEC] = "Execution path rejected. 
NOTE: This error message should never be displayed.", [-BLIS_INVALID_SIDE] = "Invalid side parameter value.", [-BLIS_INVALID_UPLO] = "Invalid uplo_t parameter value.", @@ -83,9 +85,18 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = [-BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT] = "Expected upper or lower triangular object.", + [-BLIS_INVALID_IND] = "Invalid ind_t parameter value.", + + [-BLIS_INVALID_DIRECTION] = "Invalid dir_t parameter value.", [-BLIS_INVALID_3x1_SUBPART] = "Encountered invalid 3x1 (vertical) subpartition label.", [-BLIS_INVALID_1x3_SUBPART] = "Encountered invalid 1x3 (horizontal) subpartition label.", [-BLIS_INVALID_3x3_SUBPART] = "Encountered invalid 3x3 (diagonal) subpartition label.", + [-BLIS_ROW_OFFSET_LESS_THAN_ZERO] = "Encountered row offset less than zero.", + [-BLIS_ROW_OFFSET_EXCEEDS_NUM_ROWS] = "Encountered row offset that exceeds the number of matrix rows (m dimension).", + [-BLIS_COL_OFFSET_LESS_THAN_ZERO] = "Encountered column offset less than zero.", + [-BLIS_COL_OFFSET_EXCEEDS_NUM_COLS] = "Encountered column offset that exceeds the number of matrix columns (n dimension).", + [-BLIS_VECTOR_OFFSET_LESS_THAN_ZERO] = "Encountered vector offset less than zero.", + [-BLIS_VECTOR_OFFSET_EXCEEDS_NUM_ELEM] = "Encountered vector offset that exceeds the number of vector elements.", [-BLIS_UNEXPECTED_NULL_CONTROL_TREE] = "Encountered unexpected null control tree node.", @@ -100,6 +111,7 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = [-BLIS_INSUFFICIENT_STACK_BUF_SIZE] = "Configured maximum stack buffer size is insufficient for register blocksizes currently in use.", [-BLIS_ALIGNMENT_NOT_POWER_OF_TWO] = "Encountered memory alignment value that is either zero or not a power of two.", [-BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE] = "Encountered memory alignment value that is not a multiple of sizeof(void*).", + [-BLIS_MEM_POOL_BLOCKS_OUTSTANDING] = "One or more blocks still checked out at the time a memory pool was finalized.", 
[-BLIS_EXPECTED_OBJECT_ALIAS] = "Expected object to be alias.", @@ -112,52 +124,166 @@ static const char *bli_error_string[-BLIS_ERROR_CODE_MAX] = [-BLIS_NC_MAX_NONMULTIPLE_OF_NR] = "Maximum NC is non-multiple of NR for one or more datatypes.", [-BLIS_KC_DEF_NONMULTIPLE_OF_KR] = "Default KC is non-multiple of KR for one or more datatypes.", [-BLIS_KC_MAX_NONMULTIPLE_OF_KR] = "Maximum KC is non-multiple of KR for one or more datatypes.", + + [-BLIS_EXPECTED_DIFF_NUM_THREADS] = "A different number of threads was created than was requested.", }; // ----------------------------------------------------------------------------- -void bli_print_msg( const char* str, const char* file, guint_t line ) +// A mutex to allow synchronous access to the variable controlling the error +// checking level. +static bli_pthread_mutex_t err_level_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; + +// Set the default (initial) error checking level based on how BLIS was +// configured. Note that we declare the variable as thread-local so that +// application threads can operate BLIS under different error handling +// regimes. +#ifdef BLIS_ENABLE_ERROR_CHECKING +static BLIS_THREAD_LOCAL errlev_t bli_err_chk_level = BLIS_FULL_ERROR_CHECKING; +#else +static BLIS_THREAD_LOCAL errlev_t bli_err_chk_level = BLIS_NO_ERROR_CHECKING; +#endif + +// Primary user APIs. 
+ +bool bli_error_checking_is_enabled( void ) { - fprintf( stderr, "\n" ); - fprintf( stderr, "libblis: %s (line %lu):\n", file, ( long unsigned int )line ); - fprintf( stderr, "libblis: %s\n", str ); - fflush( stderr ); + return bli_error_checking_level() != BLIS_NO_ERROR_CHECKING; } -void bli_abort( void ) +err_t bli_error_checking_enable( void ) { - fprintf( stderr, "libblis: Aborting.\n" ); - //raise( SIGABRT ); - abort(); + return bli_error_checking_level_set( BLIS_FULL_ERROR_CHECKING ); } -// ----------------------------------------------------------------------------- +err_t bli_error_checking_disable( void ) +{ + return bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); +} -// Current error checking level. -static BLIS_THREAD_LOCAL errlev_t bli_err_chk_level = BLIS_FULL_ERROR_CHECKING; +// Lower-level APIs. errlev_t bli_error_checking_level( void ) { return bli_err_chk_level; } -void bli_error_checking_level_set( errlev_t new_level ) +err_t bli_error_checking_level_set( errlev_t new_level ) { err_t e_val; e_val = bli_check_valid_error_level( new_level ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + // Acquire the mutex protecting bli_err_chk_level. + bli_pthread_mutex_lock( &err_level_mutex ); + + // BEGIN CRITICAL SECTION + { + bli_err_chk_level = new_level; + } + // END CRITICAL SECTION + + // Release the mutex protecting bli_err_chk_level. + bli_pthread_mutex_unlock( &err_level_mutex ); - bli_err_chk_level = new_level; + return BLIS_SUCCESS; } -bool bli_error_checking_is_enabled( void ) +// ----------------------------------------------------------------------------- + +// A mutex to allow synchronous access to the variable controlling the error +// handling mode. +static bli_pthread_mutex_t err_mode_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; + +// Set the default (initial) error handling mode based on how BLIS was +// configured. 
Note that we declare the variable as thread-local so that +// application threads can operate BLIS under different error handling +// regimes. +#ifdef BLIS_ENABLE_ERROR_RETURN +static BLIS_THREAD_LOCAL errmode_t bli_err_hand_mode = BLIS_ERROR_RETURN; +#else // #ifdef BLIS_ENABLE_ERROR_ABORT +static BLIS_THREAD_LOCAL errmode_t bli_err_hand_mode = BLIS_ERROR_ABORT; +#endif + +// Primary user APIs. + +bool bli_error_mode_is_return( void ) { - return bli_error_checking_level() != BLIS_NO_ERROR_CHECKING; + return bli_error_mode() == BLIS_ERROR_RETURN; +} + +bool bli_error_mode_is_abort( void ) +{ + return bli_error_mode() == BLIS_ERROR_ABORT; +} + +err_t bli_error_mode_set_return( void ) +{ + return bli_error_mode_set( BLIS_ERROR_RETURN ); } +err_t bli_error_mode_set_abort( void ) +{ + return bli_error_mode_set( BLIS_ERROR_ABORT ); +} + +// Lower-level APIs. + +errmode_t bli_error_mode( void ) +{ + return bli_err_hand_mode; +} + +err_t bli_error_mode_set( errmode_t new_mode ) +{ + err_t e_val; + + e_val = bli_check_valid_error_mode( new_mode ); + bli_check_return_error_code( e_val ); + + // Acquire the mutex protecting bli_err_hand_mode. + bli_pthread_mutex_lock( &err_mode_mutex ); + + // BEGIN CRITICAL SECTION + { + bli_err_hand_mode = new_mode; + } + // END CRITICAL SECTION + + // Release the mutex protecting bli_err_chk_level. + bli_pthread_mutex_unlock( &err_mode_mutex ); + + return BLIS_SUCCESS; +} + +// ----------------------------------------------------------------------------- + const char* bli_error_string_for_code( gint_t code ) { + // If the caller's error code is out of range, use a special error code to + // signify this. + if ( code <= BLIS_ERROR_CODE_MIN || BLIS_ERROR_CODE_MAX <= code ) + code = BLIS_UNDEFINED_ERROR_CODE; + + // Return the address of the string corresponding to the chosen error code. 
return bli_error_string[-code]; } +// ----------------------------------------------------------------------------- + +void bli_print_msg( const char* str, const char* file, guint_t line ) +{ + fprintf( stderr, "\n" ); + fprintf( stderr, "libblis: %s (line %lu):\n", file, ( long unsigned int )line ); + fprintf( stderr, "libblis: %s\n", str ); + fflush( stderr ); +} + +void bli_abort( void ) +{ + fprintf( stderr, "libblis: Aborting.\n" ); + //raise( SIGABRT ); + abort(); +} + diff --git a/frame/base/bli_error.h b/frame/base/bli_error.h index f3037e2c21..71f87073c8 100644 --- a/frame/base/bli_error.h +++ b/frame/base/bli_error.h @@ -34,13 +34,29 @@ */ -BLIS_EXPORT_BLIS errlev_t bli_error_checking_level( void ); -BLIS_EXPORT_BLIS void bli_error_checking_level_set( errlev_t new_level ); +BLIS_EXPORT_BLIS bool bli_error_checking_is_enabled( void ); +BLIS_EXPORT_BLIS err_t bli_error_checking_enable( void ); +BLIS_EXPORT_BLIS err_t bli_error_checking_disable( void ); -BLIS_EXPORT_BLIS bool bli_error_checking_is_enabled( void ); +BLIS_EXPORT_BLIS errlev_t bli_error_checking_level( void ); +BLIS_EXPORT_BLIS err_t bli_error_checking_level_set( errlev_t new_level ); -void bli_print_msg( const char* str, const char* file, guint_t line ); -BLIS_EXPORT_BLIS void bli_abort( void ); +// ----------------------------------------------------------------------------- -const char* bli_error_string_for_code( gint_t code ); +BLIS_EXPORT_BLIS bool bli_error_mode_is_return( void ); +BLIS_EXPORT_BLIS bool bli_error_mode_is_abort( void ); +BLIS_EXPORT_BLIS err_t bli_error_mode_set_return( void ); +BLIS_EXPORT_BLIS err_t bli_error_mode_set_abort( void ); + +BLIS_EXPORT_BLIS errmode_t bli_error_mode( void ); +BLIS_EXPORT_BLIS err_t bli_error_mode_set( errmode_t new_mode ); + +// ----------------------------------------------------------------------------- + +BLIS_EXPORT_BLIS const char* bli_error_string_for_code( gint_t code ); + +// 
----------------------------------------------------------------------------- + +void bli_print_msg( const char* str, const char* file, guint_t line ); +BLIS_EXPORT_BLIS void bli_abort( void ); diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 094810d9d2..5a1cb29bd9 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -52,184 +52,268 @@ typedef void (*nat_cntx_init_ft)( cntx_t* cntx ); typedef void (*ref_cntx_init_ft)( cntx_t* cntx ); typedef void (*ind_cntx_init_ft)( ind_t method, cntx_t* cntx ); +// A boolean that tracks whether bli_gks_init() has completed successfully. +static bool gks_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_gks_init( void ) +bool bli_gks_is_init( void ) { - { - // Initialize the internal data structure we use to track registered - // contexts. - bli_gks_init_index(); + return gks_is_init; +} + +void bli_gks_mark_init( void ) +{ + gks_is_init = TRUE; +} - // Register a context for each architecture that was #define'd in - // bli_config.h. +void bli_gks_mark_uninit( void ) +{ + gks_is_init = FALSE; +} - // -- Intel architectures ---------------------------------------------- +// ----------------------------------------------------------------------------- + +err_t bli_gks_init( void ) +{ + err_t r_val; + + // NOTE: We assume this function is only called by one thread. + + // Sanity check: Return early if the API is already initialized. + if ( bli_gks_is_init() ) return BLIS_SUCCESS; + + // Initialize the internal data structure we use to track registered + // contexts. + bli_gks_init_index(); + + // Register a context for each architecture that was #define'd in + // bli_config.h. If any registration fails, finalize the gks before + // returning the error code. 
+ + // -- Intel architectures ---------------------------------------------- #ifdef BLIS_CONFIG_SKX - bli_gks_register_cntx( BLIS_ARCH_SKX, bli_cntx_init_skx, - bli_cntx_init_skx_ref, - bli_cntx_init_skx_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_SKX, bli_cntx_init_skx, + bli_cntx_init_skx_ref, + bli_cntx_init_skx_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_KNL - bli_gks_register_cntx( BLIS_ARCH_KNL, bli_cntx_init_knl, - bli_cntx_init_knl_ref, - bli_cntx_init_knl_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_KNL, bli_cntx_init_knl, + bli_cntx_init_knl_ref, + bli_cntx_init_knl_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_KNC - bli_gks_register_cntx( BLIS_ARCH_KNC, bli_cntx_init_knc, - bli_cntx_init_knc_ref, - bli_cntx_init_knc_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_KNC, bli_cntx_init_knc, + bli_cntx_init_knc_ref, + bli_cntx_init_knc_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_HASWELL - bli_gks_register_cntx( BLIS_ARCH_HASWELL, bli_cntx_init_haswell, - bli_cntx_init_haswell_ref, - bli_cntx_init_haswell_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_HASWELL, bli_cntx_init_haswell, + bli_cntx_init_haswell_ref, + bli_cntx_init_haswell_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_SANDYBRIDGE - bli_gks_register_cntx( BLIS_ARCH_SANDYBRIDGE, bli_cntx_init_sandybridge, - bli_cntx_init_sandybridge_ref, - bli_cntx_init_sandybridge_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_SANDYBRIDGE, bli_cntx_init_sandybridge, + bli_cntx_init_sandybridge_ref, + bli_cntx_init_sandybridge_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_PENRYN - bli_gks_register_cntx( BLIS_ARCH_PENRYN, bli_cntx_init_penryn, - bli_cntx_init_penryn_ref, - bli_cntx_init_penryn_ind ); + r_val = 
+ bli_gks_register_cntx( BLIS_ARCH_PENRYN, bli_cntx_init_penryn, + bli_cntx_init_penryn_ref, + bli_cntx_init_penryn_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- AMD architectures ------------------------------------------------ + // -- AMD architectures ------------------------------------------------ #ifdef BLIS_CONFIG_ZEN3 - bli_gks_register_cntx( BLIS_ARCH_ZEN3, bli_cntx_init_zen3, - bli_cntx_init_zen3_ref, - bli_cntx_init_zen3_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ZEN3, bli_cntx_init_zen3, + bli_cntx_init_zen3_ref, + bli_cntx_init_zen3_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_ZEN2 - bli_gks_register_cntx( BLIS_ARCH_ZEN2, bli_cntx_init_zen2, - bli_cntx_init_zen2_ref, - bli_cntx_init_zen2_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ZEN2, bli_cntx_init_zen2, + bli_cntx_init_zen2_ref, + bli_cntx_init_zen2_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_ZEN - bli_gks_register_cntx( BLIS_ARCH_ZEN, bli_cntx_init_zen, - bli_cntx_init_zen_ref, - bli_cntx_init_zen_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ZEN, bli_cntx_init_zen, + bli_cntx_init_zen_ref, + bli_cntx_init_zen_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_EXCAVATOR - bli_gks_register_cntx( BLIS_ARCH_EXCAVATOR, bli_cntx_init_excavator, - bli_cntx_init_excavator_ref, - bli_cntx_init_excavator_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_EXCAVATOR, bli_cntx_init_excavator, + bli_cntx_init_excavator_ref, + bli_cntx_init_excavator_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_STEAMROLLER - bli_gks_register_cntx( BLIS_ARCH_STEAMROLLER, bli_cntx_init_steamroller, - bli_cntx_init_steamroller_ref, - bli_cntx_init_steamroller_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_STEAMROLLER, 
bli_cntx_init_steamroller, + bli_cntx_init_steamroller_ref, + bli_cntx_init_steamroller_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_PILEDRIVER - bli_gks_register_cntx( BLIS_ARCH_PILEDRIVER, bli_cntx_init_piledriver, - bli_cntx_init_piledriver_ref, - bli_cntx_init_piledriver_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_PILEDRIVER, bli_cntx_init_piledriver, + bli_cntx_init_piledriver_ref, + bli_cntx_init_piledriver_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_BULLDOZER - bli_gks_register_cntx( BLIS_ARCH_BULLDOZER, bli_cntx_init_bulldozer, - bli_cntx_init_bulldozer_ref, - bli_cntx_init_bulldozer_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_BULLDOZER, bli_cntx_init_bulldozer, + bli_cntx_init_bulldozer_ref, + bli_cntx_init_bulldozer_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM architectures ------------------------------------------------ + // -- ARM architectures ------------------------------------------------ - // -- ARM-SVE -- + // -- ARM-SVE -- #ifdef BLIS_CONFIG_ARMSVE - bli_gks_register_cntx( BLIS_ARCH_ARMSVE, bli_cntx_init_armsve, - bli_cntx_init_armsve_ref, - bli_cntx_init_armsve_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_ARMSVE, bli_cntx_init_armsve, + bli_cntx_init_armsve_ref, + bli_cntx_init_armsve_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_A64FX - bli_gks_register_cntx( BLIS_ARCH_A64FX, bli_cntx_init_a64fx, - bli_cntx_init_a64fx_ref, - bli_cntx_init_a64fx_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_A64FX, bli_cntx_init_a64fx, + bli_cntx_init_a64fx_ref, + bli_cntx_init_a64fx_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM-NEON (4 pipes x 128-bit vectors) -- + // -- ARM-NEON (4 pipes x 128-bit vectors) -- #ifdef BLIS_CONFIG_FIRESTORM - bli_gks_register_cntx( 
BLIS_ARCH_FIRESTORM, bli_cntx_init_firestorm, - bli_cntx_init_firestorm_ref, - bli_cntx_init_firestorm_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_FIRESTORM, bli_cntx_init_firestorm, + bli_cntx_init_firestorm_ref, + bli_cntx_init_firestorm_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM (2 pipes x 128-bit vectors) -- + // -- ARM (2 pipes x 128-bit vectors) -- #ifdef BLIS_CONFIG_THUNDERX2 - bli_gks_register_cntx( BLIS_ARCH_THUNDERX2, bli_cntx_init_thunderx2, - bli_cntx_init_thunderx2_ref, - bli_cntx_init_thunderx2_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_THUNDERX2, bli_cntx_init_thunderx2, + bli_cntx_init_thunderx2_ref, + bli_cntx_init_thunderx2_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_CORTEXA57 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA57, bli_cntx_init_cortexa57, - bli_cntx_init_cortexa57_ref, - bli_cntx_init_cortexa57_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA57, bli_cntx_init_cortexa57, + bli_cntx_init_cortexa57_ref, + bli_cntx_init_cortexa57_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_CORTEXA53 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA53, bli_cntx_init_cortexa53, - bli_cntx_init_cortexa53_ref, - bli_cntx_init_cortexa53_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA53, bli_cntx_init_cortexa53, + bli_cntx_init_cortexa53_ref, + bli_cntx_init_cortexa53_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- ARM (older 32-bit microarchitectures) -- + // -- ARM (older 32-bit microarchitectures) -- #ifdef BLIS_CONFIG_CORTEXA15 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA15, bli_cntx_init_cortexa15, - bli_cntx_init_cortexa15_ref, - bli_cntx_init_cortexa15_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA15, bli_cntx_init_cortexa15, + bli_cntx_init_cortexa15_ref, + bli_cntx_init_cortexa15_ind ); + 
bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_CORTEXA9 - bli_gks_register_cntx( BLIS_ARCH_CORTEXA9, bli_cntx_init_cortexa9, - bli_cntx_init_cortexa9_ref, - bli_cntx_init_cortexa9_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_CORTEXA9, bli_cntx_init_cortexa9, + bli_cntx_init_cortexa9_ref, + bli_cntx_init_cortexa9_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - // -- IBM architectures ------------------------------------------------ + // -- IBM architectures ------------------------------------------------ #ifdef BLIS_CONFIG_POWER10 - bli_gks_register_cntx( BLIS_ARCH_POWER10, bli_cntx_init_power10, - bli_cntx_init_power10_ref, - bli_cntx_init_power10_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_POWER10, bli_cntx_init_power10, + bli_cntx_init_power10_ref, + bli_cntx_init_power10_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_POWER9 - bli_gks_register_cntx( BLIS_ARCH_POWER9, bli_cntx_init_power9, - bli_cntx_init_power9_ref, - bli_cntx_init_power9_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_POWER9, bli_cntx_init_power9, + bli_cntx_init_power9_ref, + bli_cntx_init_power9_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_POWER7 - bli_gks_register_cntx( BLIS_ARCH_POWER7, bli_cntx_init_power7, - bli_cntx_init_power7_ref, - bli_cntx_init_power7_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_POWER7, bli_cntx_init_power7, + bli_cntx_init_power7_ref, + bli_cntx_init_power7_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif #ifdef BLIS_CONFIG_BGQ - bli_gks_register_cntx( BLIS_ARCH_BGQ, bli_cntx_init_bgq, - bli_cntx_init_bgq_ref, - bli_cntx_init_bgq_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_BGQ, bli_cntx_init_bgq, + bli_cntx_init_bgq_ref, + bli_cntx_init_bgq_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), 
r_val ); #endif - // -- Generic architectures -------------------------------------------- + // -- Generic architectures -------------------------------------------- #ifdef BLIS_CONFIG_GENERIC - bli_gks_register_cntx( BLIS_ARCH_GENERIC, bli_cntx_init_generic, - bli_cntx_init_generic_ref, - bli_cntx_init_generic_ind ); + r_val = + bli_gks_register_cntx( BLIS_ARCH_GENERIC, bli_cntx_init_generic, + bli_cntx_init_generic_ref, + bli_cntx_init_generic_ind ); + bli_check_callthen_return_if_failure( bli_gks_finalize(), r_val ); #endif - } + + // Mark the API as initialized. + bli_gks_mark_init(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_gks_finalize( void ) +err_t bli_gks_finalize( void ) { arch_t id; ind_t ind; - // BEGIN CRITICAL SECTION - // NOTE: This critical section is implicit. We assume this function is only - // called from within the critical section within bli_finalize(). - { + // NOTE: We assume this function is only called by one thread. + // Sanity check: Return early if the API is uninitialized. + if ( !bli_gks_is_init() ) return BLIS_SUCCESS; + + { // Iterate over the architectures in the gks array. for ( id = 0; id < BLIS_NUM_ARCHS; ++id ) { @@ -263,10 +347,18 @@ void bli_gks_finalize( void ) // Free the array of BLIS_NUM_IND_METHODS cntx* elements. bli_free_intl( gks_id ); } - } + // Set gks[ id ] to NULL. Not necessary, since bli_gks_init_index() + // will reset all elements of the gks array to zero (NULL) the next + // time the bli_gks_init() is called, but also doesn't hurt. + gks[ id ] = NULL; + } } - // END CRITICAL SECTION + + // Mark the API as uninitialized. + bli_gks_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -276,8 +368,8 @@ void bli_gks_init_index( void ) // This function is called by bli_gks_init(). It simply initializes all // architecture id elements of the internal arrays to NULL. 
- const size_t gks_size = sizeof( cntx_t* ) * BLIS_NUM_ARCHS; - const size_t fpa_size = sizeof( void_fp ) * BLIS_NUM_ARCHS; + const size_t gks_size = sizeof( cntx_t** ) * BLIS_NUM_ARCHS; + const size_t fpa_size = sizeof( void_fp ) * BLIS_NUM_ARCHS; // Set every entry in gks and context init function pointer arrays to // zero/NULL. This is done so that later on we know which ones were @@ -289,23 +381,24 @@ void bli_gks_init_index( void ) // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_lookup_nat_cntx +err_t bli_gks_lookup_nat_cntx ( - arch_t id + arch_t id, + const cntx_t** cntx ) { // Return the address of the (native) context for a given architecture id. // This function assumes the architecture has already been registered. - - return bli_gks_lookup_ind_cntx( id, BLIS_NAT ); + return bli_gks_lookup_ind_cntx( id, BLIS_NAT, cntx ); } // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_lookup_ind_cntx +err_t bli_gks_lookup_ind_cntx ( - arch_t id, - ind_t ind + arch_t id, + ind_t ind, + const cntx_t** cntx ) { // Return the address of the context for a given architecture id and @@ -318,7 +411,7 @@ const cntx_t* bli_gks_lookup_ind_cntx if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); } // Index into the array of context pointers for the given architecture id, @@ -327,7 +420,9 @@ const cntx_t* bli_gks_lookup_ind_cntx cntx_t* gks_id_ind = gks_id[ ind ]; // Return the context pointer at gks_id_ind. 
- return gks_id_ind; + *cntx = gks_id_ind; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -351,7 +446,7 @@ const cntx_t* const * bli_gks_lookup_id // ----------------------------------------------------------------------------- -void bli_gks_register_cntx +err_t bli_gks_register_cntx ( arch_t id, void_fp nat_fp, @@ -380,11 +475,9 @@ void bli_gks_register_cntx if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); } - nat_cntx_init_ft f = nat_fp; - // First, store the function pointers to the context initialization // functions for reference kernels and induced method execution. The // former will be used whenever we need to obtain reference kernels and @@ -399,7 +492,7 @@ void bli_gks_register_cntx // This is really just a safety feature to prevent memory leaks; this // early return should never occur, because the caller should never try // to register with an architecture id that has already been registered. - if ( gks[ id ] != NULL ) return; + if ( gks[ id ] != NULL ) return BLIS_SUCCESS; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_gks_register_cntx(): " ); @@ -410,6 +503,7 @@ void bli_gks_register_cntx // zeros/NULL, storing the address of the alloacted memory at the element // for the current architecture id. gks[ id ] = bli_calloc_intl( sizeof( cntx_t* ) * BLIS_NUM_IND_METHODS, &r_val ); + bli_check_return_if_failure( r_val ); // Alias the allocated array for readability. cntx_t** gks_id = gks[ id ]; @@ -418,14 +512,16 @@ void bli_gks_register_cntx printf( "bli_gks_register_cntx(): " ); #endif - // Allocate memory for a single context and store the address at - // the element in the gks[ id ] array that is reserved for native - // execution. + // Allocate memory for a single context and store the address at the element + // in the gks[ id ] array that is reserved for native execution. 
gks_id[ BLIS_NAT ] = bli_calloc_intl( sizeof( cntx_t ), &r_val ); + bli_check_return_if_failure( r_val ); // Alias the allocated context address for readability. cntx_t* gks_id_nat = gks_id[ BLIS_NAT ]; + nat_cntx_init_ft f = nat_fp; + // Call the context initialization function on the element of the newly // allocated array corresponding to native execution. f( gks_id_nat ); @@ -454,58 +550,70 @@ void bli_gks_register_cntx const blksz_t* nr = bli_cntx_get_blksz( BLIS_NR, gks_id_nat ); const blksz_t* kr = bli_cntx_get_blksz( BLIS_KR, gks_id_nat ); - e_val = bli_check_valid_mc_mod_mult( mc, mr ); bli_check_error_code( e_val ); - e_val = bli_check_valid_nc_mod_mult( nc, nr ); bli_check_error_code( e_val ); - e_val = bli_check_valid_kc_mod_mult( kc, kr ); bli_check_error_code( e_val ); + e_val = bli_check_valid_mc_mod_mult( mc, mr ); bli_check_return_error_code( e_val ); + e_val = bli_check_valid_nc_mod_mult( nc, nr ); bli_check_return_error_code( e_val ); + e_val = bli_check_valid_kc_mod_mult( kc, kr ); bli_check_return_error_code( e_val ); #ifndef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS - e_val = bli_check_valid_mc_mod_mult( mc, nr ); bli_check_error_code( e_val ); - e_val = bli_check_valid_nc_mod_mult( nc, mr ); bli_check_error_code( e_val ); + e_val = bli_check_valid_mc_mod_mult( mc, nr ); bli_check_return_error_code( e_val ); + e_val = bli_check_valid_nc_mod_mult( nc, mr ); bli_check_return_error_code( e_val ); #endif // Verify that the register blocksizes in the context are sufficiently large // relative to the maximum stack buffer size defined at configure-time. 
e_val = bli_check_sufficient_stack_buf_size( gks_id_nat ); - bli_check_error_code( e_val ); + bli_check_return_error_code( e_val ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_query_cntx( void ) +err_t bli_gks_query_cntx( const cntx_t** cntx ) { - return bli_gks_query_nat_cntx(); + return bli_gks_query_nat_cntx( cntx ); } -const cntx_t* bli_gks_query_nat_cntx( void ) +err_t bli_gks_query_nat_cntx( const cntx_t** cntx ) { - bli_init_once(); + BLIS_INIT_ONCE(); + + arch_t id; + err_t r_val; // Return the address of the native context for the architecture id // corresponding to the current hardware, as determined by // bli_arch_query_id(). // Query the architecture id. - arch_t id = bli_arch_query_id(); + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Use the architecture id to look up a pointer to its context. - const cntx_t* cntx = bli_gks_lookup_nat_cntx( id ); + r_val = bli_gks_lookup_nat_cntx( id, cntx ); + bli_check_return_if_failure( r_val ); - return cntx; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -const cntx_t* bli_gks_query_cntx_noinit( void ) +err_t bli_gks_query_cntx_noinit( const cntx_t** cntx ) { + arch_t id; + err_t r_val; + // This function is identical to bli_gks_query_cntx(), except that it // does not call bli_init_once(). // Query the architecture id. - arch_t id = bli_arch_query_id(); + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Use the architecture id to look up a pointer to its context. 
- const cntx_t* cntx = bli_gks_lookup_nat_cntx( id ); + r_val = bli_gks_lookup_nat_cntx( id, cntx ); + bli_check_return_if_failure( r_val ); - return cntx; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -514,39 +622,41 @@ const cntx_t* bli_gks_query_cntx_noinit( void ) // with a new entry corresponding to a context for an ind_t value. static bli_pthread_mutex_t gks_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; -const cntx_t* bli_gks_query_ind_cntx +err_t bli_gks_query_ind_cntx ( - ind_t ind + ind_t ind, + const cntx_t** cntx ) { - bli_init_once(); + BLIS_INIT_ONCE(); + arch_t id; cntx_t* gks_id_ind; - err_t r_val; + err_t r_val; // Return the address of a context that will be suited for executing a - // level-3 operation via the requested induced method (and datatype) for - // the architecture id corresponding to the current hardware, as - // determined by bli_arch_query_id(). - - // This function is called when a level-3 operation via induced method is - // called, e.g. bli_gemm1m(). If this is the first time that induced method - // is being executed since bli_gks_init(), the necessary context structure - // is allocated and initialized. If this is not the first time, then the - // address of a previously-allocated and initialized (cached) context is - // returned. Note that much of this must be done with mutual exclusion to - // ensure thread safety and deterministic behavior. + // level-3 operation via the requested induced method for the arch_t id + // corresponding to the current hardware, as determined by + // bli_arch_query_id(). - // Query the architecture id. - arch_t id = bli_arch_query_id(); + // If this is the first time that induced method is being executed since + // bli_gks_init(), the necessary context structure is allocated and + // initialized. If this is not the first time, then the address of a + // previously-allocated and initialized (cached) context is returned. 
+ // Note that much of this must be done with mutual exclusion to ensure + // thread safety and deterministic behavior. - // Sanity check: verify that the arch_t id is valid. + // Sanity check: verify that the induced method id is valid. if ( bli_error_checking_is_enabled() ) { - err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); + err_t e_val = bli_check_valid_ind( ind ); + bli_check_return_error_code( e_val ); } + // Query the architecture id. + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); + // NOTE: These initial statements can reside outside of the critical section // because gks[ id ] should have already been allocated, and the native // context in that array should have already been allocated/initialized. @@ -558,7 +668,7 @@ const cntx_t* bli_gks_query_ind_cntx // If for some reason the native context was requested, we can return // its address early. - if ( ind == BLIS_NAT ) return gks_id_nat; + if ( ind == BLIS_NAT ) { *cntx = gks_id_nat; return BLIS_SUCCESS; } // This function assumes that the architecture idenified by id has // already been registered with the gks (which guarantees that @@ -581,24 +691,28 @@ const cntx_t* bli_gks_query_ind_cntx // If gks_id_ind is NULL, then we know we must allocate and then // initialize the context, storing its address back to // gks_id[ ind ]. - gks_id_ind = bli_calloc_intl( sizeof( cntx_t ), &r_val ); - gks_id[ ind ] = gks_id_ind; - - // Before we can call the induced method context initialization - // function on the newly allocated structure, we must first copy - // over the contents of the native context. - *gks_id_ind = *gks_id_nat; - - // Use the architecture id to look up the function pointer to the - // context initialization function for induced methods. 
- ind_cntx_init_ft f = cntx_ind_init[ id ]; - - // Now we modify the context (so that it contains the proper values - // for its induced method) by calling the context initialization - // function for the current induced method. (That function assumes - // that the context is pre- initialized with values for native - // execution.) - f( ind, gks_id_ind ); + gks_id_ind = bli_calloc_intl( sizeof( cntx_t ), &r_val ); + + if ( bli_is_success( r_val ) ) + { + gks_id[ ind ] = gks_id_ind; + + // Before we can call the induced method context initialization + // function on the newly allocated structure, we must first copy + // over the contents of the native context. + *gks_id_ind = *gks_id_nat; + + // Use the architecture id to look up the function pointer to the + // context initialization function for induced methods. + ind_cntx_init_ft f = cntx_ind_init[ id ]; + + // Now we modify the context (so that it contains the proper values + // for its induced method) by calling the context initialization + // function for the current induced method. (That function assumes + // that the context is pre- initialized with values for native + // execution.) + f( ind, gks_id_ind ); + } } } // END CRITICAL SECTION @@ -606,27 +720,29 @@ const cntx_t* bli_gks_query_ind_cntx // Release the mutex protecting the gks. bli_pthread_mutex_unlock( &gks_mutex ); + // Now that we're out of the critical section, we can return if + // bli_calloc_intl() failed. + bli_check_return_if_failure( r_val ); + // Return the address of the newly-allocated/initialized context. - return gks_id_ind; + *cntx = gks_id_ind; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_gks_init_ref_cntx +err_t bli_gks_init_ref_cntx ( cntx_t* cntx ) { - // Query the architecture id. - arch_t id = bli_arch_query_id(); + arch_t id; + err_t r_val; - // Sanity check: verify that the arch_t id is valid. 
- if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); - } + // Query the architecture id. + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Obtain the function pointer to the context initialization function for // reference kernels. @@ -634,22 +750,26 @@ void bli_gks_init_ref_cntx // Initialize the caller's context with reference kernels and related values. f( cntx ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -bool bli_gks_cntx_l3_nat_ukr_is_ref +err_t bli_gks_cntx_l3_nat_ukr_is_ref ( num_t dt, ukr_t ukr_id, - const cntx_t* cntx + const cntx_t* cntx, + bool* is_ref ) { cntx_t ref_cntx; + err_t r_val; - // Initialize a context with reference kernels for the arch_t id queried - // via bli_arch_query_id(). - bli_gks_init_ref_cntx( &ref_cntx ); + // Initialize a context with reference kernels. + r_val = bli_gks_init_ref_cntx( &ref_cntx ); + bli_check_return_if_failure( r_val ); // Query each context for the micro-kernel function pointer for the // specified datatype. @@ -657,7 +777,9 @@ bool bli_gks_cntx_l3_nat_ukr_is_ref void_fp fp = bli_cntx_get_ukr_dt( dt, ukr_id, cntx ); // Return the result. - return fp == ref_fp; + *is_ref = ( fp == ref_fp ); + + return BLIS_SUCCESS; } // @@ -674,15 +796,23 @@ static const char* bli_gks_l3_ukr_impl_str[BLIS_NUM_UKR_IMPL_TYPES] = // ----------------------------------------------------------------------------- -const char* bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt ) +err_t bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt, const char** str ) { - kimpl_t ki; + BLIS_INIT_ONCE(); + + err_t r_val; + kimpl_t ki; + const cntx_t* cntx; + void_fp fp; // Query the context for the current induced method and datatype, and // then query the ukernel function pointer for the given datatype from // that context. 
- const cntx_t* cntx = bli_gks_query_ind_cntx( method ); - void_fp fp = bli_cntx_get_ukr_dt( dt, ukr, cntx ); + r_val = bli_gks_query_ind_cntx( method, &cntx ); + bli_check_return_if_failure( r_val ); + + fp = bli_cntx_get_ukr_dt( dt, ukr, cntx ); + //bli_check_return_if_failure( r_val ); // Check whether the ukernel function pointer is NULL for the given // datatype. If it is NULL, return the string for not applicable. @@ -691,17 +821,23 @@ const char* bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt ) if ( fp == NULL ) ki = BLIS_NOTAPPLIC_UKERNEL; else - ki = bli_gks_l3_ukr_impl_type( ukr, method, dt ); + { + r_val = bli_gks_l3_ukr_impl_type( ukr, method, dt, &ki ); + bli_check_return_if_failure( r_val ); + } + + *str = bli_gks_l3_ukr_impl_str[ ki ]; - return bli_gks_l3_ukr_impl_str[ ki ]; + return BLIS_SUCCESS; } #if 0 -char* bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt ) +err_t bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt, const char** str ) { opid_t oper; ind_t method; kimpl_t ki; + err_t r_val; // We need to decide which operation we will use to query the // current available induced method. If the ukr type given is @@ -716,44 +852,49 @@ char* bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt ) // Query the ukernel implementation type using the current // available method. - ki = bli_gks_l3_ukr_impl_type( ukr, method, dt ); + r_val = bli_gks_l3_ukr_impl_type( ukr, method, dt, ki ); + bli_check_return_if_failure( r_val ); + + *str = bli_ukr_impl_str[ ki ]; - return bli_ukr_impl_str[ ki ]; + return BLIS_SUCCESS; } #endif -kimpl_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt ) +err_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt, kimpl_t* ki ) { // If the current available induced method is not native, it // must be virtual. 
- if ( method != BLIS_NAT ) return BLIS_VIRTUAL_UKERNEL; + if ( method != BLIS_NAT ) *ki = BLIS_VIRTUAL_UKERNEL; else { - // If the current available induced method for the gemm - // operation is native, then it might be reference or - // optimized. To determine which, we compare the - // datatype-specific function pointer within the ukrs - // object corresponding to the current available induced - // method to the typed function pointer within the known - // reference ukrs object. + // If the current available induced method for the gemm operation + // is native, then it might be reference or optimized. To determine + // which, we compare the datatype-specific function pointer within + // the ukrs object corresponding to the current available induced + // method to the typed function pointer within the known reference + // ukrs object. - // Query the architecture id. - arch_t id = bli_arch_query_id(); + arch_t id; + err_t r_val; - // Sanity check: verify that the arch_t id is valid. - if ( bli_error_checking_is_enabled() ) - { - err_t e_val = bli_check_valid_arch_id( id ); - bli_check_error_code( e_val ); - } + // Query the architecture id. + r_val = bli_arch_query_id( &id ); + bli_check_return_if_failure( r_val ); // Query the native context from the gks. 
- const cntx_t* nat_cntx = bli_gks_lookup_nat_cntx( id ); + const cntx_t* nat_cntx; + r_val = bli_gks_lookup_nat_cntx( id, &nat_cntx ); + bli_check_return_if_failure( r_val ); - if ( bli_gks_cntx_l3_nat_ukr_is_ref( dt, ukr, nat_cntx ) ) - return BLIS_REFERENCE_UKERNEL; - else - return BLIS_OPTIMIZED_UKERNEL; + bool is_ref; + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( dt, ukr, nat_cntx, &is_ref ); + bli_check_return_if_failure( r_val ); + + if ( is_ref ) *ki = BLIS_REFERENCE_UKERNEL; + else *ki = BLIS_OPTIMIZED_UKERNEL; } + + return BLIS_SUCCESS; } diff --git a/frame/base/bli_gks.h b/frame/base/bli_gks.h index 3a93fd59e8..6910c7d716 100644 --- a/frame/base/bli_gks.h +++ b/frame/base/bli_gks.h @@ -35,31 +35,63 @@ #ifndef BLIS_GKS_H #define BLIS_GKS_H -void bli_gks_init( void ); -void bli_gks_finalize( void ); +bool bli_gks_is_init( void ); +void bli_gks_mark_init( void ); +void bli_gks_mark_uninit( void ); -void bli_gks_init_index( void ); +err_t bli_gks_init( void ); +err_t bli_gks_finalize( void ); -const cntx_t* bli_gks_lookup_nat_cntx( arch_t id ); -const cntx_t* bli_gks_lookup_ind_cntx( arch_t id, ind_t ind ); -const cntx_t* const * bli_gks_lookup_id( arch_t id ); -void bli_gks_register_cntx( arch_t id, void_fp nat_fp, void_fp ref_fp, void_fp ind_fp ); +void bli_gks_init_index( void ); -BLIS_EXPORT_BLIS const cntx_t* bli_gks_query_cntx( void ); -BLIS_EXPORT_BLIS const cntx_t* bli_gks_query_nat_cntx( void ); +err_t bli_gks_lookup_nat_cntx( arch_t id, const cntx_t** cntx ); +err_t bli_gks_lookup_ind_cntx( arch_t id, ind_t ind, const cntx_t** cntx ); +const cntx_t* const * bli_gks_lookup_id( arch_t id ); +err_t bli_gks_register_cntx( arch_t id, void_fp nat_fp, void_fp ref_fp, void_fp ind_fp ); -const cntx_t* bli_gks_query_cntx_noinit( void ); +BLIS_EXPORT_BLIS err_t bli_gks_query_cntx( const cntx_t** cntx ); +BLIS_EXPORT_BLIS err_t bli_gks_query_nat_cntx( const cntx_t** cntx ); -BLIS_EXPORT_BLIS const cntx_t* bli_gks_query_ind_cntx( ind_t ind ); +err_t 
bli_gks_query_cntx_noinit( const cntx_t** cntx ); -BLIS_EXPORT_BLIS void bli_gks_init_ref_cntx( cntx_t* cntx ); +BLIS_EXPORT_BLIS err_t bli_gks_query_ind_cntx( ind_t ind, const cntx_t** cntx ); -bool bli_gks_cntx_l3_nat_ukr_is_ref( num_t dt, ukr_t ukr_id, const cntx_t* cntx ); +BLIS_EXPORT_BLIS err_t bli_gks_init_ref_cntx( cntx_t* cntx ); -BLIS_EXPORT_BLIS const char* bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt ); -BLIS_EXPORT_BLIS kimpl_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt ); +err_t bli_gks_cntx_l3_nat_ukr_is_ref( num_t dt, ukr_t ukr_id, const cntx_t* cntx, bool* is_ref ); -//char* bli_gks_l3_ukr_avail_impl_string( ukr_t ukr, num_t dt ); +BLIS_EXPORT_BLIS err_t bli_gks_l3_ukr_impl_string( ukr_t ukr, ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_gks_l3_ukr_impl_type( ukr_t ukr, ind_t method, num_t dt, kimpl_t* ki ); + +// +// -- cntx_t* query convenience wrapper ---------------------------------------- +// + +BLIS_INLINE err_t bli_gks_query_cntx_if_null( const cntx_t** cntx ) +{ + err_t r_val; + + if ( *cntx == NULL ) + { + r_val = bli_gks_query_nat_cntx( cntx ); + bli_check_return_if_failure( r_val ); + } + + return BLIS_SUCCESS; +} + +BLIS_INLINE err_t bli_gks_query_ind_cntx_if_null( ind_t im, const cntx_t** cntx ) +{ + err_t r_val; + + if ( *cntx == NULL ) + { + r_val = bli_gks_query_ind_cntx( im, cntx ); + bli_check_return_if_failure( r_val ); + } + + return BLIS_SUCCESS; +} #endif diff --git a/frame/base/bli_ind.c b/frame/base/bli_ind.c index fbe7404654..fd3376f4e0 100644 --- a/frame/base/bli_ind.c +++ b/frame/base/bli_ind.c @@ -40,13 +40,45 @@ static const char* bli_ind_impl_str[BLIS_NUM_IND_METHODS] = /* nat */ "native", }; +// A boolean that tracks whether bli_ind_init() has completed successfully. 
+static bool ind_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_ind_init( void ) +bool bli_ind_is_init( void ) +{ + return ind_is_init; +} + +void bli_ind_mark_init( void ) { + ind_is_init = TRUE; +} + +void bli_ind_mark_uninit( void ) +{ + ind_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + +err_t bli_ind_init( void ) +{ + const cntx_t* cntx; + err_t r_val; + + // NOTE: We assume this function is only called by one thread. + + // Sanity check: Return early if the API is already initialized. + if ( bli_ind_is_init() ) return BLIS_SUCCESS; + // NOTE: Instead of calling bli_gks_query_cntx(), we call // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). - const cntx_t* cntx = bli_gks_query_cntx_noinit(); + r_val = bli_gks_query_cntx_noinit( &cntx ); + bli_check_return_if_failure( r_val ); + + bool s_is_ref, c_is_ref, + d_is_ref, z_is_ref; // For each precision, enable the default induced method (1m) if both of // the following conditions are met: @@ -55,17 +87,38 @@ void bli_ind_init( void ) // The second condition means that BLIS will not bother to use an induced // method if both the real and complex domain kernels are reference. 
- bool s_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_FLOAT, BLIS_GEMM_UKR, cntx ); - bool d_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DOUBLE, BLIS_GEMM_UKR, cntx ); - bool c_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_SCOMPLEX, BLIS_GEMM_UKR, cntx ); - bool z_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DCOMPLEX, BLIS_GEMM_UKR, cntx ); + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_FLOAT, BLIS_GEMM_UKR, cntx, &s_is_ref ); + bli_check_return_if_failure( r_val ); + + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DOUBLE, BLIS_GEMM_UKR, cntx, &d_is_ref ); + bli_check_return_if_failure( r_val ); + + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_SCOMPLEX, BLIS_GEMM_UKR, cntx, &c_is_ref ); + bli_check_return_if_failure( r_val ); + + r_val = bli_gks_cntx_l3_nat_ukr_is_ref( BLIS_DCOMPLEX, BLIS_GEMM_UKR, cntx, &z_is_ref ); + bli_check_return_if_failure( r_val ); if ( c_is_ref && !s_is_ref ) bli_ind_enable_dt( BLIS_1M, BLIS_SCOMPLEX ); if ( z_is_ref && !d_is_ref ) bli_ind_enable_dt( BLIS_1M, BLIS_DCOMPLEX ); + + // Mark the API as initialized. + bli_ind_mark_init(); + + return BLIS_SUCCESS; } -void bli_ind_finalize( void ) +err_t bli_ind_finalize( void ) { + // NOTE: We assume this function is only called by one thread. + + // Sanity check: Return early if the API is uninitialized. + if ( !bli_ind_is_init() ) return BLIS_SUCCESS; + + // Mark the API as uninitialized. 
+ bli_ind_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -176,11 +229,17 @@ ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ) return method; } -const char* bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt ) +// ----------------------------------------------------------------------------- + +err_t bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt, const char** str ) { + BLIS_INIT_ONCE(); + ind_t method = bli_ind_oper_find_avail( oper, dt ); - return bli_ind_get_impl_string( method ); + *str = bli_ind_get_impl_string( method ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -192,10 +251,9 @@ const char* bli_ind_get_impl_string( ind_t method ) num_t bli_ind_map_cdt_to_index( num_t dt ) { - // A non-complex datatype should never be passed in. - if ( !bli_is_complex( dt ) ) bli_abort(); - - // Map the complex datatype to a zero-based index. + // Map the complex datatype to a zero-based index that matches up with + // the expectations of the induced-method-per-operation state array in + // bli_l3_ind.c. 
if ( bli_is_scomplex( dt ) ) return 0; else /* if ( bli_is_dcomplex( dt ) ) */ return 1; } diff --git a/frame/base/bli_ind.h b/frame/base/bli_ind.h index e162c5809b..3cf4b24596 100644 --- a/frame/base/bli_ind.h +++ b/frame/base/bli_ind.h @@ -38,26 +38,30 @@ // level-3 induced method management #include "bli_l3_ind.h" -void bli_ind_init( void ); -void bli_ind_finalize( void ); +bool bli_ind_is_init( void ); +void bli_ind_mark_init( void ); +void bli_ind_mark_uninit( void ); -BLIS_EXPORT_BLIS void bli_ind_enable( ind_t method ); -BLIS_EXPORT_BLIS void bli_ind_disable( ind_t method ); -BLIS_EXPORT_BLIS void bli_ind_disable_all( void ); +err_t bli_ind_init( void ); +err_t bli_ind_finalize( void ); -BLIS_EXPORT_BLIS void bli_ind_enable_dt( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS void bli_ind_disable_dt( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS void bli_ind_disable_all_dt( num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_enable( ind_t method ); +BLIS_EXPORT_BLIS void bli_ind_disable( ind_t method ); +BLIS_EXPORT_BLIS void bli_ind_disable_all( void ); -BLIS_EXPORT_BLIS void bli_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_enable_dt( ind_t method, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_disable_dt( ind_t method, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_disable_all_dt( num_t dt ); -BLIS_EXPORT_BLIS bool bli_ind_oper_is_impl( opid_t oper, ind_t method ); -BLIS_EXPORT_BLIS ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt ); +BLIS_EXPORT_BLIS void bli_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt ); -const char* bli_ind_get_impl_string( ind_t method ); -num_t bli_ind_map_cdt_to_index( num_t dt ); +BLIS_EXPORT_BLIS bool bli_ind_oper_is_impl( opid_t oper, ind_t method ); +BLIS_EXPORT_BLIS ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ); +BLIS_EXPORT_BLIS err_t bli_ind_oper_get_avail_impl_string( opid_t oper, num_t 
dt, const char** str ); + +const char* bli_ind_get_impl_string( ind_t method ); +num_t bli_ind_map_cdt_to_index( num_t dt ); #endif diff --git a/frame/base/bli_info.c b/frame/base/bli_info.c index 72b54ca20c..8b691908cf 100644 --- a/frame/base/bli_info.c +++ b/frame/base/bli_info.c @@ -162,29 +162,112 @@ gint_t bli_info_get_enable_sandbox( void ) // -- Level-3 kernel definitions -- -const char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMM_UKR, method, dt ); } -const char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_L_UKR, method, dt ); } -const char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_U_UKR, method, dt ); } -const char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_TRSM_L_UKR, method, dt ); } -const char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) -{ bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_TRSM_U_UKR, method, dt ); } +err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_GEMM_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_L_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_U_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t 
bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_TRSM_L_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_gks_l3_ukr_impl_string( BLIS_TRSM_U_UKR, method, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} // -- BLIS implementation query (level-3) -------------------------------------- -const char* bli_info_get_gemm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMM, dt ); } -const char* bli_info_get_gemmt_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_hemm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_HEMM, dt ); } -const char* bli_info_get_herk_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_her2k_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_symm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_SYMM, dt ); } -const char* bli_info_get_syrk_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_syr2k_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMMT, dt ); } -const char* bli_info_get_trmm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRMM, dt ); } -const char* bli_info_get_trmm3_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRMM3, dt ); } -const char* bli_info_get_trsm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRSM, dt ); } - +err_t bli_info_get_gemm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + 
err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_GEMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_hemm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_HEMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_herk_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_HERK, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_her2k_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_HER2K, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_symm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_SYMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_syrk_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_SYRK, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_syr2k_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_SYR2K, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_trmm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_TRMM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t bli_info_get_trmm3_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_TRMM3, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} +err_t 
bli_info_get_trsm_impl_string( num_t dt, const char** str ) +{ + BLIS_INIT_ONCE(); + err_t r_val = bli_ind_oper_get_avail_impl_string( BLIS_TRSM, dt, str ); + bli_check_return_if_failure( r_val ); + return BLIS_SUCCESS; +} diff --git a/frame/base/bli_info.h b/frame/base/bli_info.h index 250504c231..6f16d7e547 100644 --- a/frame/base/bli_info.h +++ b/frame/base/bli_info.h @@ -81,24 +81,23 @@ BLIS_EXPORT_BLIS gint_t bli_info_get_enable_sandbox( void ); // -- Level-3 kernel definitions -- -BLIS_EXPORT_BLIS const char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt, const char** str ); // -- BLIS implementation query (level-3) -------------------------------------- -BLIS_EXPORT_BLIS const char* bli_info_get_gemm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_gemmt_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_hemm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_herk_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_her2k_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* 
bli_info_get_symm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_syrk_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_syr2k_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trmm_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trmm3_impl_string( num_t dt ); -BLIS_EXPORT_BLIS const char* bli_info_get_trsm_impl_string( num_t dt ); +BLIS_EXPORT_BLIS err_t bli_info_get_gemm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_hemm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_herk_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_her2k_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_symm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_syrk_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_syr2k_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trmm_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trmm3_impl_string( num_t dt, const char** str ); +BLIS_EXPORT_BLIS err_t bli_info_get_trsm_impl_string( num_t dt, const char** str ); diff --git a/frame/base/bli_init.c b/frame/base/bli_init.c index f1baa2c217..6c1ec3c80d 100644 --- a/frame/base/bli_init.c +++ b/frame/base/bli_init.c @@ -37,68 +37,114 @@ // ----------------------------------------------------------------------------- -void bli_init( void ) +err_t bli_init( void ) { - bli_init_once(); + BLIS_INIT_ONCE(); + + return BLIS_SUCCESS; } -void bli_finalize( void ) +err_t bli_finalize( void ) { - bli_finalize_once(); + BLIS_FINALIZE_ONCE(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_init_auto( void ) +err_t bli_init_auto( void ) { - bli_init_once(); + // NOTE: Most callers of this function (e.g. 
the BLAS compatibility layer) + // will ignore the return value of this function since those functions can't + // return error codes. + BLIS_INIT_ONCE(); + + return BLIS_SUCCESS; } -void bli_finalize_auto( void ) +err_t bli_finalize_auto( void ) { // The _auto() functions are used when initializing the BLAS compatibility // layer. It would not make much sense to automatically initialize and // finalize for every BLAS routine call; therefore, we remain initialized // unless and until the application explicitly calls bli_finalize(). + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- static bli_pthread_switch_t lib_state = BLIS_PTHREAD_SWITCH_INIT; -void bli_init_once( void ) +err_t bli_init_once( void ) { - bli_pthread_switch_on( &lib_state, bli_init_apis ); + // We can typecast from the return value of bli_pthread_switch_on() + // (which is of type 'int') directly to 'err_t' since they share the same + // basic semantics: 0 indicates success while all other values represent + // some kind of error. + return ( err_t )bli_pthread_switch_on( &lib_state, bli_init_apis ); } -void bli_finalize_once( void ) +err_t bli_finalize_once( void ) { - bli_pthread_switch_off( &lib_state, bli_finalize_apis ); + // We can typecast from the return value of bli_pthread_switch_off() + // (which is of type 'int') directly to 'err_t' since they share the same + // basic semantics: 0 indicates success while all other values represent + // some kind of error. + return ( err_t )bli_pthread_switch_off( &lib_state, bli_finalize_apis ); } // ----------------------------------------------------------------------------- int bli_init_apis( void ) { - // Initialize various sub-APIs. 
- bli_gks_init(); - bli_ind_init(); - bli_thread_init(); - bli_pack_init(); - bli_memsys_init(); - - return 0; + err_t r_val = BLIS_SUCCESS; + + // NOTE: Each of the sub-APIs should either (a) fully initialize into a good + // state (ie: a state in which a subsequent call to the corresponding + // _finalize() function would fully de-allocate whatever was allocated and + // thereby avoid a memory leak), or (b) not initialize at all. + + // NOTE: The bli_check_return_if_failure() macro will return r_val when + // the variable indicates a value indicating failure. Since r_val is + // declared as of type 'err_t' and the function returns a value of type + // 'int', an implicit typecast will occur if/when the macro detects failure. + + r_val = bli_gks_init(); bli_check_return_if_failure( r_val ); + r_val = bli_ind_init(); bli_check_return_if_failure( r_val ); + r_val = bli_thread_init(); bli_check_return_if_failure( r_val ); + r_val = bli_pack_init(); bli_check_return_if_failure( r_val ); + r_val = bli_pba_init(); bli_check_return_if_failure( r_val ); + r_val = bli_sba_init(); bli_check_return_if_failure( r_val ); + + return ( int )BLIS_SUCCESS; } int bli_finalize_apis( void ) { + err_t r_val = BLIS_SUCCESS; + // Finalize various sub-APIs. - bli_memsys_finalize(); + r_val = bli_sba_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_pba_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_pack_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_thread_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_ind_finalize(); bli_check_return_if_failure( r_val ); + r_val = bli_gks_finalize(); bli_check_return_if_failure( r_val ); + + return ( int )BLIS_SUCCESS; +} + +#if 0 +void bli_finalize_apis_fast( void ) +{ + // Finalize all APIs but skip the error checking. 
+ bli_sba_finalize(); + bli_pba_finalize(); bli_pack_finalize(); bli_thread_finalize(); bli_ind_finalize(); bli_gks_finalize(); - - return 0; } +#endif diff --git a/frame/base/bli_init.h b/frame/base/bli_init.h index d1bea0cb34..230ae15846 100644 --- a/frame/base/bli_init.h +++ b/frame/base/bli_init.h @@ -32,15 +32,15 @@ */ -BLIS_EXPORT_BLIS void bli_init( void ); -BLIS_EXPORT_BLIS void bli_finalize( void ); +BLIS_EXPORT_BLIS err_t bli_init( void ); +BLIS_EXPORT_BLIS err_t bli_finalize( void ); -void bli_init_auto( void ); -void bli_finalize_auto( void ); +err_t bli_init_auto( void ); +err_t bli_finalize_auto( void ); -void bli_init_once( void ); -void bli_finalize_once( void ); +err_t bli_init_once( void ); +err_t bli_finalize_once( void ); -int bli_init_apis( void ); -int bli_finalize_apis( void ); +int bli_init_apis( void ); +int bli_finalize_apis( void ); diff --git a/frame/base/bli_memsys.c b/frame/base/bli_memsys.c deleted file mode 100644 index 7b62ded5c7..0000000000 --- a/frame/base/bli_memsys.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -void bli_memsys_init( void ) -{ - // Query a native context so we have something to pass into - // bli_pba_init_pools(). We use BLIS_DOUBLE for the datatype, - // but the dt argument is actually only used when initializing - // contexts for induced methods. - // NOTE: Instead of calling bli_gks_query_cntx(), we call - // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). - const cntx_t* cntx_p = bli_gks_query_cntx_noinit(); - - // Initialize the packing block allocator and its data structures. - bli_pba_init( cntx_p ); - - // Initialize the small block allocator and its data structures. - bli_sba_init(); -} - -void bli_memsys_finalize( void ) -{ - // Finalize the small block allocator and its data structures. - bli_sba_finalize(); - - // Finalize the packing block allocator and its data structures. 
- bli_pba_finalize(); -} - diff --git a/frame/base/bli_memsys.h b/frame/base/bli_memsys.h deleted file mode 100644 index be0d48e35b..0000000000 --- a/frame/base/bli_memsys.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_MEMSYS_H -#define BLIS_MEMSYS_H - -// ----------------------------------------------------------------------------- - -void bli_memsys_init( void ); -void bli_memsys_finalize( void ); - - -#endif - diff --git a/frame/base/bli_pack.c b/frame/base/bli_pack.c index c5ce9cc6c9..919d44aaac 100644 --- a/frame/base/bli_pack.c +++ b/frame/base/bli_pack.c @@ -42,45 +42,84 @@ extern rntm_t global_rntm; // resides in bli_rntm.c.) extern bli_pthread_mutex_t global_rntm_mutex; +// A boolean that tracks whether bli_pack_init() has completed successfully. +static bool pack_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_pack_init( void ) +bool bli_pack_is_init( void ) +{ + return pack_is_init; +} + +void bli_pack_mark_init( void ) { + pack_is_init = TRUE; +} + +void bli_pack_mark_uninit( void ) +{ + pack_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + +err_t bli_pack_init( void ) +{ + // Sanity check: Return early if the API is already initialized. + if ( bli_pack_is_init() ) return BLIS_SUCCESS; + // Read the environment variables and use them to initialize the // global runtime object. bli_pack_init_rntm_from_env( &global_rntm ); + + // Mark the API as initialized. + bli_pack_mark_init(); + + return BLIS_SUCCESS; } -void bli_pack_finalize( void ) +err_t bli_pack_finalize( void ) { + // Sanity check: Return early if the API is uninitialized. + if ( !bli_pack_is_init() ) return BLIS_SUCCESS; + + // Mark the API as uninitialized. + bli_pack_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_pack_get_pack_a( bool* pack_a ) +err_t bli_pack_get_pack_a( bool* pack_a ) { // We must ensure that global_rntm has been initialized. 
- bli_init_once(); + BLIS_INIT_ONCE(); *pack_a = bli_rntm_pack_a( &global_rntm ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -void bli_pack_get_pack_b( bool* pack_b ) +err_t bli_pack_get_pack_b( bool* pack_b ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); *pack_b = bli_rntm_pack_b( &global_rntm ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- -void bli_pack_set_pack_a( bool pack_a ) +err_t bli_pack_set_pack_a( bool pack_a ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -89,14 +128,16 @@ void bli_pack_set_pack_a( bool pack_a ) // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- -void bli_pack_set_pack_b( bool pack_b ) +err_t bli_pack_set_pack_b( bool pack_b ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -105,6 +146,8 @@ void bli_pack_set_pack_b( bool pack_b ) // Release the mutex protecting global_rntm. 
bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- diff --git a/frame/base/bli_pack.h b/frame/base/bli_pack.h index c12740148c..8c0ade6377 100644 --- a/frame/base/bli_pack.h +++ b/frame/base/bli_pack.h @@ -35,15 +35,19 @@ #ifndef BLIS_PACK_H #define BLIS_PACK_H -void bli_pack_init( void ); -void bli_pack_finalize( void ); +bool bli_pack_is_init( void ); +void bli_pack_mark_init( void ); +void bli_pack_mark_uninit( void ); -BLIS_EXPORT_BLIS void bli_pack_get_pack_a( bool* pack_a ); -BLIS_EXPORT_BLIS void bli_pack_get_pack_b( bool* pack_b ); -BLIS_EXPORT_BLIS void bli_pack_set_pack_a( bool pack_a ); -BLIS_EXPORT_BLIS void bli_pack_set_pack_b( bool pack_b ); +err_t bli_pack_init( void ); +err_t bli_pack_finalize( void ); -void bli_pack_init_rntm_from_env( rntm_t* rntm ); +BLIS_EXPORT_BLIS err_t bli_pack_get_pack_a( bool* pack_a ); +BLIS_EXPORT_BLIS err_t bli_pack_get_pack_b( bool* pack_b ); +BLIS_EXPORT_BLIS err_t bli_pack_set_pack_a( bool pack_a ); +BLIS_EXPORT_BLIS err_t bli_pack_set_pack_b( bool pack_b ); + +void bli_pack_init_rntm_from_env( rntm_t* rntm ); #endif diff --git a/frame/base/bli_pba.c b/frame/base/bli_pba.c index 68dffd7285..9b00d44b29 100644 --- a/frame/base/bli_pba.c +++ b/frame/base/bli_pba.c @@ -39,6 +39,26 @@ // Statically initialize the mutex within the packing block allocator object. static pba_t pba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER }; +// A boolean that tracks whether bli_pba_init() has completed successfully. 
+static bool pba_is_init = FALSE; + +// ----------------------------------------------------------------------------- + +bool bli_pba_is_init( void ) +{ + return pba_is_init; +} + +void bli_pba_mark_init( void ) +{ + pba_is_init = TRUE; +} + +void bli_pba_mark_uninit( void ) +{ + pba_is_init = FALSE; +} + // ----------------------------------------------------------------------------- pba_t* bli_pba_query( void ) @@ -46,13 +66,28 @@ pba_t* bli_pba_query( void ) return &pba; } -void bli_pba_init +void bli_pba_rntm_set_pba ( - const cntx_t* cntx + rntm_t* rntm ) { pba_t* pba = bli_pba_query(); + bli_rntm_set_pba( pba, rntm ); +} + +// ----------------------------------------------------------------------------- + +err_t bli_pba_init + ( + void + ) +{ + // Sanity check: Return early if the API is already initialized. + if ( bli_pba_is_init() ) return BLIS_SUCCESS; + + pba_t* restrict pba = bli_pba_query(); + const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE_GEN; malloc_ft malloc_fp = BLIS_MALLOC_POOL; free_ft free_fp = BLIS_FREE_POOL; @@ -67,20 +102,37 @@ void bli_pba_init // keeps bli_pba_init() simpler and removes the possibility of // something going wrong during mutex initialization. + // The mutex field of pba is initialized statically above. It's + // important to keep the mutex initialization outside of the _init() + // function so that in the rare event that BLIS initialization fails + // part way through, we don't have to worry about whether or not we + // need to destroy the mutex first (before allowing the application + // a second chance at initialization). + #ifdef BLIS_ENABLE_PBA_POOLS - bli_pba_init_pools( cntx, pba ); + err_t r_val = bli_pba_init_pools( pba ); + bli_check_return_if_failure( r_val ); #endif + + // Mark the API as initialized. + bli_pba_mark_init(); + + return BLIS_SUCCESS; } -void bli_pba_finalize +err_t bli_pba_finalize ( void ) { - pba_t* pba = bli_pba_query(); + // Sanity check: Return early if the API is uninitialized. 
+ if ( !bli_pba_is_init() ) return BLIS_SUCCESS; + + pba_t* restrict pba = bli_pba_query(); #ifdef BLIS_ENABLE_PBA_POOLS - bli_pba_finalize_pools( pba ); + err_t r_val = bli_pba_finalize_pools( pba ); + bli_check_return_if_failure( r_val ); #endif // The mutex field of pba is initialized statically above, and @@ -88,9 +140,14 @@ void bli_pba_finalize bli_pba_set_malloc_fp( NULL, pba ); bli_pba_set_free_fp( NULL, pba ); + + // Mark the API as uninitialized. + bli_pba_mark_uninit(); + + return BLIS_SUCCESS; } -void bli_pba_acquire_m +err_t bli_pba_acquire_m ( rntm_t* rntm, siz_t req_size, @@ -98,9 +155,6 @@ void bli_pba_acquire_m mem_t* mem ) { - pool_t* pool; - pblk_t* pblk; - dim_t pi; err_t r_val; // If the internal memory pools for packing block allocator are disabled, @@ -127,6 +181,7 @@ void bli_pba_acquire_m // For general-use buffer requests, dynamically allocating memory // is assumed to be sufficient. void* buf = bli_fmalloc_align( malloc_fp, req_size, align_size, &r_val ); + bli_check_return_if_failure( r_val ); // Initialize the mem_t object with: // - the address of the memory block, @@ -148,11 +203,11 @@ void bli_pba_acquire_m // Map the requested packed buffer type to a zero-based index, which // we then use to select the corresponding memory pool. - pi = bli_packbuf_index( buf_type ); - pool = bli_pba_pool( pi, pba ); + dim_t pi = bli_packbuf_index( buf_type ); + pool_t* pool = bli_pba_pool( pi, pba ); // Extract the address of the pblk_t struct within the mem_t. - pblk = bli_mem_pblk( mem ); + pblk_t* pblk = bli_mem_pblk( mem ); // Acquire the mutex associated with the pba object. bli_pba_lock( pba ); @@ -168,7 +223,7 @@ void bli_pba_acquire_m // automatically, as-needed. Note that the addresses are stored // directly into the mem_t struct since pblk is the address of // the struct's pblk_t field. 
- bli_pool_checkout_block( req_size, pblk, pool ); + r_val = bli_pool_checkout_block( req_size, pblk, pool ); } // END CRITICAL SECTION @@ -176,6 +231,10 @@ void bli_pba_acquire_m // Release the mutex associated with the pba object. bli_pba_unlock( pba ); + // Now that we're out of the critical section, we can return if + // bli_pool_checkout_block() failed. + bli_check_return_if_failure( r_val ); + // Query the block_size from the pblk_t. This will be at least // req_size, perhaps larger. siz_t block_size = bli_pblk_block_size( pblk ); @@ -192,6 +251,8 @@ void bli_pba_acquire_m bli_mem_set_pool( pool, mem ); bli_mem_set_size( block_size, mem ); } + + return BLIS_SUCCESS; } @@ -256,6 +317,8 @@ void bli_pba_release // NOTE: We do not clear the buf_type field since there is no // "uninitialized" value for packbuf_t. bli_mem_clear( mem ); + + return; // BLIS_SUCCESS; } @@ -313,12 +376,20 @@ siz_t bli_pba_pool_size // ----------------------------------------------------------------------------- -void bli_pba_init_pools +err_t bli_pba_init_pools ( - const cntx_t* cntx, - pba_t* pba + pba_t* pba ) { + const cntx_t* cntx; + err_t r_val; + + // Query a native context so we have something to pass into + // bli_pba_compute_pool_block_sizes(). + // NOTE: Instead of calling bli_gks_query_cntx(), we call + // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). + bli_gks_query_cntx_noinit( &cntx ); + // Map each of the packbuf_t values to an index starting at zero. const dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK ); const dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL ); @@ -365,19 +436,31 @@ void bli_pba_init_pools cntx ); // Initialize the memory pools for A, B, and C. 
- bli_pool_init( num_blocks_a, block_ptrs_len_a, block_size_a, align_size_a, - offset_size_a, malloc_fp, free_fp, pool_a ); - bli_pool_init( num_blocks_b, block_ptrs_len_b, block_size_b, align_size_b, - offset_size_b, malloc_fp, free_fp, pool_b ); - bli_pool_init( num_blocks_c, block_ptrs_len_c, block_size_c, align_size_c, - offset_size_c, malloc_fp, free_fp, pool_c ); + r_val = bli_pool_init( num_blocks_a, block_ptrs_len_a, block_size_a, align_size_a, + offset_size_a, malloc_fp, free_fp, pool_a ); + + bli_check_callthen_return_if_failure( bli_pba_finalize_pools( pba ), r_val ); + + r_val = bli_pool_init( num_blocks_b, block_ptrs_len_b, block_size_b, align_size_b, + offset_size_b, malloc_fp, free_fp, pool_b ); + + bli_check_callthen_return_if_failure( bli_pba_finalize_pools( pba ), r_val ); + + r_val = bli_pool_init( num_blocks_c, block_ptrs_len_c, block_size_c, align_size_c, + offset_size_c, malloc_fp, free_fp, pool_c ); + + bli_check_callthen_return_if_failure( bli_pba_finalize_pools( pba ), r_val ); + + return BLIS_SUCCESS; } -void bli_pba_finalize_pools +err_t bli_pba_finalize_pools ( pba_t* pba ) { + err_t r_val; + // Map each of the packbuf_t values to an index starting at zero. dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK ); dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL ); @@ -389,9 +472,11 @@ void bli_pba_finalize_pools pool_t* pool_c = bli_pba_pool( index_c, pba ); // Finalize the memory pools for A, B, and C. 
- bli_pool_finalize( pool_a ); - bli_pool_finalize( pool_b ); - bli_pool_finalize( pool_c ); + r_val = bli_pool_finalize( pool_a ); bli_check_return_if_failure( r_val ); + r_val = bli_pool_finalize( pool_b ); bli_check_return_if_failure( r_val ); + r_val = bli_pool_finalize( pool_c ); bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_pba.h b/frame/base/bli_pba.h index dfda530902..89d20838dc 100644 --- a/frame/base/bli_pba.h +++ b/frame/base/bli_pba.h @@ -119,18 +119,19 @@ BLIS_INLINE void bli_pba_unlock( pba_t* pba ) // ----------------------------------------------------------------------------- -BLIS_EXPORT_BLIS pba_t* bli_pba_query( void ); +bool bli_pba_is_init( void ); +void bli_pba_mark_init( void ); +void bli_pba_mark_uninit( void ); -void bli_pba_init - ( - const cntx_t* cntx - ); -void bli_pba_finalize - ( - void - ); +pba_t* bli_pba_query( void ); +BLIS_EXPORT_BLIS void bli_pba_rntm_set_pba( rntm_t* rntm ); + +// ----------------------------------------------------------------------------- -void bli_pba_acquire_m +err_t bli_pba_init( void ); +err_t bli_pba_finalize( void ); + +err_t bli_pba_acquire_m ( rntm_t* rntm, siz_t req_size, @@ -144,16 +145,6 @@ void bli_pba_release mem_t* mem ); -BLIS_INLINE void bli_pba_rntm_set_pba - ( - rntm_t* rntm - ) -{ - pba_t* pba = bli_pba_query(); - - bli_rntm_set_pba( pba, rntm ); -} - siz_t bli_pba_pool_size ( const pba_t* pba, @@ -162,12 +153,11 @@ siz_t bli_pba_pool_size // ---------------------------------------------------------------------------- -void bli_pba_init_pools +err_t bli_pba_init_pools ( - const cntx_t* cntx, - pba_t* pba + pba_t* pba ); -void bli_pba_finalize_pools +err_t bli_pba_finalize_pools ( pba_t* pba ); diff --git a/frame/base/bli_pool.c b/frame/base/bli_pool.c index 684b0ef736..b04d1e307b 100644 --- a/frame/base/bli_pool.c +++ b/frame/base/bli_pool.c @@ -37,7 +37,7 @@ 
//#define BLIS_ENABLE_MEM_TRACING -void bli_pool_init +err_t bli_pool_init ( siz_t num_blocks, siz_t block_ptrs_len, @@ -51,6 +51,11 @@ void bli_pool_init { err_t r_val; + // Start off with a zeroed-out pool_t structure. + // NOTE: This is especially important because it zeroes out the .block_ptrs + // field, which bli_pool_finalize() uses to decide whether to return early. + bli_pool_clear( pool ); + // Make sure that block_ptrs_len is at least num_blocks. block_ptrs_len = bli_max( block_ptrs_len, num_blocks ); @@ -64,31 +69,12 @@ void bli_pool_init ( int )block_ptrs_len ); #endif - // Allocate the block_ptrs array. - // FGVZ: Do we want to call malloc_fp() for internal data structures as - // well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g. + // Allocate the block_ptrs array. We use calloc() so that all elements are + // initialized to zero, or NULL. This allows us to deallocate only those + // blocks that were allocated in the event of a failure. pblk_t* block_ptrs = - bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ), &r_val ); - - // Allocate and initialize each entry in the block_ptrs array. - for ( dim_t i = 0; i < num_blocks; ++i ) - { - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_pool_init(): allocating block %d of size %d (align %d, offset %d).\n", - ( int )i, ( int )block_size, ( int )align_size, ( int )offset_size ); - fflush( stdout ); - #endif - - bli_pool_alloc_block - ( - block_size, - align_size, - offset_size, - malloc_fp, - &(block_ptrs[i]) - ); - } + bli_calloc_intl( block_ptrs_len * sizeof( pblk_t ), &r_val ); // NOTE: The semantics of top_index approximate a stack, where a "full" // stack (no blocks checked out) is one where top_index == 0 and an empty @@ -101,7 +87,14 @@ void bli_pool_init // number line in which blocks are checked out from lowest to highest, // and additional blocks are added at the higher end. + // If the allocation failed, return the error code immediately. 
+ bli_check_return_if_failure( r_val ); + // Initialize the pool_t structure. + // NOTE: Given that the calloc() succeeded, we must set these fields so + // that if any of the below calls to bli_pool_alloc_block() fail, there + // will be enough information in the structure to allow bli_pool_finalize() + // to de-allocate what was allocated. bli_pool_set_block_ptrs( block_ptrs, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len, pool ); bli_pool_set_top_index( 0, pool ); @@ -111,9 +104,33 @@ void bli_pool_init bli_pool_set_offset_size( offset_size, pool ); bli_pool_set_malloc_fp( malloc_fp, pool ); bli_pool_set_free_fp( free_fp, pool ); + + // Allocate and initialize each entry in the block_ptrs array. + for ( dim_t i = 0; i < num_blocks; ++i ) + { + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_pool_init(): allocating block %d of size %d (align %d, offset %d).\n", + ( int )i, ( int )block_size, ( int )align_size, ( int )offset_size ); + fflush( stdout ); + #endif + + r_val = bli_pool_alloc_block + ( + block_size, + align_size, + offset_size, + malloc_fp, + &(block_ptrs[i]) + ); + + // If the allocation failed, finalize the pool and return the error. + bli_check_callthen_return_if_failure( bli_pool_finalize( pool ), r_val ); + } + + return BLIS_SUCCESS; } -void bli_pool_finalize +err_t bli_pool_finalize ( pool_t* pool ) @@ -126,6 +143,10 @@ void bli_pool_finalize // Query the block_ptrs array. pblk_t* block_ptrs = bli_pool_block_ptrs( pool ); + // Return early if the block_ptrs array is NULL. This would typically + // indicate that the pool structure was cleared but never initialized. + if ( block_ptrs == NULL ) return BLIS_SUCCESS; + // Query the total number of blocks currently allocated. const siz_t num_blocks = bli_pool_num_blocks( pool ); @@ -135,16 +156,17 @@ void bli_pool_finalize // checked out, then we would expect top_index != 0, and therefore this // check is not universally appropriate. #if 0 + err_t r_val; + // Query the top_index of the pool.
const siz_t top_index = bli_pool_top_index( pool ); - // Sanity check: The top_index should be zero. - if ( top_index != 0 ) + // Sanity check: The top_index should be zero. If it's not, then at + // least one block is still checked out to a thread. + if ( bli_error_checking_is_enabled() ) { - printf( "bli_pool_finalize(): final top_index == %d (expected 0); block_size: %d.\n", - ( int )top_index, ( int )bli_pool_block_size( pool ) ); - printf( "bli_pool_finalize(): Implication: not all blocks were checked back in!\n" ); - bli_abort(); + r_val = bli_check_outstanding_mem_pool_blocks( top_index ); + bli_check_return_if_failure( r_val ); } #endif @@ -180,21 +202,18 @@ void bli_pool_finalize // Free the block_ptrs array. bli_free_intl( block_ptrs ); - // This explicit clearing of the pool_t struct is not strictly - // necessary and so it has been commented out. -#if 0 - // Clear the contents of the pool_t struct. - bli_pool_set_block_ptrs( NULL, pool ); - bli_pool_set_block_ptrs_len( 0, pool ); - bli_pool_set_num_blocks( 0, pool ); - bli_pool_set_top_index( 0, pool ); - bli_pool_set_block_size( 0, pool ); - bli_pool_set_align_size( 0, pool ); - bli_pool_set_offset_size( 0, pool ); -#endif + // Clear the pool structure. This step is important because we want to + // either leave the pool structure in a fully initialized state (with a + // non-NULL block_ptrs field) or we want it to be cleared (with a NULL + // block_ptrs field) so that we'll know it is uninitialized. This is + // needed so that the caller can tell if a pool needs to be finalized in + // the event of an error. + bli_pool_clear( pool ); + + return BLIS_SUCCESS; } -void bli_pool_reinit +err_t bli_pool_reinit ( siz_t num_blocks_new, siz_t block_ptrs_len_new, @@ -204,6 +223,8 @@ void bli_pool_reinit pool_t* pool ) { + err_t r_val; + // Preserve the pointers to malloc() and free() provided when the pool // was first initialized. 
malloc_ft malloc_fp = bli_pool_malloc_fp( pool ); @@ -215,11 +236,14 @@ void bli_pool_reinit // those blocks back into the pool. (This condition can be detected // since the block size is encoded into each pblk, which is copied // upon checkout.) - bli_pool_finalize( pool ); + r_val = bli_pool_finalize( pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Reinitialize the pool with the new parameters, in particular, // the new block size. - bli_pool_init + r_val = bli_pool_init ( num_blocks_new, block_ptrs_len_new, @@ -230,15 +254,22 @@ void bli_pool_reinit free_fp, pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } -void bli_pool_checkout_block +err_t bli_pool_checkout_block ( siz_t req_size, pblk_t* block, pool_t* pool ) { + err_t r_val; + // If the requested block size is smaller than what the pool was // initialized with, reinitialize the pool to contain blocks of the // requested size. @@ -256,7 +287,7 @@ void bli_pool_checkout_block fflush( stdout ); #endif - bli_pool_reinit + r_val = bli_pool_reinit ( num_blocks_new, block_ptrs_len_new, @@ -265,6 +296,9 @@ void bli_pool_checkout_block offset_size_new, pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); } // If the pool is exhausted, add a block. @@ -276,7 +310,10 @@ void bli_pool_checkout_block fflush( stdout ); #endif - bli_pool_grow( 1, pool ); + r_val = bli_pool_grow( 1, pool ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); } // At this point, at least one block is guaranteed to be available. @@ -305,9 +342,11 @@ void bli_pool_checkout_block // Increment the pool's top_index. 
bli_pool_set_top_index( top_index + 1, pool ); + + return BLIS_SUCCESS; } -void bli_pool_checkin_block +err_t bli_pool_checkin_block ( pblk_t* block, pool_t* pool @@ -326,7 +365,8 @@ void bli_pool_checkin_block free_ft free_fp = bli_pool_free_fp( pool ); bli_pool_free_block( offset_size, free_fp, block ); - return; + + return BLIS_SUCCESS; } // Query the block_ptrs array. @@ -349,9 +389,11 @@ void bli_pool_checkin_block // Decrement the pool's top_index. bli_pool_set_top_index( top_index - 1, pool ); + + return BLIS_SUCCESS; } -void bli_pool_grow +err_t bli_pool_grow ( siz_t num_blocks_add, pool_t* pool @@ -360,7 +402,7 @@ void bli_pool_grow err_t r_val; // If the requested increase is zero, return early. - if ( num_blocks_add == 0 ) return; + if ( num_blocks_add == 0 ) return BLIS_SUCCESS; // Query the allocated length of the block_ptrs array and also the // total number of blocks currently allocated. @@ -396,12 +438,15 @@ void bli_pool_grow // Query the current block_ptrs array. pblk_t* block_ptrs_cur = bli_pool_block_ptrs( pool ); - // Allocate a new block_ptrs array. - // FGVZ: Do we want to call malloc_fp() for internal data structures as - // well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g. + // Allocate the block_ptrs array. We use calloc() so that all elements are + // initialized to zero, or NULL. This allows us to deallocate only those + // blocks that were allocated in the event of a failure. pblk_t* block_ptrs_new = - bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ), &r_val ); + bli_calloc_intl( block_ptrs_len_new * sizeof( pblk_t ), &r_val ); + + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); @@ -449,10 +494,12 @@ void bli_pool_grow fflush( stdout ); #endif + dim_t i; + // Allocate the requested additional blocks in the resized array. 
- for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i ) + for ( i = num_blocks_cur; i < num_blocks_new; ++i ) { - bli_pool_alloc_block + r_val = bli_pool_alloc_block ( block_size, align_size, @@ -460,22 +507,28 @@ void bli_pool_grow malloc_fp, &(block_ptrs[i]) ); + + // If the previous function failed, update the number of blocks in the + // pool to reflect the number that were added and then return the error. + bli_check_callthen_return_if_failure( bli_pool_set_num_blocks( i, pool ), r_val ); } // Update the pool_t struct with the new number of allocated blocks. // Notice that top_index remains unchanged, as do the block_size and // align_size fields. bli_pool_set_num_blocks( num_blocks_new, pool ); + + return BLIS_SUCCESS; } -void bli_pool_shrink +err_t bli_pool_shrink ( siz_t num_blocks_sub, pool_t* pool ) { // If the requested decrease is zero, return early. - if ( num_blocks_sub == 0 ) return; + if ( num_blocks_sub == 0 ) return BLIS_SUCCESS; // Query the total number of blocks currently allocated. const siz_t num_blocks = bli_pool_num_blocks( pool ); @@ -516,9 +569,11 @@ void bli_pool_shrink // Note that after shrinking the pool, num_blocks < block_ptrs_len. // This means the pool can grow again by num_blocks_sub before // a re-allocation of block_ptrs is triggered. + + return BLIS_SUCCESS; } -void bli_pool_alloc_block +err_t bli_pool_alloc_block ( siz_t block_size, siz_t align_size, @@ -540,10 +595,19 @@ void bli_pool_alloc_block // be recovered when it's time to free the block. Note that we have to // add offset_size to the number of bytes requested since we will skip // that many bytes at the beginning of the allocated memory. + // NOTE: What is the purpose of the offset_size parameter? It was first + // found to be useful by Nicholai Tukanov when optimizing microkernel + // performance on the POWER9 microarchitecture. 
The subconfiguration + // ('power9') for use on that hardware uses unconventional offset values + // for the pool of packing blocks for A and B. (See bli_pba.c for how + // those pools are created.) void* buf = bli_fmalloc_align( malloc_fp, block_size + offset_size, align_size, &r_val ); + // If the previous function failed, return the error code immediately. + bli_check_return_if_failure( r_val ); + #if 0 // NOTE: This code is disabled because it is not needed, since // bli_fmalloc_align() is guaranteed to return an aligned address. @@ -573,33 +637,40 @@ void bli_pool_alloc_block // Save the results in the pblk_t structure. bli_pblk_set_buf( buf, block ); bli_pblk_set_block_size( block_size, block ); + + return BLIS_SUCCESS; } -void bli_pool_free_block +err_t bli_pool_free_block ( siz_t offset_size, free_ft free_fp, pblk_t* block ) { - #ifdef BLIS_ENABLE_MEM_TRACING - printf( "bli_pool_free_block(): calling ffree_align(): size %d.\n", - ( int )bli_pblk_block_size( block ) ); - fflush( stdout ); - #endif - // Extract the pblk_t buffer, which is the aligned address returned from // bli_fmalloc_align() when the block was allocated. void* buf = bli_pblk_buf( block ); + // Return early if the pointer inside of the pblk_t is NULL. + if ( buf == NULL ) return BLIS_SUCCESS; + // Undo the pointer advancement by offset_size bytes performed previously // by bli_pool_alloc_block(). buf = ( void* )( ( char* )buf - offset_size ); + #ifdef BLIS_ENABLE_MEM_TRACING + printf( "bli_pool_free_block(): calling ffree_align(): size %d.\n", + ( int )bli_pblk_block_size( block ) ); + fflush( stdout ); + #endif + // Free the block via the bli_ffree_align() wrapper, which recovers the // original pointer that was returned by the pool's malloc() function when // the block was allocated. 
bli_ffree_align( free_fp, buf ); + + return BLIS_SUCCESS; } void bli_pool_print @@ -642,3 +713,20 @@ void bli_pblk_print printf( " block address (aligned): %p\n", buf ); } +void bli_pool_clear + ( + pool_t* pool + ) +{ + // Clear the contents of the pool_t struct. + bli_pool_set_block_ptrs( NULL, pool ); + bli_pool_set_block_ptrs_len( 0, pool ); + bli_pool_set_top_index( 0, pool ); + bli_pool_set_num_blocks( 0, pool ); + bli_pool_set_block_size( 0, pool ); + bli_pool_set_align_size( 0, pool ); + bli_pool_set_offset_size( 0, pool ); + bli_pool_set_malloc_fp( NULL, pool ); + bli_pool_set_free_fp( NULL, pool ); +} + diff --git a/frame/base/bli_pool.h b/frame/base/bli_pool.h index 0b16ae8eea..121872df83 100644 --- a/frame/base/bli_pool.h +++ b/frame/base/bli_pool.h @@ -215,7 +215,7 @@ BLIS_INLINE void bli_pool_set_top_index( siz_t top_index, pool_t* pool ) \ // ----------------------------------------------------------------------------- -void bli_pool_init +err_t bli_pool_init ( siz_t num_blocks, siz_t block_ptrs_len, @@ -226,11 +226,11 @@ void bli_pool_init free_ft free_fp, pool_t* pool ); -void bli_pool_finalize +err_t bli_pool_finalize ( pool_t* pool ); -void bli_pool_reinit +err_t bli_pool_reinit ( siz_t num_blocks_new, siz_t block_ptrs_len_new, @@ -240,30 +240,30 @@ void bli_pool_reinit pool_t* pool ); -void bli_pool_checkout_block +err_t bli_pool_checkout_block ( siz_t req_size, pblk_t* block, pool_t* pool ); -void bli_pool_checkin_block +err_t bli_pool_checkin_block ( pblk_t* block, pool_t* pool ); -void bli_pool_grow +err_t bli_pool_grow ( siz_t num_blocks_add, pool_t* pool ); -void bli_pool_shrink +err_t bli_pool_shrink ( siz_t num_blocks_sub, pool_t* pool ); -void bli_pool_alloc_block +err_t bli_pool_alloc_block ( siz_t block_size, siz_t align_size, @@ -271,7 +271,7 @@ void bli_pool_alloc_block malloc_ft malloc_fp, pblk_t* block ); -void bli_pool_free_block +err_t bli_pool_free_block ( siz_t offset_size, free_ft free_fp, @@ -287,5 +287,9 @@ void 
bli_pblk_print const pblk_t* pblk ); +void bli_pool_clear + ( + pool_t* pool + ); #endif diff --git a/frame/base/bli_rntm.c b/frame/base/bli_rntm.c index 2c13c74a22..55af6ab386 100644 --- a/frame/base/bli_rntm.c +++ b/frame/base/bli_rntm.c @@ -57,7 +57,7 @@ void bli_rntm_init_from_global( rntm_t* rntm ) bli_pthread_mutex_unlock( &global_rntm_mutex ); } -// ----------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- void bli_rntm_set_ways_for_op ( diff --git a/frame/base/bli_rntm.h b/frame/base/bli_rntm.h index 2a39f8894c..65b80ea786 100644 --- a/frame/base/bli_rntm.h +++ b/frame/base/bli_rntm.h @@ -330,7 +330,9 @@ BLIS_INLINE void bli_rntm_init( rntm_t* rntm ) bli_rntm_clear_pba( rntm ); } +// // -- rntm_t total thread calculation ------------------------------------------ +// BLIS_INLINE dim_t bli_rntm_calc_num_threads ( @@ -348,9 +350,9 @@ BLIS_INLINE dim_t bli_rntm_calc_num_threads return n_threads; } -// ----------------------------------------------------------------------------- - -// Function prototypes +// +// -- Function prototypes ------------------------------------------------------ +// BLIS_EXPORT_BLIS void bli_rntm_init_from_global( rntm_t* rntm ); @@ -391,5 +393,19 @@ dim_t bli_rntm_calc_num_threads_in const rntm_t* rntm ); +// +// -- rntm_t convenience init wrapper ------------------------------------------ +// + +BLIS_INLINE void bli_rntm_init_if_null( rntm_t** rntm, rntm_t* rntm_l ) +{ + // Initialize a local runtime. If the caller has a NULL rntm_t pointer, + // initialize from the global rntm_t. If the caller has a non-NULL rntm_t + // pointer, initialize from that rntm_t struct. In either case, the now- + // initialized local rntm_t struct is aliased via rntm. 
+ if ( *rntm == NULL ) { bli_rntm_init_from_global( rntm_l ); *rntm = rntm_l; } + else { *rntm_l = **rntm; *rntm = rntm_l; } +} + #endif diff --git a/frame/base/bli_sba.c b/frame/base/bli_sba.c index 776622bb4a..ddf12df7bc 100644 --- a/frame/base/bli_sba.c +++ b/frame/base/bli_sba.c @@ -38,6 +38,28 @@ // Note that the sba is an apool_t of array_t of pool_t. static apool_t sba = { .mutex = BLIS_PTHREAD_MUTEX_INITIALIZER }; +// A boolean that tracks whether bli_sba_init() has completed successfully. +static bool sba_is_init = FALSE; + +// ----------------------------------------------------------------------------- + +bool bli_sba_is_init( void ) +{ + return sba_is_init; +} + +void bli_sba_mark_init( void ) +{ + sba_is_init = TRUE; +} + +void bli_sba_mark_uninit( void ) +{ + sba_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + apool_t* bli_sba_query( void ) { return &sba; @@ -45,29 +67,56 @@ apool_t* bli_sba_query( void ) // ----------------------------------------------------------------------------- -void bli_sba_init( void ) +err_t bli_sba_init( void ) { - bli_apool_init( &sba ); + err_t r_val; + + // Sanity check: Return early if the API is already initialized. + if ( bli_sba_is_init() ) return BLIS_SUCCESS; + + // Initialize the small block allocator. + r_val = bli_apool_init( &sba ); + bli_check_return_if_failure( r_val ); + + // Mark the API as initialized. + bli_sba_mark_init(); + + return BLIS_SUCCESS; } -void bli_sba_finalize( void ) +err_t bli_sba_finalize( void ) { - bli_apool_finalize( &sba ); + err_t r_val; + + // Sanity check: Return early if the API is uninitialized. + if ( !bli_sba_is_init() ) return BLIS_SUCCESS; + + // Finalize the small block allocator. + r_val = bli_apool_finalize( &sba ); + bli_check_return_if_failure( r_val ); + + // Mark the API as uninitialized. 
+ bli_sba_mark_uninit(); + + return BLIS_SUCCESS; } -void* bli_sba_acquire +// ----------------------------------------------------------------------------- + +err_t bli_sba_acquire ( rntm_t* rntm, - siz_t req_size + siz_t req_size, + void** block ) { - void* block; err_t r_val; #ifdef BLIS_ENABLE_SBA_POOLS if ( rntm == NULL ) { - block = bli_malloc_intl( req_size, &r_val ); + *block = bli_malloc_intl( req_size, &r_val ); + bli_check_return_if_failure( r_val ); } else { @@ -86,7 +135,8 @@ void* bli_sba_acquire // would be timed.) if ( pool == NULL ) { - block = bli_malloc_intl( req_size, &r_val ); + *block = bli_malloc_intl( req_size, &r_val ); + bli_check_return_if_failure( r_val ); } else { @@ -104,20 +154,22 @@ void* bli_sba_acquire } // Check out a block using the block_size queried above. - bli_pool_checkout_block( block_size, &pblk, pool ); + r_val = bli_pool_checkout_block( block_size, &pblk, pool ); + bli_check_return_if_failure( r_val ); // The block address is stored within the pblk_t. - block = bli_pblk_buf( &pblk ); + *block = bli_pblk_buf( &pblk ); } } #else - block = bli_malloc_intl( req_size, &r_val ); + *block = bli_malloc_intl( req_size, &r_val ); + bli_check_return_if_failure( r_val ); #endif // Return the address obtained from the pblk_t. - return block; + return BLIS_SUCCESS; } void bli_sba_release @@ -133,8 +185,6 @@ void bli_sba_release } else { - pblk_t pblk; - // Query the small block pool from the rntm. pool_t* pool = bli_rntm_sba_pool( rntm ); @@ -144,6 +194,8 @@ void bli_sba_release } else { + pblk_t pblk; + // Query the block_size field from the pool. 
This is not super-important // for this particular application of the pool_t (that is, the "leaf" // component of the sba), but it seems like good housekeeping to maintain @@ -168,16 +220,24 @@ void bli_sba_release #endif } -array_t* bli_sba_checkout_array +// ----------------------------------------------------------------------------- + +err_t bli_sba_checkout_array ( - const siz_t n_threads + siz_t n_threads, + const array_t** array ) { + err_t r_val; + #ifndef BLIS_ENABLE_SBA_POOLS - return NULL; + *array = NULL; return BLIS_SUCCESS; #endif - return bli_apool_checkout_array( n_threads, &sba ); + r_val = bli_apool_checkout_array( n_threads, array, &sba ); + bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } void bli_sba_checkin_array @@ -192,7 +252,9 @@ void bli_sba_checkin_array bli_apool_checkin_array( array, &sba ); } -void bli_sba_rntm_set_pool +// ----------------------------------------------------------------------------- + +err_t bli_sba_rntm_set_pool ( siz_t index, array_t* array, @@ -204,11 +266,16 @@ void bli_sba_rntm_set_pool return; #endif + pool_t* pool; + // Query the pool_t* in the array_t corresponding to index. - pool_t* pool = bli_apool_array_elem( index, array ); + err_t r_val = bli_apool_array_elem( index, array, &pool ); + bli_check_return_if_failure( r_val ); // Embed the pool_t* into the rntm_t. 
bli_rntm_set_sba_pool( pool, rntm ); + + return BLIS_SUCCESS; } diff --git a/frame/base/bli_sba.h b/frame/base/bli_sba.h index 4fc3aaaeea..7b121e19b6 100644 --- a/frame/base/bli_sba.h +++ b/frame/base/bli_sba.h @@ -35,16 +35,41 @@ #ifndef BLIS_SBA_H #define BLIS_SBA_H +// ----------------------------------------------------------------------------- + +bool bli_sba_is_init( void ); +void bli_sba_mark_init( void ); +void bli_sba_mark_uninit( void ); + +// ----------------------------------------------------------------------------- + apool_t* bli_sba_query( void ); // ----------------------------------------------------------------------------- -void bli_sba_init( void ); -void bli_sba_finalize( void ); +err_t bli_sba_init( void ); +err_t bli_sba_finalize( void ); + +// ----------------------------------------------------------------------------- + +err_t bli_sba_acquire + ( + rntm_t* rntm, + siz_t req_size, + void** block + ); +void bli_sba_release + ( + rntm_t* rntm, + void* block + ); + +// ----------------------------------------------------------------------------- -array_t* bli_sba_checkout_array +err_t bli_sba_checkout_array ( - siz_t n_threads + siz_t n_threads, + const array_t** array ); void bli_sba_checkin_array @@ -52,24 +77,14 @@ void bli_sba_checkin_array array_t* array ); -void bli_sba_rntm_set_pool +// ----------------------------------------------------------------------------- + +err_t bli_sba_rntm_set_pool ( siz_t index, array_t* array, rntm_t* rntm ); -void* bli_sba_acquire - ( - rntm_t* rntm, - siz_t req_size - ); -void bli_sba_release - ( - rntm_t* rntm, - void* block - ); - - #endif diff --git a/frame/base/cast/bli_castnzm.c b/frame/base/cast/old/bli_castnzm.c similarity index 100% rename from frame/base/cast/bli_castnzm.c rename to frame/base/cast/old/bli_castnzm.c diff --git a/frame/base/cast/bli_castnzm.h b/frame/base/cast/old/bli_castnzm.h similarity index 100% rename from frame/base/cast/bli_castnzm.h rename to 
frame/base/cast/old/bli_castnzm.h diff --git a/frame/compat/amd/bla_gemv_amd.c b/frame/compat/amd/bla_gemv_amd.c index 398d1bf2c2..ef5ae12c72 100644 --- a/frame/compat/amd/bla_gemv_amd.c +++ b/frame/compat/amd/bla_gemv_amd.c @@ -144,7 +144,8 @@ void PASTEF77(ch,blasname) \ /* Obtain a valid context from the gks. This is needed because these implementations of ?gemv_() skip calling gemv_ex() and instead call the unblocked fused variants directly. */ \ - cntx_t* cntx = bli_gks_query_cntx(); \ + const cntx_t* cntx; \ + bli_gks_query_cntx( &cntx ); \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. */ \ diff --git a/frame/compat/extra/bla_gemm3m.c b/frame/compat/extra/bla_gemm3m.c index 258ac5bbbe..e1099a2648 100644 --- a/frame/compat/extra/bla_gemm3m.c +++ b/frame/compat/extra/bla_gemm3m.c @@ -103,7 +103,8 @@ void PASTEF77(ch,blasname) \ abbreviated version of bli_gemm_ex() so that we can bypass consideration of sup, which doesn't make sense in this context. */ \ { \ - cntx_t* cntx = ( cntx_t* )bli_gks_query_ind_cntx( BLIS_1M ); \ + const cntx_t* cntx; \ + bli_gks_query_ind_cntx( BLIS_1M, &cntx ); \ \ rntm_t rntm_l; \ rntm_t* rntm = &rntm_l; \ @@ -222,7 +223,8 @@ void PASTEF77(ch,blasname) \ abbreviated version of bli_gemm_ex() so that we can bypass consideration of sup, which doesn't make sense in this context. 
*/ \ { \ - cntx_t* cntx = ( cntx_t* )bli_gks_query_ind_cntx( BLIS_1M ); \ + const cntx_t* cntx; \ + bli_gks_query_ind_cntx( BLIS_1M, &cntx ); \ \ rntm_t rntm_l; \ rntm_t* rntm = &rntm_l; \ diff --git a/frame/include/bli_error_macro_defs.h b/frame/include/bli_error_macro_defs.h index 00d8acdcb8..5d32df9f4e 100644 --- a/frame/include/bli_error_macro_defs.h +++ b/frame/include/bli_error_macro_defs.h @@ -40,5 +40,89 @@ bli_check_error_code_helper( code, __FILE__, __LINE__ ) +// TODO: Consider renaming this macro to one of: +// - bli_error_handle() +// - bli_error_handle_code() +// Also, consider replacing instances of +// if ( bli_is_failure( r_val ) ) return r_val; +// with a macro named something like: +// - bli_check_return_failure( r_val ); +// Also, consider adding some of the logic from bli_check_error_code_helper() +// to the 'else' branch of bli_check_return_error_code() so that we can +// intercept and handle undefined error codes? + +#define bli_check_return_error_code( code ) \ +{ \ + if ( bli_is_failure( code ) ) \ + { \ + if ( bli_error_mode_is_return() ) \ + { \ + return code; \ + } \ + else /* if ( bli_error_mode_is_abort() ) */ \ + { \ + bli_print_msg( bli_error_string_for_code( code ), \ + __FILE__, __LINE__ ); \ + bli_abort(); \ + } \ + } \ +} + +#define bli_check_threads_return_if_failure( e_val_p, thread ) \ +{ \ + /* Broadcast the address of the master thread's copy of e_val. */ \ + err_t* e_val_t0_p = bli_thread_broadcast( thread, e_val_p ); \ +\ + /* If the local error checking resulted in failure, save it to the master + thread's e_val. Note this includes master overwriting its own e_val. */ \ + if ( bli_is_failure( *(e_val_p) ) ) *e_val_t0_p = *(e_val_p); \ +\ + /* Wait for all threads to execute the previous code. */ \ + bli_thread_barrier( thread ); \ +\ + /* If any thread reported failure, everyone returns. All threads + return their local error code.
*/ \ + if ( bli_is_failure( *e_val_t0_p ) ) return *e_val_p; \ +} + +#define bli_check_thread0_return_if_failure( e_val_p, thread ) \ +{ \ + /* Broadcast the address of the master thread's copy of e_val. */ \ + err_t* e_val_t0_p = bli_thread_broadcast( thread, e_val_p ); \ +\ + /* If the master thread reported failure, everyone returns. All threads + return their local error code. */ \ + if ( bli_is_failure( *e_val_t0_p ) ) return *e_val_p; \ +} + +#define bli_check_return_if_failure( error_code ) \ +{ \ + if ( bli_is_failure( error_code ) ) return error_code; \ +} + +#define bli_check_callthen_return_if_failure( func, error_code ) \ +{ \ + /* Note that the 'func' token will be a function call, including its + parenthesized parameter list (even if it is empty). */ \ + if ( bli_is_failure( error_code ) ) { func; return error_code; } \ +} + +#define bli_check_return_other_if_failure( error_code, other_val ) \ +{ \ + if ( bli_is_failure( error_code ) ) return other_val; \ +} + +#define BLIS_INIT_ONCE() \ +{ \ + err_t r_val = bli_init_once(); \ + bli_check_return_if_failure( r_val ); \ +} + +#define BLIS_FINALIZE_ONCE() \ +{ \ + err_t r_val = bli_finalize_once(); \ + bli_check_return_if_failure( r_val ); \ +} + #endif diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 1822065dab..1c8dcc95c5 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -427,6 +427,21 @@ BLIS_INLINE bool bli_is_unit_diag( diag_t diag ) } +// ind_t + +BLIS_INLINE bool bli_is_1m( ind_t im ) +{ + return ( bool ) + ( im == BLIS_1M ); +} + +BLIS_INLINE bool bli_is_nat( ind_t im ) +{ + return ( bool ) + ( im == BLIS_NAT ); +} + + // err_t-related BLIS_INLINE bool bli_is_success( err_t err ) @@ -513,6 +528,21 @@ BLIS_INLINE void bli_set_dims_incs_with_trans( trans_t trans, } +// direction-related + +BLIS_INLINE bool bli_is_fwd( dir_t direct ) +{ + return ( bool ) + ( direct == BLIS_FWD ); +} + +BLIS_INLINE bool 
bli_is_bwd( dir_t direct ) +{ + return ( bool ) + ( direct == BLIS_BWD ); +} + + // blocksize-related BLIS_INLINE dim_t bli_determine_blocksize_dim_f( dim_t i, dim_t dim, dim_t b_alg ) diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 08c7ddc4a6..4025bb5a5c 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -471,6 +471,10 @@ typedef enum // -- Data type -- +// NOTE: There are bits of code in BLIS that implicitly assume that we can +// index from BLIS_DT_LO (BLIS_FLOAT) to BLIS_DT_HI (BLIS_DCOMPLEX). Thus, +// those types need to be kept together / adjacent / contiguous. + typedef enum { BLIS_FLOAT = BLIS_BITVAL_FLOAT_TYPE, @@ -831,8 +835,9 @@ typedef enum // index 0, implement something like a BLIS_OPID_LEVEL3_RANGE_START // value that can be subtracted from the opid_t value to map it // to a zero-based range. -// This is needed because these level-3 opid_t values are used in -// bli_l3_ind.c to index into arrays. +// This is needed because some code in BLIS indexes with opid_t values, +// such as through an array, hence why starting at 0 is important +// (example: bli_l3_ind.c). // BLIS_GEMM = 0, BLIS_GEMMT, @@ -956,6 +961,15 @@ typedef enum // value (BLIS_ARCH_GENERIC) is given index num_archs-1. BLIS_NUM_ARCHS +# if 0 + // The maximum number of chars (including null terminator '\0') that we + // would ever need to store the name of a configuration as a string. This + // is used very infrequently, but there are times when we want to allocate + // enough bytes for all arch_t strings (as defined in bli_arch.c) without + // searching for the longest string at runtime. 
+ BLIS_ARCH_MAX_STR_LEN = 20 +#endif + } arch_t; @@ -1460,19 +1474,27 @@ typedef enum BLIS_FULL_ERROR_CHECKING } errlev_t; +typedef enum +{ + BLIS_ERROR_RETURN = 0, + BLIS_ERROR_ABORT +} errmode_t; + typedef enum { // Generic error codes - BLIS_SUCCESS = ( -1), - BLIS_FAILURE = ( -2), + BLIS_SUCCESS = ( 0), + BLIS_FAILURE = ( -1), BLIS_ERROR_CODE_MIN = ( -9), // General errors BLIS_INVALID_ERROR_CHECKING_LEVEL = ( -10), - BLIS_UNDEFINED_ERROR_CODE = ( -11), - BLIS_NULL_POINTER = ( -12), - BLIS_NOT_YET_IMPLEMENTED = ( -13), + BLIS_INVALID_ERROR_HANDLING_MODE = ( -11), + BLIS_UNDEFINED_ERROR_CODE = ( -12), + BLIS_NULL_POINTER = ( -13), + BLIS_NOT_YET_IMPLEMENTED = ( -14), + BLIS_REJECT_EXEC = ( -15), // Parameter-specific errors BLIS_INVALID_SIDE = ( -20), @@ -1521,47 +1543,61 @@ typedef enum // Storage-specific errors BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT = ( -70), + // Induced method-specific errors + BLIS_INVALID_IND = ( -80), + // Partitioning-specific errors - BLIS_INVALID_3x1_SUBPART = ( -80), - BLIS_INVALID_1x3_SUBPART = ( -81), - BLIS_INVALID_3x3_SUBPART = ( -82), + BLIS_INVALID_DIRECTION = ( -90), + BLIS_INVALID_3x1_SUBPART = ( -91), + BLIS_INVALID_1x3_SUBPART = ( -92), + BLIS_INVALID_3x3_SUBPART = ( -93), + BLIS_ROW_OFFSET_LESS_THAN_ZERO = ( -94), + BLIS_ROW_OFFSET_EXCEEDS_NUM_ROWS = ( -95), + BLIS_COL_OFFSET_LESS_THAN_ZERO = ( -96), + BLIS_COL_OFFSET_EXCEEDS_NUM_COLS = ( -97), + BLIS_VECTOR_OFFSET_LESS_THAN_ZERO = ( -98), + BLIS_VECTOR_OFFSET_EXCEEDS_NUM_ELEM = ( -99), // Control tree-specific errors - BLIS_UNEXPECTED_NULL_CONTROL_TREE = ( -90), + BLIS_UNEXPECTED_NULL_CONTROL_TREE = (-100), // Packing-specific errors - BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = (-100), + BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = (-110), // Buffer-specific errors - BLIS_EXPECTED_NONNULL_OBJECT_BUFFER = (-110), + BLIS_EXPECTED_NONNULL_OBJECT_BUFFER = (-120), // Memory errors - BLIS_MALLOC_RETURNED_NULL = (-120), + BLIS_MALLOC_RETURNED_NULL = (-130), // Internal memory pool 
errors - BLIS_INVALID_PACKBUF = (-130), - BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-131), - BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-132), - BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-133), - BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-134), + BLIS_INVALID_PACKBUF = (-140), + BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-141), + BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-142), + BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-143), + BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-144), + BLIS_MEM_POOL_BLOCKS_OUTSTANDING = (-145), // Object-related errors - BLIS_EXPECTED_OBJECT_ALIAS = (-140), + BLIS_EXPECTED_OBJECT_ALIAS = (-150), // Architecture-related errors - BLIS_INVALID_ARCH_ID = (-150), - BLIS_UNINITIALIZED_GKS_CNTX = (-151), - BLIS_INVALID_UKR_ID = (-152), + BLIS_INVALID_ARCH_ID = (-160), + BLIS_UNINITIALIZED_GKS_CNTX = (-161), + BLIS_INVALID_UKR_ID = (-162), // Blocksize-related errors - BLIS_MC_DEF_NONMULTIPLE_OF_MR = (-160), - BLIS_MC_MAX_NONMULTIPLE_OF_MR = (-161), - BLIS_NC_DEF_NONMULTIPLE_OF_NR = (-162), - BLIS_NC_MAX_NONMULTIPLE_OF_NR = (-163), - BLIS_KC_DEF_NONMULTIPLE_OF_KR = (-164), - BLIS_KC_MAX_NONMULTIPLE_OF_KR = (-165), - - BLIS_ERROR_CODE_MAX = (-170) + BLIS_MC_DEF_NONMULTIPLE_OF_MR = (-170), + BLIS_MC_MAX_NONMULTIPLE_OF_MR = (-171), + BLIS_NC_DEF_NONMULTIPLE_OF_NR = (-172), + BLIS_NC_MAX_NONMULTIPLE_OF_NR = (-173), + BLIS_KC_DEF_NONMULTIPLE_OF_KR = (-174), + BLIS_KC_MAX_NONMULTIPLE_OF_KR = (-175), + + // Thread-related errors + BLIS_EXPECTED_DIFF_NUM_THREADS = (-180), + + BLIS_ERROR_CODE_MAX = (-190) } err_t; #endif diff --git a/frame/include/blis.h b/frame/include/blis.h index 98ebee878d..cac3d29258 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -124,16 +124,15 @@ extern "C" { #include "bli_array.h" #include "bli_apool.h" #include "bli_sba.h" -#include "bli_memsys.h" #include "bli_mem.h" #include "bli_part.h" #include "bli_prune.h" #include "bli_query.h" #include "bli_auxinfo.h" +#include "bli_error.h" #include "bli_param_map.h" #include "bli_clock.h" #include "bli_check.h" 
-#include "bli_error.h" #include "bli_f2c.h" #include "bli_machval.h" #include "bli_getopt.h" @@ -150,7 +149,7 @@ extern "C" { #include "bli_setri.h" #include "bli_castm.h" -#include "bli_castnzm.h" +//#include "bli_castnzm.h" #include "bli_castv.h" #include "bli_projm.h" #include "bli_projv.h" diff --git a/frame/thread/bli_l3_decor_openmp.c b/frame/thread/bli_l3_decor_openmp.c index 2c71c75321..f2b4b765be 100644 --- a/frame/thread/bli_l3_decor_openmp.c +++ b/frame/thread/bli_l3_decor_openmp.c @@ -72,7 +72,8 @@ void bli_l3_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field @@ -85,7 +86,8 @@ void bli_l3_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allocate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); _Pragma( "omp parallel num_threads(n_threads)" ) diff --git a/frame/thread/bli_l3_decor_pthreads.c b/frame/thread/bli_l3_decor_pthreads.c index 80247dfb1c..3d1b88d101 100644 --- a/frame/thread/bli_l3_decor_pthreads.c +++ b/frame/thread/bli_l3_decor_pthreads.c @@ -164,7 +164,8 @@ void bli_l3_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. 
We do // this up-front only so that we have the rntm_t.sba_pool field @@ -177,7 +178,8 @@ void bli_l3_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allocate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); // Allocate an array of pthread objects and auxiliary data structs to pass // to the thread entry functions. diff --git a/frame/thread/bli_l3_decor_single.c b/frame/thread/bli_l3_decor_single.c index c2c43b3703..30c98304b2 100644 --- a/frame/thread/bli_l3_decor_single.c +++ b/frame/thread/bli_l3_decor_single.c @@ -75,7 +75,8 @@ void bli_l3_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we can create the global comm below. @@ -85,7 +86,8 @@ void bli_l3_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allcoate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); { diff --git a/frame/thread/bli_l3_sup_decor_openmp.c b/frame/thread/bli_l3_sup_decor_openmp.c index ff6bc667d3..a07b508b58 100644 --- a/frame/thread/bli_l3_sup_decor_openmp.c +++ b/frame/thread/bli_l3_sup_decor_openmp.c @@ -66,7 +66,8 @@ err_t bli_l3_sup_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. 
- array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field @@ -78,8 +79,9 @@ err_t bli_l3_sup_thread_decorator // the rntm below. bli_pba_rntm_set_pba( rntm ); - // Allcoate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + // Allocate a global communicator for the root thrinfo_t structures. + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); _Pragma( "omp parallel num_threads(n_threads)" ) diff --git a/frame/thread/bli_l3_sup_decor_pthreads.c b/frame/thread/bli_l3_sup_decor_pthreads.c index 375a85730e..e5688995e3 100644 --- a/frame/thread/bli_l3_sup_decor_pthreads.c +++ b/frame/thread/bli_l3_sup_decor_pthreads.c @@ -133,7 +133,8 @@ err_t bli_l3_sup_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field @@ -146,7 +147,8 @@ err_t bli_l3_sup_thread_decorator bli_pba_rntm_set_pba( rntm ); // Allocate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); // Allocate an array of pthread objects and auxiliary data structs to pass // to the thread entry functions. 
diff --git a/frame/thread/bli_l3_sup_decor_single.c b/frame/thread/bli_l3_sup_decor_single.c index df767ad292..9c93008db9 100644 --- a/frame/thread/bli_l3_sup_decor_single.c +++ b/frame/thread/bli_l3_sup_decor_single.c @@ -61,7 +61,8 @@ err_t bli_l3_sup_thread_decorator // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. - array_t* array = bli_sba_checkout_array( n_threads ); + array_t* array; + bli_sba_checkout_array( n_threads, ( const array_t** )&array ); // Access the pool_t* for thread 0 and embed it into the rntm. bli_sba_rntm_set_pool( 0, array, rntm ); @@ -70,8 +71,9 @@ err_t bli_l3_sup_thread_decorator bli_pba_rntm_set_pba( rntm ); #ifndef SKIP_THRINFO_TREE - // Allcoate a global communicator for the root thrinfo_t structures. - thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); + // Allocate a global communicator for the root thrinfo_t structures. + thrcomm_t* gl_comm; + bli_thrcomm_create( rntm, n_threads, &gl_comm ); #endif diff --git a/frame/thread/bli_thrcomm.h b/frame/thread/bli_thrcomm.h index d0ffb13461..c3565fbd57 100644 --- a/frame/thread/bli_thrcomm.h +++ b/frame/thread/bli_thrcomm.h @@ -52,15 +52,15 @@ BLIS_INLINE dim_t bli_thrcomm_num_threads( thrcomm_t* comm ) // Thread communicator prototypes. 
-thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ); -void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ); -void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ); -void bli_thrcomm_cleanup( thrcomm_t* comm ); +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ); +void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ); +void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ); +void bli_thrcomm_cleanup( thrcomm_t* comm ); BLIS_EXPORT_BLIS void bli_thrcomm_barrier( dim_t thread_id, thrcomm_t* comm ); BLIS_EXPORT_BLIS void* bli_thrcomm_bcast( dim_t inside_id, void* to_send, thrcomm_t* comm ); -void bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm ); +void bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm ); #endif diff --git a/frame/thread/bli_thrcomm_openmp.c b/frame/thread/bli_thrcomm_openmp.c index 9bb35ea31a..0c7e81af93 100644 --- a/frame/thread/bli_thrcomm_openmp.c +++ b/frame/thread/bli_thrcomm_openmp.c @@ -37,17 +37,24 @@ #ifdef BLIS_ENABLE_OPENMP -thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif - thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) ); + err_t r_val; + thrcomm_t* tc; - bli_thrcomm_init( n_threads, comm ); + r_val = bli_sba_acquire( rntm, sizeof( thrcomm_t ), ( void** )&tc ); + bli_check_return_if_failure( r_val ); - return comm; + bli_thrcomm_init( n_threads, tc ); + + // Set the thrcomm_t pointer. 
+ *comm = tc; + + return BLIS_SUCCESS; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) diff --git a/frame/thread/bli_thrcomm_pthreads.c b/frame/thread/bli_thrcomm_pthreads.c index d0896f94df..d057bf3b1e 100644 --- a/frame/thread/bli_thrcomm_pthreads.c +++ b/frame/thread/bli_thrcomm_pthreads.c @@ -37,17 +37,24 @@ #ifdef BLIS_ENABLE_PTHREADS -thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif - thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) ); + err_t r_val; + thrcomm_t* tc; - bli_thrcomm_init( n_threads, comm ); + r_val = bli_sba_acquire( rntm, sizeof( thrcomm_t ), ( void** )&tc ); + bli_check_return_if_failure( r_val ); - return comm; + bli_thrcomm_init( n_threads, tc ); + + // Set the thrcomm_t pointer. + *comm = tc; + + return BLIS_SUCCESS; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) diff --git a/frame/thread/bli_thrcomm_single.c b/frame/thread/bli_thrcomm_single.c index cedb3c5b6e..87014a902e 100644 --- a/frame/thread/bli_thrcomm_single.c +++ b/frame/thread/bli_thrcomm_single.c @@ -37,18 +37,24 @@ #ifndef BLIS_ENABLE_MULTITHREADING -//Constructors and destructors for constructors -thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) +err_t bli_thrcomm_create( rntm_t* rntm, dim_t n_threads, thrcomm_t** comm ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif - thrcomm_t* comm = bli_sba_acquire( rntm, sizeof( thrcomm_t ) ); + err_t r_val; + thrcomm_t* tc; - bli_thrcomm_init( n_threads, comm ); + r_val = bli_sba_acquire( rntm, sizeof( thrcomm_t ), ( void** )&tc ); + bli_check_return_if_failure( r_val ); - return comm; + bli_thrcomm_init( n_threads, tc ); + + // Set the thrcomm_t pointer. 
+ *comm = tc; + + return BLIS_SUCCESS; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) diff --git a/frame/thread/bli_thread.c b/frame/thread/bli_thread.c index 7d647a314b..30db047b4d 100644 --- a/frame/thread/bli_thread.c +++ b/frame/thread/bli_thread.c @@ -46,10 +46,34 @@ extern rntm_t global_rntm; // resides in bli_rntm.c.) extern bli_pthread_mutex_t global_rntm_mutex; +// A boolean that tracks whether bli_thread_init() has completed successfully. +static bool thread_is_init = FALSE; + // ----------------------------------------------------------------------------- -void bli_thread_init( void ) +bool bli_thread_is_init( void ) +{ + return thread_is_init; +} + +void bli_thread_mark_init( void ) +{ + thread_is_init = TRUE; +} + +void bli_thread_mark_uninit( void ) { + thread_is_init = FALSE; +} + +// ----------------------------------------------------------------------------- + +err_t bli_thread_init( void ) +{ + // Sanity check: Return early if the API is already initialized. + if ( bli_thread_is_init() ) return BLIS_SUCCESS; + + // Initialize some global communicators. bli_thrcomm_init( 1, &BLIS_SINGLE_COMM ); bli_packm_thrinfo_init_single( &BLIS_PACKM_SINGLE_THREADED ); bli_l3_thrinfo_init_single( &BLIS_GEMM_SINGLE_THREADED ); @@ -57,10 +81,22 @@ void bli_thread_init( void ) // Read the environment variables and use them to initialize the // global runtime object. bli_thread_init_rntm_from_env( &global_rntm ); + + // Mark the API as initialized. + bli_thread_mark_init(); + + return BLIS_SUCCESS; } -void bli_thread_finalize( void ) +err_t bli_thread_finalize( void ) { + // Sanity check: Return early if the API is uninitialized. + if ( !bli_thread_is_init() ) return BLIS_SUCCESS; + + // Mark the API as uninitialized. 
+ bli_thread_mark_uninit(); + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- @@ -1509,7 +1545,7 @@ dim_t bli_ipow( dim_t base, dim_t power ) dim_t bli_thread_get_jc_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_jc_ways( &global_rntm ); } @@ -1517,7 +1553,7 @@ dim_t bli_thread_get_jc_nt( void ) dim_t bli_thread_get_pc_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_pc_ways( &global_rntm ); } @@ -1525,7 +1561,7 @@ dim_t bli_thread_get_pc_nt( void ) dim_t bli_thread_get_ic_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_ic_ways( &global_rntm ); } @@ -1533,7 +1569,7 @@ dim_t bli_thread_get_ic_nt( void ) dim_t bli_thread_get_jr_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_jr_ways( &global_rntm ); } @@ -1541,7 +1577,7 @@ dim_t bli_thread_get_jr_nt( void ) dim_t bli_thread_get_ir_nt( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_ir_ways( &global_rntm ); } @@ -1549,17 +1585,17 @@ dim_t bli_thread_get_ir_nt( void ) dim_t bli_thread_get_num_threads( void ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); return bli_rntm_num_threads( &global_rntm ); } // ---------------------------------------------------------------------------- -void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ) +err_t bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. 
bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -1568,12 +1604,14 @@ void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ) // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } -void bli_thread_set_num_threads( dim_t n_threads ) +err_t bli_thread_set_num_threads( dim_t n_threads ) { // We must ensure that global_rntm has been initialized. - bli_init_once(); + BLIS_INIT_ONCE(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); @@ -1582,6 +1620,8 @@ void bli_thread_set_num_threads( dim_t n_threads ) // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); + + return BLIS_SUCCESS; } // ---------------------------------------------------------------------------- diff --git a/frame/thread/bli_thread.h b/frame/thread/bli_thread.h index 5e9c650b5b..68c03857e5 100644 --- a/frame/thread/bli_thread.h +++ b/frame/thread/bli_thread.h @@ -58,8 +58,12 @@ #include "bli_l3_sup_decor.h" // Initialization-related prototypes. -void bli_thread_init( void ); -void bli_thread_finalize( void ); +bool bli_thread_is_init( void ); +void bli_thread_mark_init( void ); +void bli_thread_mark_uninit( void ); + +err_t bli_thread_init( void ); +err_t bli_thread_finalize( void ); // Thread range-related prototypes. 
@@ -202,8 +206,8 @@ BLIS_EXPORT_BLIS dim_t bli_thread_get_jr_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_ir_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_num_threads( void ); -BLIS_EXPORT_BLIS void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ); -BLIS_EXPORT_BLIS void bli_thread_set_num_threads( dim_t value ); +BLIS_EXPORT_BLIS err_t bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ); +BLIS_EXPORT_BLIS err_t bli_thread_set_num_threads( dim_t value ); void bli_thread_init_rntm_from_env( rntm_t* rntm ); diff --git a/frame/thread/bli_thrinfo.c b/frame/thread/bli_thrinfo.c index bbe7114009..e5d59362d4 100644 --- a/frame/thread/bli_thrinfo.c +++ b/frame/thread/bli_thrinfo.c @@ -35,23 +35,28 @@ #include "blis.h" -thrinfo_t* bli_thrinfo_create +err_t bli_thrinfo_create ( - rntm_t* rntm, - thrcomm_t* ocomm, - dim_t ocomm_id, - dim_t n_way, - dim_t work_id, - bool free_comm, - bszid_t bszid, - thrinfo_t* sub_node + rntm_t* rntm, + thrcomm_t* ocomm, + dim_t ocomm_id, + dim_t n_way, + dim_t work_id, + bool free_comm, + bszid_t bszid, + thrinfo_t* sub_node, + thrinfo_t** node ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrinfo_create(): " ); #endif - thrinfo_t* thread = bli_sba_acquire( rntm, sizeof( thrinfo_t ) ); + err_t r_val; + thrinfo_t* thread; + + r_val = bli_sba_acquire( rntm, sizeof( thrinfo_t ), ( void** )&thread ); + bli_check_return_if_failure( r_val ); bli_thrinfo_init ( @@ -63,7 +68,10 @@ thrinfo_t* bli_thrinfo_create sub_node ); - return thread; + // Set the thrinfo_t pointer. 
+ *node = thread; + + return BLIS_SUCCESS; } void bli_thrinfo_init @@ -153,13 +161,15 @@ void bli_thrinfo_free // ----------------------------------------------------------------------------- -void bli_thrinfo_grow +err_t bli_thrinfo_grow ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { + err_t r_val; + // First, consider the prenode branch of the thrinfo_t tree, which should be // expanded only if there exists a prenode branch in the cntl_t tree. @@ -169,26 +179,23 @@ void bli_thrinfo_grow // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_prenode( thread ) == NULL ) { - // Assertion / sanity check. - if ( bli_cntl_bszid( cntl ) != BLIS_MC ) - { - printf( "Assertion failed: Expanding prenode for non-IC loop?\n" ); - bli_abort(); - } - // Now we must create the packa, jr, and ir nodes that make up // the prenode branch of current cntl_t node. + thrinfo_t* thread_prenode; + // Create a new node (or, if needed, multiple nodes) along the // prenode branch of the tree and return the pointer to the // (highest) child. - thrinfo_t* thread_prenode = bli_thrinfo_rgrow_prenode + r_val = bli_thrinfo_rgrow_prenode ( rntm, cntl, bli_cntl_sub_prenode( cntl ), - thread + thread, + &thread_prenode ); + bli_check_return_if_failure( r_val ); // Attach the child thrinfo_t node for the secondary branch to its // parent structure. @@ -206,35 +213,42 @@ void bli_thrinfo_grow // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_node( thread ) == NULL ) { + thrinfo_t* thread_child; + // Create a new node (or, if needed, multiple nodes) along the // main sub-node branch of the tree and return the pointer to the // (highest) child. 
- thrinfo_t* thread_child = bli_thrinfo_rgrow + r_val = bli_thrinfo_rgrow ( rntm, cntl, bli_cntl_sub_node( cntl ), - thread + thread, + &thread_child ); + bli_check_return_if_failure( r_val ); // Attach the child thrinfo_t node for the primary branch to its // parent structure. bli_thrinfo_set_sub_node( thread_child, thread ); } } + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -thrinfo_t* bli_thrinfo_rgrow +err_t bli_thrinfo_rgrow ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ) { - thrinfo_t* thread_cur; + err_t r_val; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is @@ -243,25 +257,31 @@ thrinfo_t* bli_thrinfo_rgrow { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. - thread_cur = bli_thrinfo_create_for_cntl + r_val = bli_thrinfo_create_for_cntl ( rntm, cntl_par, cntl_cur, - thread_par + thread_par, + thread_cur ); + bli_check_return_if_failure( r_val ); } else // if ( bli_cntl_bszid( cntl_cur ) == BLIS_NO_PART ) { + thrinfo_t* thread_seg; + // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. - thrinfo_t* thread_seg = bli_thrinfo_rgrow + r_val = bli_thrinfo_rgrow ( rntm, cntl_par, bli_cntl_sub_node( cntl_cur ), - thread_par + thread_par, + &thread_seg ); + bli_check_return_if_failure( r_val ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node @@ -272,7 +292,7 @@ thrinfo_t* bli_thrinfo_rgrow // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. 
- thread_cur = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm @@ -281,28 +301,33 @@ thrinfo_t* bli_thrinfo_rgrow bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - thread_seg // sub_node + thread_seg, // sub_node + thread_cur // node ); + bli_check_return_if_failure( r_val ); } - return thread_cur; + return BLIS_SUCCESS; } #define BLIS_NUM_STATIC_COMMS 80 -thrinfo_t* bli_thrinfo_create_for_cntl +err_t bli_thrinfo_create_for_cntl ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ) { + err_t r_val; + // If we are running with a single thread, all of the code can be reduced // and simplified to this. if ( bli_rntm_calc_num_threads( rntm ) == 1 ) { - thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm &BLIS_SINGLE_COMM, // ocomm @@ -311,9 +336,12 @@ thrinfo_t* bli_thrinfo_create_for_cntl 0, // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - NULL // sub_node + NULL, // sub_node + thread_chl // node ); - return thread_chl; + bli_check_return_if_failure( r_val ); + + return BLIS_SUCCESS; } thrcomm_t* static_comms[ BLIS_NUM_STATIC_COMMS ]; @@ -321,18 +349,20 @@ thrinfo_t* bli_thrinfo_create_for_cntl const bszid_t bszid_chl = bli_cntl_bszid( cntl_chl ); - const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); + //const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); const dim_t parent_n_way = bli_thread_n_way( thread_par ); const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); const dim_t parent_work_id = bli_thread_work_id( thread_par ); +#if 0 // Sanity check: make sure the number of threads in the parent's // communicator is divisible by the number of new sub-groups. 
if ( parent_nt_in % parent_n_way != 0 ) { printf( "Assertion failed: parent_nt_in parent_n_way != 0\n" ); - bli_abort(); + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } +#endif // Compute: // - the number of threads inside the new child comm, @@ -350,13 +380,23 @@ thrinfo_t* bli_thrinfo_create_for_cntl // pointers. if ( bli_thread_am_ochief( thread_par ) ) { - err_t r_val; - if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) + { new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val ); + } else - new_comms = static_comms; + { + new_comms = static_comms; r_val = BLIS_SUCCESS; + } } + else + { + r_val = BLIS_SUCCESS; + } + + // If the master thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_thread0_return_if_failure( &r_val, thread_par ); // Broadcast the temporary array to all threads in the parent's // communicator. @@ -366,13 +406,23 @@ thrinfo_t* bli_thrinfo_create_for_cntl // object and store it in the array element corresponding to the // parent's work id. if ( child_comm_id == 0 ) - new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in ); + { + r_val = bli_thrcomm_create( rntm, child_nt_in, &new_comms[ parent_work_id ] ); + } + else + { + r_val = BLIS_SUCCESS; + } + + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); bli_thread_barrier( thread_par ); // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. 
- thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm new_comms[ parent_work_id ], // ocomm @@ -381,9 +431,14 @@ thrinfo_t* bli_thrinfo_create_for_cntl child_work_id, // work_id TRUE, // free_comm bszid_chl, // bszid - NULL // sub_node + NULL, // sub_node + thread_chl // node ); + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + bli_thread_barrier( thread_par ); // The parent's chief thread frees the temporary array of thrcomm_t @@ -394,20 +449,21 @@ thrinfo_t* bli_thrinfo_create_for_cntl bli_free_intl( new_comms ); } - return thread_chl; + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -thrinfo_t* bli_thrinfo_rgrow_prenode +err_t bli_thrinfo_rgrow_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ) { - thrinfo_t* thread_cur; + err_t r_val; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is @@ -416,25 +472,31 @@ thrinfo_t* bli_thrinfo_rgrow_prenode { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. - thread_cur = bli_thrinfo_create_for_cntl_prenode + r_val = bli_thrinfo_create_for_cntl_prenode ( rntm, cntl_par, cntl_cur, - thread_par + thread_par, + thread_cur ); + bli_check_return_if_failure( r_val ); } else // if ( bli_cntl_bszid( cntl_cur ) == BLIS_NO_PART ) { + thrinfo_t* thread_seg; + // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. 
- thrinfo_t* thread_seg = bli_thrinfo_rgrow_prenode + r_val = bli_thrinfo_rgrow_prenode ( rntm, cntl_par, bli_cntl_sub_node( cntl_cur ), - thread_par + thread_par, + &thread_seg ); + bli_check_return_if_failure( r_val ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node @@ -445,7 +507,7 @@ thrinfo_t* bli_thrinfo_rgrow_prenode // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. - thread_cur = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm @@ -454,19 +516,22 @@ thrinfo_t* bli_thrinfo_rgrow_prenode bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - thread_seg // sub_node + thread_seg, // sub_node + thread_cur // node ); + bli_check_return_if_failure( r_val ); } - return thread_cur; + return BLIS_SUCCESS; } -thrinfo_t* bli_thrinfo_create_for_cntl_prenode +err_t bli_thrinfo_create_for_cntl_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread ) { // NOTE: This function only has to work for the ic -> (pa -> jr) @@ -474,21 +539,25 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode // bli_thrinfo_create_for_cntl() will be called for the last jr->ir // branch extension. 
+ err_t r_val; + const bszid_t bszid_chl = bli_cntl_bszid( cntl_chl ); const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); - const dim_t parent_n_way = bli_thread_n_way( thread_par ); + //const dim_t parent_n_way = bli_thread_n_way( thread_par ); const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); //const dim_t parent_work_id = bli_thread_work_id( thread_par ); +#if 0 // Sanity check: make sure the number of threads in the parent's // communicator is divisible by the number of new sub-groups. if ( parent_nt_in % parent_n_way != 0 ) { printf( "Assertion failed: parent_nt_in (%d) parent_n_way (%d) != 0\n", ( int )parent_nt_in, ( int )parent_n_way ); - bli_abort(); + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } +#endif //dim_t child_nt_in = bli_cntl_calc_num_threads_in( rntm, cntl_chl ); //dim_t child_n_way = bli_rntm_ways_for( bszid_chl, rntm ); @@ -503,16 +572,25 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode // parent's chief-ness is equivalent to checking for chief-ness in the new // about-to-be-created communicator group. thrcomm_t* new_comm = NULL; + if ( bli_thread_am_ochief( thread_par ) ) - new_comm = bli_thrcomm_create( rntm, child_nt_in ); + r_val = bli_thrcomm_create( rntm, child_nt_in, &new_comm ); + else + r_val = BLIS_SUCCESS; + + // If the master thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_thread0_return_if_failure( &r_val, thread_par ); // Broadcast the new thrcomm_t address to the other threads in the // parent's group. new_comm = bli_thread_broadcast( thread_par, new_comm ); + thrinfo_t* thread_chl = NULL; + // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. 
- thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm new_comm, // ocomm @@ -521,12 +599,20 @@ thrinfo_t* bli_thrinfo_create_for_cntl_prenode child_work_id, // work_id TRUE, // free_comm bszid_chl, // bszid - NULL // sub_node + NULL, // sub_node + &thread_chl // node ); + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + bli_thread_barrier( thread_par ); - return thread_chl; + // Set the thrinfo_t pointer. + *thread = thread_chl; + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- diff --git a/frame/thread/bli_thrinfo.h b/frame/thread/bli_thrinfo.h index 6b98096849..b793f14e20 100644 --- a/frame/thread/bli_thrinfo.h +++ b/frame/thread/bli_thrinfo.h @@ -186,16 +186,17 @@ BLIS_INLINE void bli_thread_barrier( const thrinfo_t* t ) // Prototypes for level-3 thrinfo functions not specific to any operation. 
// -thrinfo_t* bli_thrinfo_create +err_t bli_thrinfo_create ( - rntm_t* rntm, - thrcomm_t* ocomm, - dim_t ocomm_id, - dim_t n_way, - dim_t work_id, - bool free_comm, - bszid_t bszid, - thrinfo_t* sub_node + rntm_t* rntm, + thrcomm_t* ocomm, + dim_t ocomm_id, + dim_t n_way, + dim_t work_id, + bool free_comm, + bszid_t bszid, + thrinfo_t* sub_node, + thrinfo_t** node ); void bli_thrinfo_init @@ -223,43 +224,47 @@ void bli_thrinfo_free // ----------------------------------------------------------------------------- -void bli_thrinfo_grow +err_t bli_thrinfo_grow ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); -thrinfo_t* bli_thrinfo_rgrow +err_t bli_thrinfo_rgrow ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ); -thrinfo_t* bli_thrinfo_create_for_cntl +err_t bli_thrinfo_create_for_cntl ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ); -thrinfo_t* bli_thrinfo_rgrow_prenode +err_t bli_thrinfo_rgrow_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_cur, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ); -thrinfo_t* bli_thrinfo_create_for_cntl_prenode +err_t bli_thrinfo_create_for_cntl_prenode ( - rntm_t* rntm, - cntl_t* cntl_par, - cntl_t* cntl_chl, - thrinfo_t* thread_par + rntm_t* rntm, + cntl_t* cntl_par, + cntl_t* cntl_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ); // ----------------------------------------------------------------------------- diff --git a/frame/thread/bli_thrinfo_sup.c b/frame/thread/bli_thrinfo_sup.c index 966247fd04..d0af37e7c3 100644 --- a/frame/thread/bli_thrinfo_sup.c +++ b/frame/thread/bli_thrinfo_sup.c @@ -35,15 +35,17 @@ #include "blis.h" -void 
bli_thrinfo_sup_grow +err_t bli_thrinfo_sup_grow ( rntm_t* rntm, const bszid_t* bszid_par, thrinfo_t* thread ) { + err_t r_val; + if ( thread == &BLIS_GEMM_SINGLE_THREADED || - thread == &BLIS_PACKM_SINGLE_THREADED ) return; + thread == &BLIS_PACKM_SINGLE_THREADED ) return BLIS_SUCCESS; // NOTE: If bli_thrinfo_sup_rgrow() is being called, the sub_node field will // always be non-NULL, and so there's no need to check it. @@ -53,35 +55,42 @@ void bli_thrinfo_sup_grow // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_node( thread ) == NULL ) { + thrinfo_t* thread_child; + // Create a new node (or, if needed, multiple nodes) along the // main sub-node branch of the tree and return the pointer to the // (highest) child. - thrinfo_t* thread_child = bli_thrinfo_sup_rgrow + r_val = bli_thrinfo_sup_rgrow ( rntm, bszid_par, &bszid_par[1], - thread + thread, + &thread_child ); + bli_check_return_if_failure( r_val ); // Attach the child thrinfo_t node for the primary branch to its // parent structure. bli_thrinfo_set_sub_node( thread_child, thread ); } } + + return BLIS_SUCCESS; } // ----------------------------------------------------------------------------- -thrinfo_t* bli_thrinfo_sup_rgrow +err_t bli_thrinfo_sup_rgrow ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_cur, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ) { - thrinfo_t* thread_cur; + err_t r_val; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is @@ -90,25 +99,31 @@ thrinfo_t* bli_thrinfo_sup_rgrow { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. 
- thread_cur = bli_thrinfo_sup_create_for_cntl + r_val = bli_thrinfo_sup_create_for_cntl ( rntm, bszid_par, bszid_cur, - thread_par + thread_par, + thread_cur ); + bli_check_return_if_failure( r_val ); } else // if ( *bszid_cur == BLIS_NO_PART ) { + thrinfo_t* thread_seg; + // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. - thrinfo_t* thread_seg = bli_thrinfo_sup_rgrow + r_val = bli_thrinfo_sup_rgrow ( rntm, bszid_par, &bszid_cur[1], - thread_par + thread_par, + &thread_seg ); + bli_check_return_if_failure( r_val ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node @@ -119,7 +134,7 @@ thrinfo_t* bli_thrinfo_sup_rgrow // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. - thread_cur = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm @@ -128,28 +143,33 @@ thrinfo_t* bli_thrinfo_sup_rgrow bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - thread_seg // sub_node + thread_seg, // sub_node + thread_cur // node ); + bli_check_return_if_failure( r_val ); } - return thread_cur; + return BLIS_SUCCESS; } #define BLIS_NUM_STATIC_COMMS 80 -thrinfo_t* bli_thrinfo_sup_create_for_cntl +err_t bli_thrinfo_sup_create_for_cntl ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_chl, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ) { + err_t r_val; + // If we are running with a single thread, all of the code can be reduced // and simplified to this. 
if ( bli_rntm_calc_num_threads( rntm ) == 1 ) { - thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm &BLIS_SINGLE_COMM, // ocomm @@ -158,10 +178,12 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl 0, // work_id FALSE, // free_comm BLIS_NO_PART, // bszid - NULL // sub_node + NULL, // sub_node + thread_chl // node ); + bli_check_return_if_failure( r_val ); - return thread_chl; + return BLIS_SUCCESS; } // The remainder of this function handles the cases involving the use of @@ -189,7 +211,7 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. - thrinfo_t* thread_chl = bli_thrinfo_create + r_val = bli_thrinfo_create ( rntm, // rntm NULL, // ocomm @@ -198,93 +220,120 @@ thrinfo_t* bli_thrinfo_sup_create_for_cntl child_work_id, // work_id TRUE, // free_comm *bszid_chl, // bszid - NULL // sub_node + NULL, // sub_node + thread_chl // node ); + bli_check_return_if_failure( r_val ); - return thread_chl; + return BLIS_SUCCESS; } - else - { - // If we are packing at least one of A or B, then we use the general - // approach that employs broadcasts and barriers. - thrcomm_t* static_comms[ BLIS_NUM_STATIC_COMMS ]; - thrcomm_t** new_comms = NULL; + // If we are packing at least one of A or B, then we use the general + // approach that employs broadcasts and barriers. - const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); - const dim_t parent_n_way = bli_thread_n_way( thread_par ); - const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); - const dim_t parent_work_id = bli_thread_work_id( thread_par ); + thrcomm_t* static_comms[ BLIS_NUM_STATIC_COMMS ]; + thrcomm_t** new_comms = NULL; - // Sanity check: make sure the number of threads in the parent's - // communicator is divisible by the number of new sub-groups. 
- if ( parent_nt_in % parent_n_way != 0 ) - { - printf( "Assertion failed: parent_nt_in parent_n_way != 0\n" ); - bli_abort(); - } + //const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); + const dim_t parent_n_way = bli_thread_n_way( thread_par ); + const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); + const dim_t parent_work_id = bli_thread_work_id( thread_par ); - // Compute: - // - the number of threads inside the new child comm, - // - the current thread's id within the new communicator, - // - the current thread's work id, given the ways of parallelism - // to be obtained within the next loop. - const dim_t child_nt_in = bli_rntm_calc_num_threads_in( bszid_chl, rntm ); - const dim_t child_n_way = bli_rntm_ways_for( *bszid_chl, rntm ); - const dim_t child_comm_id = parent_comm_id % child_nt_in; - const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way ); + #if 0 + // Sanity check: make sure the number of threads in the parent's + // communicator is divisible by the number of new sub-groups. + if ( parent_nt_in % parent_n_way != 0 ) + { + printf( "Assertion failed: parent_nt_in parent_n_way != 0\n" ); + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); + } + #endif + + // Compute: + // - the number of threads inside the new child comm, + // - the current thread's id within the new communicator, + // - the current thread's work id, given the ways of parallelism + // to be obtained within the next loop. 
+ const dim_t child_nt_in = bli_rntm_calc_num_threads_in( bszid_chl, rntm ); + const dim_t child_n_way = bli_rntm_ways_for( *bszid_chl, rntm ); + const dim_t child_comm_id = parent_comm_id % child_nt_in; + const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way ); //printf( "thread %d: child_n_way = %d child_nt_in = %d parent_n_way = %d (bszid = %d->%d)\n", (int)child_comm_id, (int)child_nt_in, (int)child_n_way, (int)parent_n_way, (int)bli_cntl_bszid( cntl_par ), (int)bszid_chl ); - // The parent's chief thread creates a temporary array of thrcomm_t - // pointers. - if ( bli_thread_am_ochief( thread_par ) ) + // The parent's chief thread creates a temporary array of thrcomm_t + // pointers. + if ( bli_thread_am_ochief( thread_par ) ) + { + if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) { - err_t r_val; - - if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) - new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val ); - else - new_comms = static_comms; + new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ), &r_val ); } + else + { + new_comms = static_comms; r_val = BLIS_SUCCESS; + } + } + else + { + r_val = BLIS_SUCCESS; + } - // Broadcast the temporary array to all threads in the parent's - // communicator. - new_comms = bli_thread_broadcast( thread_par, new_comms ); - - // Chiefs in the child communicator allocate the communicator - // object and store it in the array element corresponding to the - // parent's work id. - if ( child_comm_id == 0 ) - new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in ); + // If the master thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_thread0_return_if_failure( &r_val, thread_par ); - bli_thread_barrier( thread_par ); + // Broadcast the temporary array to all threads in the parent's + // communicator. 
+ new_comms = bli_thread_broadcast( thread_par, new_comms ); - // All threads create a new thrinfo_t node using the communicator - // that was created by their chief, as identified by parent_work_id. - thrinfo_t* thread_chl = bli_thrinfo_create - ( - rntm, // rntm - new_comms[ parent_work_id ], // ocomm - child_comm_id, // ocomm_id - child_n_way, // n_way - child_work_id, // work_id - TRUE, // free_comm - *bszid_chl, // bszid - NULL // sub_node - ); - - bli_thread_barrier( thread_par ); - - // The parent's chief thread frees the temporary array of thrcomm_t - // pointers. - if ( bli_thread_am_ochief( thread_par ) ) - { - if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) - bli_free_intl( new_comms ); - } + // Chiefs in the child communicator allocate the communicator + // object and store it in the array element corresponding to the + // parent's work id. + if ( child_comm_id == 0 ) + { + r_val = bli_thrcomm_create( rntm, child_nt_in, &new_comms[ parent_work_id ] ); + } + else + { + r_val = BLIS_SUCCESS; + } - return thread_chl; + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + + bli_thread_barrier( thread_par ); + + // All threads create a new thrinfo_t node using the communicator + // that was created by their chief, as identified by parent_work_id. + r_val = bli_thrinfo_create + ( + rntm, // rntm + new_comms[ parent_work_id ], // ocomm + child_comm_id, // ocomm_id + child_n_way, // n_way + child_work_id, // work_id + TRUE, // free_comm + *bszid_chl, // bszid + NULL, // sub_node + thread_chl // node + ); + + // If any thread generated an error, all threads return immediately. + if ( bli_error_checking_is_enabled() ) + bli_check_threads_return_if_failure( &r_val, thread_par ); + + bli_thread_barrier( thread_par ); + + // The parent's chief thread frees the temporary array of thrcomm_t + // pointers. 
+ if ( bli_thread_am_ochief( thread_par ) ) + { + if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) + bli_free_intl( new_comms ); } + + return BLIS_SUCCESS; } diff --git a/frame/thread/bli_thrinfo_sup.h b/frame/thread/bli_thrinfo_sup.h index 1afcd3337e..c2322b8342 100644 --- a/frame/thread/bli_thrinfo_sup.h +++ b/frame/thread/bli_thrinfo_sup.h @@ -40,27 +40,29 @@ // Prototypes for level-3 thrinfo sup functions. // -void bli_thrinfo_sup_grow +err_t bli_thrinfo_sup_grow ( rntm_t* rntm, const bszid_t* bszid_par, thrinfo_t* thread ); -thrinfo_t* bli_thrinfo_sup_rgrow +err_t bli_thrinfo_sup_rgrow ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_cur, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_cur, + thrinfo_t* thread_par, + thrinfo_t** thread_cur ); -thrinfo_t* bli_thrinfo_sup_create_for_cntl +err_t bli_thrinfo_sup_create_for_cntl ( - rntm_t* rntm, - const bszid_t* bszid_par, - const bszid_t* bszid_chl, - thrinfo_t* thread_par + rntm_t* rntm, + const bszid_t* bszid_par, + const bszid_t* bszid_chl, + thrinfo_t* thread_par, + thrinfo_t** thread_chl ); #endif diff --git a/frame/util/bli_util_check.c b/frame/util/bli_util_check.c index a96f6f5e98..0107e69905 100644 --- a/frame/util/bli_util_check.c +++ b/frame/util/bli_util_check.c @@ -147,7 +147,7 @@ void PASTEMAC(opname,_check) \ const bool* is_eq \ ) \ { \ - bli_l0_xxbsc_check( chi, psi, is_eq ); \ + bli_l0_xxbool_check( chi, psi, is_eq ); \ } GENFRONT( eqsc ) diff --git a/frame/util/bli_util_tapi.c b/frame/util/bli_util_tapi.c index abc9c90890..f64b8800a3 100644 --- a/frame/util/bli_util_tapi.c +++ b/frame/util/bli_util_tapi.c @@ -64,7 +64,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ @@ -100,7 +100,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, m ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -143,7 +143,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -190,7 +190,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ + bli_gks_query_cntx_if_null( &cntx ); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ @@ -231,7 +231,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ ctype_r norm; \ \ @@ -290,7 +290,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ ctype_r norm; \ \ @@ -355,7 +355,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ if ( bli_zero_dim1( n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ @@ -419,7 +419,7 @@ void PASTEMAC(ch,opname) \ if ( bli_zero_dim1( n ) ) { *is_eq = TRUE; return; } \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ *is_eq = PASTEMAC2(ch,opname,_unb_var1) \ ( \ @@ -456,7 +456,7 @@ void PASTEMAC(ch,opname) \ if ( bli_zero_dim2( m, n ) ) { *is_eq = TRUE; return; } \ \ /* Obtain a valid context from the gks if necessary. */ \ - /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ + /*bli_gks_query_cntx_if_null( &cntx );*/ \ \ /* Invoke the helper variant. */ \ *is_eq = PASTEMAC2(ch,opname,_unb_var1) \ diff --git a/kernels/zen/1/bli_scalv_zen_int10.c b/kernels/zen/1/bli_scalv_zen_int10.c index 7487880b80..f8cd92c4d6 100644 --- a/kernels/zen/1/bli_scalv_zen_int10.c +++ b/kernels/zen/1/bli_scalv_zen_int10.c @@ -82,7 +82,7 @@ void bli_sscalv_zen_int10 { float* zero = bli_s0; - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); ssetv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_SETV_KER, cntx ); @@ -276,7 +276,7 @@ void bli_dscalv_zen_int10 { double* zero = bli_d0; - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); dsetv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_SETV_KER, cntx ); @@ -454,7 +454,7 @@ void bli_cscalv_zen_int10 dim_t n, scomplex* restrict alpha, scomplex* restrict x, inc_t incx, - cntx_t* restrict cntx + cntx_t* cntx ) { const num_t dt = BLIS_SCOMPLEX; diff --git a/kernels/zen/1f/bli_axpyf_zen_int_4.c b/kernels/zen/1f/bli_axpyf_zen_int_4.c index ddebc5ee01..10d5fa5731 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_4.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_4.c @@ -46,7 +46,7 @@ void bli_caxpyf_zen_int_4 scomplex* restrict a, inc_t inca, inc_t lda, scomplex* restrict x, inc_t incx, scomplex* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { 
inc_t fuse_fac = 4; @@ -79,7 +79,7 @@ void bli_caxpyf_zen_int_4 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); caxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_SCOMPLEX, BLIS_AXPYV_KER, cntx ); diff --git a/kernels/zen/1f/bli_axpyf_zen_int_5.c b/kernels/zen/1f/bli_axpyf_zen_int_5.c index 9c8a370e15..e8abba240d 100644 --- a/kernels/zen/1f/bli_axpyf_zen_int_5.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_5.c @@ -69,7 +69,7 @@ void bli_saxpyf_zen_int_5 float* restrict a, inc_t inca, inc_t lda, float* restrict x, inc_t incx, float* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 5; @@ -108,7 +108,7 @@ void bli_saxpyf_zen_int_5 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); saxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_FLOAT, BLIS_AXPYV_KER, cntx ); @@ -321,7 +321,7 @@ void bli_daxpyf_zen_int_5 double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 5; @@ -360,7 +360,7 @@ void bli_daxpyf_zen_int_5 // operation as a loop over axpyv. 
if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); daxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_AXPYV_KER, cntx ); @@ -572,7 +572,7 @@ void bli_daxpyf_zen_int_16x2 double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 2; @@ -857,7 +857,7 @@ void bli_daxpyf_zen_int_16x4 double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, - cntx_t* restrict cntx + cntx_t* cntx ) { const dim_t fuse_fac = 4; @@ -899,7 +899,7 @@ void bli_daxpyf_zen_int_16x4 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { - if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); daxpyv_ker_ft f = bli_cntx_get_ukr_dt( BLIS_DOUBLE, BLIS_AXPYV_KER, cntx ); diff --git a/sandbox/gemmlike/bli_gemm_ex.c b/sandbox/gemmlike/bli_gemm_ex.c index 96dae1a3a9..f0cd566667 100644 --- a/sandbox/gemmlike/bli_gemm_ex.c +++ b/sandbox/gemmlike/bli_gemm_ex.c @@ -72,11 +72,10 @@ void bli_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check the operands. 
if ( bli_error_checking_is_enabled() ) diff --git a/sandbox/gemmlike/bls_gemm.c b/sandbox/gemmlike/bls_gemm.c index ec5d8d5b1f..5e47b96052 100644 --- a/sandbox/gemmlike/bls_gemm.c +++ b/sandbox/gemmlike/bls_gemm.c @@ -75,13 +75,12 @@ void bls_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check parameters. if ( bli_error_checking_is_enabled() ) diff --git a/sandbox/old/ref99/bli_gemmnat.c b/sandbox/old/ref99/bli_gemmnat.c index 399f31e216..36bba0aaf3 100644 --- a/sandbox/old/ref99/bli_gemmnat.c +++ b/sandbox/old/ref99/bli_gemmnat.c @@ -57,13 +57,12 @@ void bli_gemmnat bli_init_once(); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Invoke the operation's front end. 
//blx_gemm_front( alpha, a, b, beta, c, cntx, rntm, NULL ); diff --git a/sandbox/power10/bli_gemm_ex.c b/sandbox/power10/bli_gemm_ex.c index 3334dc4a53..5af3d113c2 100644 --- a/sandbox/power10/bli_gemm_ex.c +++ b/sandbox/power10/bli_gemm_ex.c @@ -60,11 +60,10 @@ void bli_gemm_ex // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; - if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } - else { rntm_l = *rntm; rntm = &rntm_l; } + bli_rntm_init_if_null( &rntm, &rntm_l ); // Obtain a valid (native) context from the gks if necessary. - if ( cntx == NULL ) cntx = bli_gks_query_cntx(); + bli_gks_query_cntx_if_null( ( const cntx_t** )&cntx ); \ // Check the operands. if ( bli_error_checking_is_enabled() ) diff --git a/testsuite/src/test_axpy2v.c b/testsuite/src/test_axpy2v.c index 3019d472b2..825ffcd6b9 100644 --- a/testsuite/src/test_axpy2v.c +++ b/testsuite/src/test_axpy2v.c @@ -172,11 +172,11 @@ void libblis_test_axpy2v_experiment obj_t alpha1, alpha2, x, y, z; obj_t z_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_axpyf.c b/testsuite/src/test_axpyf.c index 42ab73018c..d644235fb6 100644 --- a/testsuite/src/test_axpyf.c +++ b/testsuite/src/test_axpyf.c @@ -170,11 +170,11 @@ void libblis_test_axpyf_experiment obj_t alpha, a, x, y; obj_t y_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_dotaxpyv.c b/testsuite/src/test_dotaxpyv.c index 8e09e3ee17..77c2962cd8 100644 --- a/testsuite/src/test_dotaxpyv.c +++ b/testsuite/src/test_dotaxpyv.c @@ -175,11 +175,11 @@ void libblis_test_dotaxpyv_experiment obj_t alpha, xt, x, y, rho, z; obj_t z_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_dotxaxpyf.c b/testsuite/src/test_dotxaxpyf.c index ec519de51e..c2652dc8be 100644 --- a/testsuite/src/test_dotxaxpyf.c +++ b/testsuite/src/test_dotxaxpyf.c @@ -180,11 +180,11 @@ void libblis_test_dotxaxpyf_experiment obj_t alpha, at, a, w, x, beta, y, z; obj_t y_save, z_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_dotxf.c b/testsuite/src/test_dotxf.c index 83f4b44ebe..d01499d7dd 100644 --- a/testsuite/src/test_dotxf.c +++ b/testsuite/src/test_dotxf.c @@ -172,11 +172,11 @@ void libblis_test_dotxf_experiment obj_t alpha, a, x, beta, y; obj_t y_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_gemm_ukr.c b/testsuite/src/test_gemm_ukr.c index 69ee4339da..961f5e4b18 100644 --- a/testsuite/src/test_gemm_ukr.c +++ b/testsuite/src/test_gemm_ukr.c @@ -177,11 +177,11 @@ void libblis_test_gemm_ukr_experiment obj_t ap, bp; obj_t c_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index 44ba51587c..f95a06b90e 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -190,10 +190,10 @@ void libblis_test_gemmtrsm_ukr_experiment dim_t m, n, k; inc_t ldap, ldbp; - char sc_a = 'c'; - char sc_b = 'r'; + char sc_a = 'c'; + char sc_b = 'r'; - side_t side = BLIS_LEFT; + side_t side = BLIS_LEFT; uplo_t uploa; obj_t alpha; @@ -203,11 +203,11 @@ void libblis_test_gemmtrsm_ukr_experiment obj_t a1xp, a11p, bx1p, b11p; obj_t c11_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index eaa0a9cefe..ad5c660eb0 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -742,9 +742,6 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) //char int_type_size_str[8]; gint_t int_type_size; ind_t im; - cntx_t* cntx; - cntx_t* cntx_c; - cntx_t* cntx_z; // If bli_info_get_int_type_size() returns 32 or 64, the size is forced. // Otherwise, the size is chosen automatically. 
We query the result of @@ -816,6 +813,15 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) bli_rntm_set_ways_for_op( BLIS_TRSM, BLIS_LEFT, m, n, k, &trsm_l ); bli_rntm_set_ways_for_op( BLIS_TRSM, BLIS_RIGHT, m, n, k, &trsm_r ); + // Query an arch_t id. + arch_t arch_id; + bli_arch_query_id( &arch_id ); + + // Use the arch_t id we just queried to query the corresponding architecture + // string. + const char* arch_str; + bli_arch_string( arch_id, &arch_str ); + // Output some system parameters. libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS library info -------------------------------------\n" ); @@ -824,7 +830,7 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS configuration info ---\n" ); libblis_test_fprintf_c( os, "\n" ); - libblis_test_fprintf_c( os, "active sub-configuration %s\n", bli_arch_string( bli_arch_query_id() ) ); + libblis_test_fprintf_c( os, "active sub-configuration %s\n", arch_str ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "BLIS integer type size (bits) %d\n", ( int )int_type_size ); libblis_test_fprintf_c( os, "\n" ); @@ -907,65 +913,102 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, " jr/ir loops %s\n", jrir_str ); libblis_test_fprintf_c( os, "\n" ); + const char* opim_str[ BLIS_NUM_FP_TYPES ] + [ BLIS_NUM_LEVEL3_OPS ]; + + // Iterate over the list of supported floating-point datatypes -- + // BLIS_FLOAT, _DOUBLE, _SCOMPLEX, _DCOMPLEX -- and for each query a + // pointer to the operation implementation string into the appropriate + // location within the opim_str array. 
+ for ( num_t dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) + { + bli_info_get_gemm_impl_string( dt, &opim_str[dt][BLIS_GEMM] ); + bli_info_get_hemm_impl_string( dt, &opim_str[dt][BLIS_HEMM] ); + bli_info_get_herk_impl_string( dt, &opim_str[dt][BLIS_HERK] ); + bli_info_get_her2k_impl_string( dt, &opim_str[dt][BLIS_HER2K] ); + bli_info_get_symm_impl_string( dt, &opim_str[dt][BLIS_SYMM] ); + bli_info_get_syrk_impl_string( dt, &opim_str[dt][BLIS_SYRK] ); + bli_info_get_syr2k_impl_string( dt, &opim_str[dt][BLIS_SYR2K] ); + bli_info_get_trmm_impl_string( dt, &opim_str[dt][BLIS_TRMM] ); + bli_info_get_trmm3_impl_string( dt, &opim_str[dt][BLIS_TRMM3] ); + bli_info_get_trsm_impl_string( dt, &opim_str[dt][BLIS_TRSM] ); + } + libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS default implementations ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "level-3 implementations s d c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s %7s %7s\n", - bli_info_get_gemm_impl_string( BLIS_FLOAT ), - bli_info_get_gemm_impl_string( BLIS_DOUBLE ), - bli_info_get_gemm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_gemm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_GEMM], + opim_str[BLIS_DOUBLE ][BLIS_GEMM], + opim_str[BLIS_SCOMPLEX][BLIS_GEMM], + opim_str[BLIS_DCOMPLEX][BLIS_GEMM] ); libblis_test_fprintf_c( os, " hemm %7s %7s %7s %7s\n", - bli_info_get_hemm_impl_string( BLIS_FLOAT ), - bli_info_get_hemm_impl_string( BLIS_DOUBLE ), - bli_info_get_hemm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_hemm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_HEMM], + opim_str[BLIS_DOUBLE ][BLIS_HEMM], + opim_str[BLIS_SCOMPLEX][BLIS_HEMM], + opim_str[BLIS_DCOMPLEX][BLIS_HEMM] ); libblis_test_fprintf_c( os, " herk %7s %7s %7s %7s\n", - bli_info_get_herk_impl_string( BLIS_FLOAT ), - bli_info_get_herk_impl_string( BLIS_DOUBLE ), - bli_info_get_herk_impl_string( BLIS_SCOMPLEX ), - bli_info_get_herk_impl_string( BLIS_DCOMPLEX ) ); + 
opim_str[BLIS_FLOAT ][BLIS_HERK], + opim_str[BLIS_DOUBLE ][BLIS_HERK], + opim_str[BLIS_SCOMPLEX][BLIS_HERK], + opim_str[BLIS_DCOMPLEX][BLIS_HERK] ); libblis_test_fprintf_c( os, " her2k %7s %7s %7s %7s\n", - bli_info_get_her2k_impl_string( BLIS_FLOAT ), - bli_info_get_her2k_impl_string( BLIS_DOUBLE ), - bli_info_get_her2k_impl_string( BLIS_SCOMPLEX ), - bli_info_get_her2k_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_HER2K], + opim_str[BLIS_DOUBLE ][BLIS_HER2K], + opim_str[BLIS_SCOMPLEX][BLIS_HER2K], + opim_str[BLIS_DCOMPLEX][BLIS_HER2K] ); libblis_test_fprintf_c( os, " symm %7s %7s %7s %7s\n", - bli_info_get_symm_impl_string( BLIS_FLOAT ), - bli_info_get_symm_impl_string( BLIS_DOUBLE ), - bli_info_get_symm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_symm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_SYMM], + opim_str[BLIS_DOUBLE ][BLIS_SYMM], + opim_str[BLIS_SCOMPLEX][BLIS_SYMM], + opim_str[BLIS_DCOMPLEX][BLIS_SYMM] ); libblis_test_fprintf_c( os, " syrk %7s %7s %7s %7s\n", - bli_info_get_syrk_impl_string( BLIS_FLOAT ), - bli_info_get_syrk_impl_string( BLIS_DOUBLE ), - bli_info_get_syrk_impl_string( BLIS_SCOMPLEX ), - bli_info_get_syrk_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_SYRK], + opim_str[BLIS_DOUBLE ][BLIS_SYRK], + opim_str[BLIS_SCOMPLEX][BLIS_SYRK], + opim_str[BLIS_DCOMPLEX][BLIS_SYRK] ); libblis_test_fprintf_c( os, " syr2k %7s %7s %7s %7s\n", - bli_info_get_syr2k_impl_string( BLIS_FLOAT ), - bli_info_get_syr2k_impl_string( BLIS_DOUBLE ), - bli_info_get_syr2k_impl_string( BLIS_SCOMPLEX ), - bli_info_get_syr2k_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_SYR2K], + opim_str[BLIS_DOUBLE ][BLIS_SYR2K], + opim_str[BLIS_SCOMPLEX][BLIS_SYR2K], + opim_str[BLIS_DCOMPLEX][BLIS_SYR2K] ); libblis_test_fprintf_c( os, " trmm %7s %7s %7s %7s\n", - bli_info_get_trmm_impl_string( BLIS_FLOAT ), - bli_info_get_trmm_impl_string( BLIS_DOUBLE ), - bli_info_get_trmm_impl_string( BLIS_SCOMPLEX ), - 
bli_info_get_trmm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_TRMM], + opim_str[BLIS_DOUBLE ][BLIS_TRMM], + opim_str[BLIS_SCOMPLEX][BLIS_TRMM], + opim_str[BLIS_DCOMPLEX][BLIS_TRMM] ); libblis_test_fprintf_c( os, " trmm3 %7s %7s %7s %7s\n", - bli_info_get_trmm3_impl_string( BLIS_FLOAT ), - bli_info_get_trmm3_impl_string( BLIS_DOUBLE ), - bli_info_get_trmm3_impl_string( BLIS_SCOMPLEX ), - bli_info_get_trmm3_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_TRMM3], + opim_str[BLIS_DOUBLE ][BLIS_TRMM3], + opim_str[BLIS_SCOMPLEX][BLIS_TRMM3], + opim_str[BLIS_DCOMPLEX][BLIS_TRMM3] ); libblis_test_fprintf_c( os, " trsm %7s %7s %7s %7s\n", - bli_info_get_trsm_impl_string( BLIS_FLOAT ), - bli_info_get_trsm_impl_string( BLIS_DOUBLE ), - bli_info_get_trsm_impl_string( BLIS_SCOMPLEX ), - bli_info_get_trsm_impl_string( BLIS_DCOMPLEX ) ); + opim_str[BLIS_FLOAT ][BLIS_TRSM], + opim_str[BLIS_DOUBLE ][BLIS_TRSM], + opim_str[BLIS_SCOMPLEX][BLIS_TRSM], + opim_str[BLIS_DCOMPLEX][BLIS_TRSM] ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "\n" ); //bli_ind_disable_all(); + const char* cimpl_str[ BLIS_NUM_FP_TYPES ]; + + // For each of the complex datatypes, query a pointer to a string that + // describes the complex implementation (e.g. "1m" or "native"). We only + // report the string for gemm since currently all level-3 operations use + // the same implementation method. This may change in the future if, for + // example, new level-3-like operations are added to BLIS that don't have + // complex domain analogues, or if those complex analogues cannot be + // cleanly expressed via the 1m method. If/when that happens, it would be + // appropriate to list the implementation method on a per-operation basis, + // for all level-3 operations (instead of for only gemm). 
+ bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX, + &cimpl_str[BLIS_SCOMPLEX] ); + bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX, + &cimpl_str[BLIS_DCOMPLEX] ); + bli_ind_oper_enable_only( BLIS_GEMM, BLIS_NAT, BLIS_SCOMPLEX ); bli_ind_oper_enable_only( BLIS_GEMM, BLIS_NAT, BLIS_DCOMPLEX ); @@ -973,12 +1016,13 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, "complex implementation %7s %7s\n", - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX ), - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX ) ); + cimpl_str[BLIS_SCOMPLEX], + cimpl_str[BLIS_DCOMPLEX] ); libblis_test_fprintf_c( os, "\n" ); // Query a native context. - cntx = ( cntx_t* )bli_gks_query_nat_cntx(); + const cntx_t* cntx; + bli_gks_query_nat_cntx( &cntx ); libblis_test_fprintf_c( os, "level-3 blocksizes s d c z \n" ); libblis_test_fprintf_c( os, " mc %7d %7d %7d %7d\n", @@ -1035,32 +1079,50 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx ) ); libblis_test_fprintf_c( os, "\n" ); + + + const char* ki_str[ BLIS_NUM_FP_TYPES ] + [ BLIS_NUM_UKRS ]; + + // Iterate over the list of supported floating-point datatypes -- + // BLIS_FLOAT, _DOUBLE, _SCOMPLEX, _DCOMPLEX -- and for each query a + // pointer to the microkernel implementation string into the appropriate + // location within the ki_str array. 
+ for ( num_t dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) + { + bli_info_get_gemm_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_GEMM_UKR] ); + bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_GEMMTRSM_L_UKR] ); + bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_GEMMTRSM_U_UKR] ); + bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_TRSM_L_UKR] ); + bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, dt, &ki_str[dt][BLIS_TRSM_U_UKR] ); + } + libblis_test_fprintf_c( os, "micro-kernel types s d c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s %7s %7s\n", - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_GEMM_UKR], + ki_str[BLIS_DOUBLE ][BLIS_GEMM_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_GEMM_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMM_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_l %7s %7s %7s %7s\n", - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_DOUBLE ][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_L_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_u %7s %7s %7s %7s\n", - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_GEMMTRSM_U_UKR], + ki_str[BLIS_DOUBLE ][BLIS_GEMMTRSM_U_UKR], + 
ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_U_UKR] ); libblis_test_fprintf_c( os, " trsm_l %7s %7s %7s %7s\n", - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_TRSM_L_UKR], + ki_str[BLIS_DOUBLE ][BLIS_TRSM_L_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_L_UKR] ); libblis_test_fprintf_c( os, " trsm_u %7s %7s %7s %7s\n", - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), - bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); + ki_str[BLIS_FLOAT ][BLIS_TRSM_U_UKR], + ki_str[BLIS_DOUBLE ][BLIS_TRSM_U_UKR], + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_U_UKR] ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "micro-kernel prefers rows? 
s d c z\n" ); @@ -1102,70 +1164,89 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) bli_ind_oper_enable_only( BLIS_GEMM, im, BLIS_SCOMPLEX ); bli_ind_oper_enable_only( BLIS_GEMM, im, BLIS_DCOMPLEX ); + bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX, + &cimpl_str[BLIS_SCOMPLEX] ); + bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX, + &cimpl_str[BLIS_DCOMPLEX] ); + //libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, "complex implementation %7s %7s\n", - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX ), - bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX ) ); + cimpl_str[BLIS_SCOMPLEX], + cimpl_str[BLIS_DCOMPLEX] ); libblis_test_fprintf_c( os, "\n" ); // Query a native context. NOTE: Now that we've removed the dt argument from // bli_gks_query_ind_cntx(), we can consolidate cntx_c and cntx_z; there is // no need to query two contexts since they are the same. 
- cntx_c = ( cntx_t* )bli_gks_query_ind_cntx( im ); - cntx_z = ( cntx_t* )bli_gks_query_ind_cntx( im ); + const cntx_t* cntx_c; + bli_gks_query_ind_cntx( im, &cntx_c ); libblis_test_fprintf_c( os, "level-3 blocksizes c z \n" ); libblis_test_fprintf_c( os, " mc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MC, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_c ) ); libblis_test_fprintf_c( os, " kc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_KC, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_c ) ); libblis_test_fprintf_c( os, " nc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NC, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MC, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_c ) ); libblis_test_fprintf_c( os, " kc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_KC, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_c ) ); libblis_test_fprintf_c( os, " nc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NC, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MR, cntx_c ), - ( int 
)bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_c ) ); libblis_test_fprintf_c( os, " nr %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NR, cntx_c ), - ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_z ) ); + ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr packdim %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MR, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_c ) ); libblis_test_fprintf_c( os, " nr packdim %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NR, cntx_c ), - ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_z ) ); + ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_c ) ); libblis_test_fprintf_c( os, "\n" ); + + // Iterate over the list of supported floating-point datatypes -- + // BLIS_FLOAT, _DOUBLE, _SCOMPLEX, _DCOMPLEX -- and for each query a + // pointer to the microkernel implementation string into the appropriate + // location within the ki_str array. 
+ for ( num_t dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) + { + bli_info_get_gemm_ukr_impl_string( im, dt, &ki_str[dt][BLIS_GEMM_UKR] ); + bli_info_get_gemmtrsm_l_ukr_impl_string( im, dt, &ki_str[dt][BLIS_GEMMTRSM_L_UKR] ); + bli_info_get_gemmtrsm_u_ukr_impl_string( im, dt, &ki_str[dt][BLIS_GEMMTRSM_U_UKR] ); + bli_info_get_trsm_l_ukr_impl_string( im, dt, &ki_str[dt][BLIS_TRSM_L_UKR] ); + bli_info_get_trsm_u_ukr_impl_string( im, dt, &ki_str[dt][BLIS_TRSM_U_UKR] ); + } + libblis_test_fprintf_c( os, "micro-kernel types c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s\n", - bli_info_get_gemm_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_gemm_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_GEMM_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMM_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_l %7s %7s\n", - bli_info_get_gemmtrsm_l_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_l_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_L_UKR] ); libblis_test_fprintf_c( os, " gemmtrsm_u %7s %7s\n", - bli_info_get_gemmtrsm_u_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_gemmtrsm_u_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_GEMMTRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_GEMMTRSM_U_UKR] ); libblis_test_fprintf_c( os, " trsm_l %7s %7s\n", - bli_info_get_trsm_l_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_trsm_l_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_L_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_L_UKR] ); libblis_test_fprintf_c( os, " trsm_u %7s %7s\n", - bli_info_get_trsm_u_ukr_impl_string( im, BLIS_SCOMPLEX ), - bli_info_get_trsm_u_ukr_impl_string( im, BLIS_DCOMPLEX ) ); + ki_str[BLIS_SCOMPLEX][BLIS_TRSM_U_UKR], + ki_str[BLIS_DCOMPLEX][BLIS_TRSM_U_UKR] ); libblis_test_fprintf_c( os, "\n" ); } @@ -1605,7 +1686,6 @@ void libblis_test_op_driver double perf, resid; char* pass_str; - char* ind_str; char 
blank_str[32]; char funcname_str[64]; char dims_str[64]; @@ -2209,7 +2289,9 @@ void libblis_test_op_driver // Query the implementation string associated with the // current operation and datatype. If the operation is // not level-3, we will always get back the native string. - ind_str = ( char* )bli_ind_oper_get_avail_impl_string( op->opid, datatype ); + char* ind_str; + bli_ind_oper_get_avail_impl_string( op->opid, datatype, + ( const char** )&ind_str ); // Loop over the requested parameter combinations. for ( pci = 0; pci < n_param_combos; ++pci ) diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index 5f4988e1c7..1e59a06fd0 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -172,21 +172,21 @@ void libblis_test_trsm_ukr_experiment dim_t m, n; - char sc_a = 'c'; - char sc_b = 'r'; + char sc_a = 'c'; + char sc_b = 'r'; - side_t side = BLIS_LEFT; + side_t side = BLIS_LEFT; uplo_t uploa; obj_t a, b, c; obj_t ap, bp; obj_t c_save; - cntx_t* cntx; + cntx_t* cntx = NULL; // Query a context. - cntx = ( cntx_t* )bli_gks_query_cntx(); + bli_gks_query_cntx( ( const cntx_t** )&cntx ); \ // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype );