diff --git a/kokkos/basic/Box.hpp b/kokkos/basic/Box.hpp deleted file mode 100644 index 62046e4..0000000 --- a/kokkos/basic/Box.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _Box_hpp_ -#define _Box_hpp_ - -/** - * a 'Box' is 3 pairs of ints, where each pair specifies a lower - * and upper bound for one of the 3 spatial dimensions. - * - * This struct stores the 3 pairs as a simple array of 6 ints, - * but defines the bracket operator so that it can be referenced - * using 2-dimensional array notation like this: - * int xmin = box[0][0]; int xmax = box[0][1]; - * int ymin = box[1][0]; int ymax = box[1][1]; - * int zmin = box[2][0]; int zmax = box[2][1]; - */ -struct Box { - int ranges[6]; - int* operator[](int xyz) { return &ranges[xyz*2]; } - const int* operator[](int xyz) const { return &ranges[xyz*2]; } -}; - -#endif - diff --git a/kokkos/basic/BoxIterator.hpp b/kokkos/basic/BoxIterator.hpp deleted file mode 100644 index f644119..0000000 --- a/kokkos/basic/BoxIterator.hpp +++ /dev/null @@ -1,143 +0,0 @@ -#ifndef _BoxTraverser_hpp_ -#define _BoxTraverser_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -namespace miniFE { - -/** Class for traversing a 3-dimensional 'box' of indices. 
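//A minimal sketch, with hypothetical bounds, of how the Box bracket operator
//above maps 2-D indexing onto the flat ranges[6] array:
//
//  Box box = { 0,4, 0,2, 0,8 };        // x in [0,4), y in [0,2), z in [0,8)
//  int nx = box[0][1] - box[0][0];     // 4 -- box[0] points at &box.ranges[0]
//  int ny = box[1][1] - box[1][0];     // 2 -- box[1] points at &box.ranges[2]
//  int nz = box[2][1] - box[2][0];     // 8 -- box[2] points at &box.ranges[4]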
- - //One way to traverse a 'box[3][2]' is to use a triply-nested for-loop: - for(int z=box[2][0]; z= box_[0][1]) { - x = box_[0][0]; - ++y; - if (y >= box_[1][1]) { - y = box_[1][0]; - ++z; - if (z >= box_[2][1]) { - z = box_[2][1]; - y = box_[1][1]; - x = box_[0][1]; - } - } - } - return *this; - } - - BoxIterator operator++(int) - { - BoxIterator temp = *this; - ++(*this); - return temp; - } - - bool operator==(const BoxIterator& rhs) const - { - return x == rhs.x && y == rhs.y && z == rhs.z; - } - - bool operator!=(const BoxIterator& rhs) const - { - return !(this->operator==(rhs)); - } - - int x; - int y; - int z; - -private: - BoxIterator(const Box& box, bool at_end = false) - : x(box[0][0]), - y(box[1][0]), - z(box[2][0]), - box_() - { - box_[0][0] = box[0][0]; box_[0][1] = box[0][1]; - box_[1][0] = box[1][0]; box_[1][1] = box[1][1]; - box_[2][0] = box[2][0]; box_[2][1] = box[2][1]; - if (at_end) { - x = box[0][1]; - y = box[1][1]; - z = box[2][1]; - } - } - - Box box_; -};//class BoxTraverser - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/BoxPartition.cpp b/kokkos/basic/BoxPartition.cpp deleted file mode 100644 index 2a4e5a7..0000000 --- a/kokkos/basic/BoxPartition.cpp +++ /dev/null @@ -1,477 +0,0 @@ - -#include -#include - -#include -#include - -/*--------------------------------------------------------------------*/ - -static int box_map_local_entry( const Box& box , - const int ghost , - int local_x , - int local_y , - int local_z ) -{ - const int nx = 2 * ghost + box[0][1] - box[0][0] ; - const int ny = 2 * ghost + box[1][1] - box[1][0] ; - const int nz = 2 * ghost + box[2][1] - box[2][0] ; - int result = -1 ; - - local_x += ghost ; - local_y += ghost ; - local_z += ghost ; - - if ( 0 <= local_x && local_x < nx && - 0 <= local_y && local_y < ny && - 0 <= local_z && local_z < nz ) { - - result = local_z * ny * nx + local_y * nx + local_x ; - } - return result ; -} - -int box_map_local( const Box& box_local, - const int ghost , - const int box_local_map[] , - const int local_x , - const int local_y , - const int local_z ) -{ - int result = box_map_local_entry(box_local,ghost,local_x,local_y,local_z); - - if ( 0 <= result ) { - result = box_local_map[ result ]; - } - - return result ; -} - -/*--------------------------------------------------------------------*/ -/* Recursively split a box into into (up-ip) sub-boxes */ - -void box_partition( int ip , int up , int axis , - const Box& box, - Box* p_box ) -{ - const int np = up - ip ; - if ( 1 == np ) { - p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; - p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; - p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; - } - else { - const int n = box[ axis ][1] - box[ axis ][0] ; - const int np_low = np / 2 ; /* Rounded down */ - const int np_upp = np - np_low ; - - const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np))); - const int n_low = n - n_upp ; - const int next_axis = ( axis + 2 ) % 3 ; - - if ( np_low ) { /* P = [ip,ip+np_low) */ - Box dbox ; - dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; - dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; - dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; - - dbox[ axis ][1] = dbox[ axis ][0] + n_low ; - - box_partition( ip, ip + np_low, next_axis, dbox, p_box ); - } - - if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */ - Box dbox; - dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; - dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; - dbox[2][0] = box[2][0] ; dbox[2][1] = 
box[2][1] ; - - ip += np_low ; - dbox[ axis ][0] += n_low ; - dbox[ axis ][1] = dbox[ axis ][0] + n_upp ; - - box_partition( ip, ip + np_upp, next_axis, dbox, p_box ); - } - } -} - -/*--------------------------------------------------------------------*/ - -static int box_disjoint( const Box& a , const Box& b) -{ - return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] || - a[1][1] <= b[1][0] || b[1][1] <= a[1][0] || - a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ; -} - -static void resize_int( int ** a , int * allocLen , int newLen ) -{ - int k = 32; - while ( k < newLen ) { k <<= 1 ; } - if ( NULL == *a ) - { *a = (int*)malloc( sizeof(int)*(*allocLen = k) ); } - else if ( *allocLen < k ) - { *a = (int*)realloc(*a , sizeof(int)*(*allocLen = k)); } -} - -static void box_partition_maps( - const int np , - const int my_p , - const Box* pbox, - const int ghost , - int ** map_local_id , - int ** map_recv_pc , - int ** map_send_pc , - int ** map_send_id ) -{ - const Box& my_box = pbox[my_p] ; - - const int my_ix = my_box[0][0] ; - const int my_iy = my_box[1][0] ; - const int my_iz = my_box[2][0] ; - const int my_nx = my_box[0][1] - my_box[0][0] ; - const int my_ny = my_box[1][1] - my_box[1][0] ; - const int my_nz = my_box[2][1] - my_box[2][0] ; - - const int my_use_nx = 2 * ghost + my_nx ; - const int my_use_ny = 2 * ghost + my_ny ; - const int my_use_nz = 2 * ghost + my_nz ; - - const int id_length = my_use_nx * my_use_ny * my_use_nz ; - - int * local_id = (int *) malloc( id_length * sizeof(int) ); - int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); - int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); - - int * send_id = NULL ; - int send_id_size = 0 ; - - int iLocal , iSend ; - int i ; - - Box my_use_box; - - my_use_box[0][0] = my_box[0][0] - ghost ; - my_use_box[0][1] = my_box[0][1] + ghost ; - my_use_box[1][0] = my_box[1][0] - ghost ; - my_use_box[1][1] = my_box[1][1] + ghost ; - my_use_box[2][0] = my_box[2][0] - ghost ; - my_use_box[2][1] = my_box[2][1] + ghost ; - - for ( i = 0 ; i < id_length ; ++i ) { local_id[i] = -1 ; } - - iSend = 0 ; - iLocal = 0 ; - - /* The vector space is partitioned by processors */ - - for ( i = 0 ; i < np ; ++i ) { - const int ip = ( i + my_p ) % np ; - recv_pc[i] = iLocal ; - send_pc[i] = iSend ; - - if ( ! 
box_disjoint( my_use_box , pbox[ip] ) ) { - const int p_ix = pbox[ip][0][0] ; - const int p_iy = pbox[ip][1][0] ; - const int p_iz = pbox[ip][2][0] ; - const int p_ex = pbox[ip][0][1] ; - const int p_ey = pbox[ip][1][1] ; - const int p_ez = pbox[ip][2][1] ; - - int local_x , local_y , local_z ; - - /* Run the span of global cells that my processor uses */ - - for ( local_z = -ghost ; local_z < my_nz + ghost ; ++local_z ) { - for ( local_y = -ghost ; local_y < my_ny + ghost ; ++local_y ) { - for ( local_x = -ghost ; local_x < my_nx + ghost ; ++local_x ) { - - const int global_z = local_z + my_iz ; - const int global_y = local_y + my_iy ; - const int global_x = local_x + my_ix ; - - const int entry = - box_map_local_entry(my_box,ghost,local_x,local_y,local_z); - - if ( entry < 0 ) { abort(); } - - if ( p_iz <= global_z && global_z < p_ez && - p_iy <= global_y && global_y < p_ey && - p_ix <= global_x && global_x < p_ex ) { - - /* This ordinal is owned by processor 'ip' */ - - local_id[ entry ] = iLocal++ ; - -#if defined(DEBUG_PRINT) -if ( my_p != ip ) { - fprintf(stdout," (%d,%d,%d) : P%d recv at local %d from P%d\n", - global_x,global_y,global_z,my_p,local_id[entry],ip); - fflush(stdout); -} -#endif - } - - /* If in my ownership and used by the other processor */ - if ( my_p != ip && - /* In my ownership: */ - ( 0 <= local_z && local_z < my_nz && - 0 <= local_y && local_y < my_ny && - 0 <= local_x && local_x < my_nx ) && - /* In other processors usage: */ - ( p_iz - ghost <= global_z && global_z < p_ez + ghost && - p_iy - ghost <= global_y && global_y < p_ey + ghost && - p_ix - ghost <= global_x && global_x < p_ex + ghost ) ) { - - resize_int( & send_id , & send_id_size , (iSend + 1) ); - send_id[ iSend ] = local_id[ entry ] ; - ++iSend ; - -#if defined(DEBUG_PRINT) -{ - fprintf(stdout," (%d,%d,%d) : P%d send at local %d to P%d\n", - global_x,global_y,global_z,my_p,local_id[entry],ip); - fflush(stdout); -} -#endif - } - } - } - } - } - } - recv_pc[np] = iLocal ; - send_pc[np] = iSend ; - - *map_local_id = local_id ; - *map_recv_pc = recv_pc ; - *map_send_pc = send_pc ; - *map_send_id = send_id ; -} - -void box_partition_rcb( const int np , - const int my_p , - const Box& root_box, - const int ghost , - Box** pbox, - int ** map_local_id , - int ** map_recv_pc , - int ** map_send_pc , - int ** map_send_id ) -{ - *pbox = new Box[ np ]; - - box_partition( 0 , np , 2 , root_box , *pbox ); - - box_partition_maps( np , my_p , *pbox , ghost , - map_local_id , map_recv_pc , - map_send_pc , map_send_id ); -} - -/*--------------------------------------------------------------------*/ - -#ifdef UNIT_TEST - -static int box_contain( const Box& a , const Box& b ) -{ - return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && - a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && - a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; -} - -static void box_print( FILE * fp , const Box& a ) -{ - fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", - a[0][0] , a[0][1] , - a[1][0] , a[1][1] , - a[2][0] , a[2][1] ); -} - -static void test_box( const Box& box , const int np ) -{ - const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; - int ncell_total = 0 ; - int ncell_min = ncell_box ; - int ncell_max = 0 ; - std::vector pbox(np); - int i , j ; - - box_partition( 0 , np , 2 , box , &pbox[0] ); - - for ( i = 0 ; i < np ; ++i ) { - const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) * - ( pbox[i][1][1] - pbox[i][1][0] ) * - ( pbox[i][2][1] - pbox[i][2][0] ); - - if ( ! 
box_contain( box , pbox[i] ) ) { - fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); - box_print(stdout,pbox[i]); - fprintf(stdout,"\n"); - abort(); - } - - for ( j = i + 1 ; j < np ; ++j ) { - if ( ! box_disjoint( pbox[i] , pbox[j] ) ) { - fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); - box_print(stdout, pbox[i]); - fprintf(stdout,"\n"); - fprintf(stdout," pbox[%d/%d] = ",j,np); - box_print(stdout, pbox[j]); - fprintf(stdout,"\n"); - abort(); - } - } - ncell_total += ncell ; - - if ( ncell_max < ncell ) { ncell_max = ncell ; } - if ( ncell < ncell_min ) { ncell_min = ncell ; } - } - - if ( ncell_total != ncell_box ) { - fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); - abort(); - } - fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", - np,ncell_box,ncell_box/np,ncell_min,ncell_max); -} - -/*--------------------------------------------------------------------*/ - -static void test_maps( const Box& root_box , const int np ) -{ - const int ghost = 1 ; - const int nx_global = root_box[0][1] - root_box[0][0] ; - const int ny_global = root_box[1][1] - root_box[1][0] ; - int ieq , i , j ; - std::vector pbox(np); - int **local_values ; - int **map_local_id ; - int **map_recv_pc ; - int **map_send_pc ; - int **map_send_id ; - - box_partition( 0 , np , 2 , root_box , &pbox[0] ); - - local_values = (int **) malloc( sizeof(int*) * np ); - map_local_id = (int **) malloc( sizeof(int*) * np ); - map_recv_pc = (int **) malloc( sizeof(int*) * np ); - map_send_pc = (int **) malloc( sizeof(int*) * np ); - map_send_id = (int **) malloc( sizeof(int*) * np ); - - /* Set each local value to the global equation number */ - - for ( ieq = i = 0 ; i < np ; ++i ) { - const Box& mybox = pbox[i] ; - const int nx = mybox[0][1] - mybox[0][0] ; - const int ny = mybox[1][1] - mybox[1][0] ; - const int nz = mybox[2][1] - mybox[2][0] ; - int ix , iy , iz ; - - /* Generate the partition maps for this rank */ - box_partition_maps( np , i , &pbox[0] , ghost , - & map_local_id[i] , & map_recv_pc[i] , - & map_send_pc[i] , & map_send_id[i] ); - - local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] ); - - for ( iz = -ghost ; iz < nz + ghost ; ++iz ) { - for ( iy = -ghost ; iy < ny + ghost ; ++iy ) { - for ( ix = -ghost ; ix < nx + ghost ; ++ix ) { - const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz); - - if ( 0 <= ieq ) { - const int ix_global = ix + mybox[0][0] ; - const int iy_global = iy + mybox[1][0] ; - const int iz_global = iz + mybox[2][0] ; - - if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] && - root_box[1][0] <= iy_global && iy_global < root_box[1][1] && - root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) { - - local_values[i][ ieq ] = ix_global + - iy_global * nx_global + - iz_global * nx_global * ny_global ; - } - else { - local_values[i][ ieq ] = -1 ; - } - } - } - } - } - } - - /* Pair-wise compare the local values */ - /* i == receiving processor rank */ - /* ip == sending processor rank */ - /* j == receiving processor data entry for message from 'ip' */ - /* jp == sending processor data entry for message to 'i' */ - - for ( i = 0 ; i < np ; ++i ) { - for ( j = 1 ; j < np ; ++j ) { - const int ip = ( i + j ) % np ; - const int jp = ( i + np - ip ) % np ; - const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; - const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; - int k ; - if ( nrecv != nsend ) { - fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip); - fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); - 
abort(); - } - for ( k = 0 ; k < nrecv ; ++k ) { - const int irecv = map_recv_pc[i][j] + k ; - const int isend = map_send_pc[ip][jp] + k ; - const int val_irecv = local_values[i][irecv] ; - const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ; - if ( val_irecv != val_isend ) { - fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); - fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); - abort(); - } - } - } - } - - for ( i = 0 ; i < np ; ++i ) { - free( map_local_id[i] ); - free( map_recv_pc[i] ); - free( map_send_pc[i] ); - free( map_send_id[i] ); - free( local_values[i] ); - } - free( map_send_id ); - free( map_send_pc ); - free( map_recv_pc ); - free( map_local_id ); - free( local_values ); -} - -/*--------------------------------------------------------------------*/ - -int main( int argc , char * argv[] ) -{ - int np_max = 256 ; - Box box = { 0 , 64 , 0 , 64 , 0 , 64 }; - int np = 0 ; - - switch( argc ) { - case 3: - sscanf(argv[1],"%d",&np); - sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); - if ( 0 < np ) { test_box( box , np ); } - if ( 0 < np ) { test_maps( box , np ); } - break ; - default: - for ( np = 1 ; np <= np_max ; ++np ) { - test_box( box , np ); - test_maps( box , np ); - } - break ; - } - return 0 ; -} - -#endif - - diff --git a/kokkos/basic/BoxPartition.hpp b/kokkos/basic/BoxPartition.hpp deleted file mode 100644 index 4359a16..0000000 --- a/kokkos/basic/BoxPartition.hpp +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _BoxPartition_hpp_ -#define _BoxPartition_hpp_ - -#include - -/** \brief Recursively split a box into (up-ip) sub-boxes - */ -void box_partition( int ip , int up , int axis , - const Box& box , - Box* p_box ); - -/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. - * - * Use recursive coordinate bisection to partition a box - * into np disjoint sub-boxes. Allocate (via malloc) and - * populate the sub-boxes, mapping the local (x,y,z) to - * a local ordinal, and mappings for the send-recv messages - * to update the ghost cells. 
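 *
 * A small worked example of the recursive bisection done by box_partition
 * (declared above), assuming a 4x4x4 root box, np = 3, starting on the z axis:
 *
 *   Box root = { 0,4, 0,4, 0,4 };
 *   Box sub[3];
 *   box_partition( 0, 3, 2, root, sub );
 *   // sub[0]: x [0,4), y [0,4), z [0,2)   -- 1 rank gets n_low = 2 z-planes
 *   // sub[1]: x [0,4), y [0,2), z [2,4)   -- remaining 2 ranks split along y
 *   // sub[2]: x [0,4), y [2,4), z [2,4)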
- * - * usage: - * - * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; - * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; - * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; - * - * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { - * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { - * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { - * const int x_global = x + pbox[my_p][0][0] ; - * const int y_global = y + pbox[my_p][1][0] ; - * const int z_global = z + pbox[my_p][2][0] ; - * - * const int local_ordinal = - * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); - * - * if ( 0 <= local_ordinal ) { - * } - * } - * - * for ( i = 1 ; i < np ; ++i ) { - * const int recv_processor = ( my_p + i ) % np ; - * const int recv_ordinal_begin = map_recv_pc[i]; - * const int recv_ordinal_end = map_recv_pc[i+1]; - * } - * - * for ( i = 1 ; i < np ; ++i ) { - * const int send_processor = ( my_p + i ) % np ; - * const int send_map_begin = map_send_pc[i]; - * const int send_map_end = map_send_pc[i+1]; - * for ( j = send_map_begin ; j < send_map_end ; ++j ) { - * send_ordinal = map_send_id[j] ; - * } - * } - */ -void box_partition_rcb( - const int np /**< [in] Number of partitions */ , - const int my_p /**< [in] My partition rank */ , - const Box& root_box /**< [in] 3D Box to partition */ , - const int ghost /**< [in] Ghost cell boundary */ , - Box* pbox /**< [out] Partition's 3D boxes */ , - int ** map_local_id /**< [out] Map local cells */ , - int ** map_recv_pc /**< [out] Receive spans per processor */ , - int ** map_send_pc /**< [out] Send prefix counts per processor */ , - int ** map_send_id /**< [out] Send message ordinals */ ); - -/* \brief Map a local (x,y,z) to a local ordinal. - */ -int box_map_local( const Box& box_local , - const int ghost , - const int map_local_id[] , - const int local_x , - const int local_y , - const int local_z ); - -#endif - diff --git a/kokkos/basic/CSRMatrix.hpp b/kokkos/basic/CSRMatrix.hpp deleted file mode 100644 index 9cfeaee..0000000 --- a/kokkos/basic/CSRMatrix.hpp +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef _CSRMatrix_hpp_ -#define _CSRMatrix_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include -#include -#ifdef HAVE_MPI -#include -#endif - -namespace miniFE { - -template -struct -CSRMatrix { - CSRMatrix(ComputeNode& comp_node) - : has_local_indices(false), - rows(), row_offsets(), row_offsets_external(), - packed_cols(), packed_coefs(), - num_cols(0), - compute_node(comp_node) -#ifdef HAVE_MPI - ,external_index(), external_local_index(), elements_to_send(), - neighbors(), recv_length(), send_length(), send_buffer(), request() -#endif - { - } - - ~CSRMatrix() - {} - - typedef Scalar ScalarType; - typedef LocalOrdinal LocalOrdinalType; - typedef GlobalOrdinal GlobalOrdinalType; - typedef ComputeNode ComputeNodeType; - - bool has_local_indices; - std::vector rows; - std::vector row_offsets; - std::vector row_offsets_external; - std::vector packed_cols; - std::vector packed_coefs; - LocalOrdinal num_cols; - ComputeNode& compute_node; - -#ifdef HAVE_MPI - std::vector external_index; - std::vector external_local_index; - std::vector elements_to_send; - std::vector neighbors; - std::vector recv_length; - std::vector send_length; - std::vector send_buffer; - std::vector request; -#endif - - size_t num_nonzeros() const - { - return row_offsets[row_offsets.size()-1]; - } - - void reserve_space(unsigned nrows, unsigned ncols_per_row) - { - rows.resize(nrows); - row_offsets.resize(nrows+1); - packed_cols.reserve(nrows * ncols_per_row); - packed_coefs.reserve(nrows * ncols_per_row); - } - - void get_row_pointers(GlobalOrdinalType row, size_t& row_length, - GlobalOrdinalType*& cols, - ScalarType*& coefs) - { - ptrdiff_t local_row = -1; - //first see if we can get the local-row index using fast direct lookup: - if (rows.size() >= 1) { - ptrdiff_t idx = row - rows[0]; - if (idx < rows.size() && rows[idx] == row) { - local_row = idx; - } - } - - //if we didn't get the local-row index using direct lookup, try a - //more expensive binary-search: - if (local_row == -1) { - typename std::vector::iterator row_iter = - std::lower_bound(rows.begin(), rows.end(), row); - - //if we still haven't found row, it's not local so jump out: - if (row_iter == rows.end() || *row_iter != row) { - row_length = 0; - return; - } - - local_row = row_iter - rows.begin(); - } - - LocalOrdinalType offset = row_offsets[local_row]; - row_length = row_offsets[local_row+1] - offset; - cols = &packed_cols[offset]; - coefs = &packed_coefs[offset]; - } -}; - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/ComputeNodeType.hpp b/kokkos/basic/ComputeNodeType.hpp deleted file mode 100644 index e59f3eb..0000000 --- a/kokkos/basic/ComputeNodeType.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _ComputeNodeType_hpp_ -#define _ComputeNodeType_hpp_ - -#if defined(MINIFE_HAVE_TBB) - -#include -#include -typedef TBBNode ComputeNodeType; - -#elif defined(MINIFE_HAVE_TPI) - -#include -#include -typedef TPINode ComputeNodeType; - -#elif defined(MINIFE_HAVE_CUDA) - -#include -typedef CUDANode ComputeNodeType; - -#else - -#include -typedef SerialComputeNode ComputeNodeType; - -#endif - -#endif - diff --git a/kokkos/basic/DotOp.hpp b/kokkos/basic/DotOp.hpp deleted file mode 100644 index 6471949..0000000 --- a/kokkos/basic/DotOp.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef DOTOP_HPP_ -#define DOTOP_HPP_ - -template -struct DotOp { - typedef Scalar ReductionType; - - const Scalar* x; - const Scalar* y; - - size_t n; - - ReductionType result; - - inline DotOp() { - result = identity(); - } - - static 
inline KERNEL_PREFIX ReductionType identity() - { - return 0.0; - } - - inline KERNEL_PREFIX ReductionType reduce(ReductionType u, ReductionType v) const - { - return u+v; - } - - inline KERNEL_PREFIX Scalar generate(int i) const - { - return x[i]*y[i]; - } -}; - -#endif diff --git a/kokkos/basic/ELLMatrix.hpp b/kokkos/basic/ELLMatrix.hpp deleted file mode 100644 index 97b662f..0000000 --- a/kokkos/basic/ELLMatrix.hpp +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef _ELLMatrix_hpp_ -#define _ELLMatrix_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include -#include -#ifdef HAVE_MPI -#include -#endif - -namespace miniFE { - -template -struct -ELLMatrix { - ELLMatrix(ComputeNode& comp_node) - : has_local_indices(false), - rows(), - cols(), coefs(), - num_cols(0), - num_cols_per_row(0), - compute_node(comp_node) -#ifdef HAVE_MPI - ,external_index(), external_local_index(), elements_to_send(), - neighbors(), recv_length(), send_length(), send_buffer(), request() -#endif - { - } - - ~ELLMatrix() - {} - - typedef Scalar ScalarType; - typedef LocalOrdinal LocalOrdinalType; - typedef GlobalOrdinal GlobalOrdinalType; - typedef ComputeNode ComputeNodeType; - - bool has_local_indices; - std::vector rows; - std::vector cols; - std::vector coefs; - LocalOrdinal num_cols; - LocalOrdinal num_cols_per_row; - ComputeNode& compute_node; - -#ifdef HAVE_MPI - std::vector external_index; - std::vector external_local_index; - std::vector elements_to_send; - std::vector neighbors; - std::vector recv_length; - std::vector send_length; - std::vector send_buffer; - std::vector request; -#endif - - size_t num_nonzeros() const - { - return rows.size()*num_cols_per_row; - } - - void reserve_space(unsigned nrows, unsigned ncols_per_row) - { - rows.resize(nrows); - cols.resize(nrows * ncols_per_row); - coefs.resize(nrows * ncols_per_row); - num_cols_per_row = ncols_per_row; - } - - void get_row_pointers(GlobalOrdinalType row, size_t& row_length, - GlobalOrdinalType*& cols_ptr, - ScalarType*& coefs_ptr) - { - ptrdiff_t local_row = -1; - //first see if we can get the local-row index using fast direct lookup: - if (rows.size() >= 1) { - ptrdiff_t idx = row - rows[0]; - if (idx < rows.size() && rows[idx] == row) { - local_row = idx; - } - } - - //if we didn't get the local-row index using direct lookup, try a - //more expensive 
binary-search: - if (local_row == -1) { - typename std::vector::iterator row_iter = - std::lower_bound(rows.begin(), rows.end(), row); - - //if we still haven't found row, it's not local so jump out: - if (row_iter == rows.end() || *row_iter != row) { - row_length = 0; - return; - } - - local_row = row_iter - rows.begin(); - } - - cols_ptr = &cols[local_row*num_cols_per_row]; - coefs_ptr = &coefs[local_row*num_cols_per_row]; - - int idx = num_cols_per_row-1; - while(idx>=0) { - if (cols_ptr[idx] != 0) break; - --idx; - } - row_length = idx+1; - } -}; - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/FEComputeElem.hpp b/kokkos/basic/FEComputeElem.hpp deleted file mode 100644 index 03aa8a2..0000000 --- a/kokkos/basic/FEComputeElem.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef FECOMPUTEELEM_HPP_ -#define FECOMPUTEELEM_HPP_ - -#include - -#ifndef KERNEL_PREFIX -#define KERNEL_PREFIX -#endif - -template -struct FEComputeElem { - Scalar* elem_node_coords; - Scalar* elem_diffusion_matrix; - Scalar* elem_source_vector; - -inline KERNEL_PREFIX void operator()(int i) -{ - unsigned nnodes = miniFE::Hex8::numNodesPerElem; - unsigned dim = miniFE::Hex8::spatialDim; - Scalar* coords = elem_node_coords+i*nnodes*dim; - Scalar* diffusionMat = elem_diffusion_matrix+i*nnodes*nnodes; - Scalar* sourceVec = elem_source_vector+i*nnodes; - - miniFE::Hex8::diffusionMatrix(coords, diffusionMat); - miniFE::Hex8::sourceVector(coords, sourceVec); -} -}; - -#endif diff --git a/kokkos/basic/FusedMatvecDotOp.hpp b/kokkos/basic/FusedMatvecDotOp.hpp deleted file mode 100644 index e4b59e4..0000000 --- a/kokkos/basic/FusedMatvecDotOp.hpp +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef FUSEDMATVECDOTOP_HPP_ -#define FUSEDMATVECDOTOP_HPP_ - -#ifndef KERNEL_PREFIX -#define KERNEL_PREFIX -#endif - -template -struct FusedMatvecDotOp { - - typedef typename VectorType::GlobalOrdinalType GlobalOrdinalType; - typedef typename VectorType::LocalOrdinalType LocalOrdinalType; - typedef typename VectorType::ScalarType ScalarType; - typedef ScalarType ReductionType; - - size_t n; - - const LocalOrdinalType* Arowoffsets; - const GlobalOrdinalType* Acols; - const ScalarType* Acoefs; - - const ScalarType* x; - ScalarType* y; - ScalarType beta; - - ReductionType result; - - inline FusedMatvecDotOp() { - result = identity(); - } - - static inline KERNEL_PREFIX ReductionType identity() - { - return 0.0; - } - - inline KERNEL_PREFIX ReductionType reduce(ReductionType u, ReductionType v) const - { - return u+v; - } - - inline KERNEL_PREFIX ScalarType generate(int row) - { - //we count on the caller (ComputeNode) to pass in 'row' - //in range 0..n-1 - - ScalarType sum = beta*y[row]; - - for(LocalOrdinalType i=Arowoffsets[row]; i -#include - -template -struct GetNodesCoords { - const miniFE::simple_mesh_description* mesh; - GlobalOrdinal* elemIDs; - GlobalOrdinal* node_ordinals; - Scalar* elem_node_coords; - -inline void operator()(int i) -{ - unsigned nnodes = miniFE::Hex8::numNodesPerElem; - GlobalOrdinal elemID = elemIDs[i]; - GlobalOrdinal* node_ords = node_ordinals+i*nnodes; - Scalar* node_coords = elem_node_coords+i*nnodes*miniFE::Hex8::spatialDim; - get_elem_nodes_and_coords(*mesh, elemID, node_ords, node_coords); -} -}; - -#endif diff --git a/kokkos/basic/Hex8_box_utils.hpp b/kokkos/basic/Hex8_box_utils.hpp deleted file mode 100644 index c1662ec..0000000 --- a/kokkos/basic/Hex8_box_utils.hpp +++ /dev/null @@ -1,174 +0,0 @@ -#ifndef _Hex8_box_utils_hpp_ -#define _Hex8_box_utils_hpp_ - -//@HEADER -// 
************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include - -#include -#include -#include -#include - -namespace miniFE { - - -template -void get_hex8_node_ids(int nx, int ny, - GlobalOrdinal node0, - GlobalOrdinal* elem_node_ids) -{ -//Given box dimensions nx and ny, and a starting node -//(local-node-0 for a hex8), compute the other nodes -//of the hex8 using the exodus ordering convention. - elem_node_ids[0] = node0; - elem_node_ids[1] = node0 + 1; - elem_node_ids[2] = node0 + nx + 1; - elem_node_ids[3] = node0 + nx; - elem_node_ids[4] = node0 + nx*ny; - elem_node_ids[5] = node0 + 1 + nx*ny; - elem_node_ids[6] = node0 + nx + nx*ny + 1; - elem_node_ids[7] = node0 + nx + nx*ny; -} - -template -void get_hex8_node_coords_3d(Scalar x, Scalar y, Scalar z, - Scalar hx, Scalar hy, Scalar hz, - Scalar* elem_node_coords) -{ - //Input: x,y,z are the coordinates of local-node 0 for a Hex8. - //'hx', 'hy', 'hz' are the lengths of the sides of the element - //in each direction. - - elem_node_coords[0] = x; - elem_node_coords[1] = y; - elem_node_coords[2] = z; - - elem_node_coords[3] = x + hx; - elem_node_coords[4] = y; - elem_node_coords[5] = z; - - elem_node_coords[6] = x + hx; - elem_node_coords[7] = y + hy; - elem_node_coords[8] = z; - - elem_node_coords[9] = x; - elem_node_coords[10] = y + hy; - elem_node_coords[11] = z; - - elem_node_coords[12] = x; - elem_node_coords[13] = y; - elem_node_coords[14] = z + hz; - - elem_node_coords[15] = x + hx; - elem_node_coords[16] = y; - elem_node_coords[17] = z + hz; - - elem_node_coords[18] = x + hx; - elem_node_coords[19] = y + hy; - elem_node_coords[20] = z + hz; - - elem_node_coords[21] = x; - elem_node_coords[22] = y + hy; - elem_node_coords[23] = z + hz; -} - -template -void -get_elem_nodes_and_coords(const simple_mesh_description& mesh, - GlobalOrdinal elemID, - GlobalOrdinal* node_ords, Scalar* node_coords) -{ - int global_nodes_x = mesh.global_box[0][1]+1; - int global_nodes_y = mesh.global_box[1][1]+1; - int global_nodes_z = mesh.global_box[2][1]+1; - - if (elemID < 0) { - //I don't think this can happen, but check for the sake of paranoia... 
- throw std::runtime_error("get_elem_nodes_and_coords ERROR, negative elemID"); - } - - int elem_int_x, elem_int_y, elem_int_z; - get_int_coords(elemID, global_nodes_x-1, global_nodes_y-1, global_nodes_z-1, - elem_int_x, elem_int_y, elem_int_z); - GlobalOrdinal nodeID = get_id(global_nodes_x, global_nodes_y, global_nodes_z, elem_int_x, elem_int_y, elem_int_z); - -#ifdef MINIFE_DEBUG - std::cout<<"\nelemID: "<(nodeID, global_nodes_x,global_nodes_y,global_nodes_z, - ix,iy,iz); - Scalar hx = 1.0/global_elems_x; - Scalar hy = 1.0/global_elems_y; - Scalar hz = 1.0/global_elems_z; - get_hex8_node_coords_3d(ix, iy, iz, hx, hy, hz, node_coords); -#ifdef MINIFE_DEBUG - int offset = 0; - for(int i=0; i -void -get_elem_nodes_and_coords(const simple_mesh_description& mesh, - GlobalOrdinal elemID, - ElemData& elem_data) -{ - get_elem_nodes_and_coords(mesh, elemID, elem_data.elem_node_ids, elem_data.elem_node_coords); -} - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/Lock.hpp b/kokkos/basic/Lock.hpp deleted file mode 100644 index 16be86f..0000000 --- a/kokkos/basic/Lock.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef _Lock_hpp_ -#define _Lock_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#ifdef MINIFE_HAVE_TBB - -#include -#include - -namespace miniFE { - -static tbb::atomic miniFE_num_matrix_conflicts; -static tbb::atomic miniFE_num_vector_conflicts; - -//We have two lock classes, LockM and LockV. The only reason for -//this is so that they can separately track the number of conflicts -//for matrix accesses versus vector accesses (by incrementing the -//above counters). -//The LockingMatrix class uses LockM, LockingVector uses LockV. 
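//A usage sketch: constructing a LockM in a scope spins until the row's counter
//reaches 1, and the destructor releases it on scope exit. The names
//row_locks/local_row are illustrative, mirroring LockingMatrix::sum_in below:
//
//  {
//    LockM<unsigned> lock(row_locks[local_row]);
//    sum_into_row(row, row_len, col_indices, values, A);
//  } //~LockM decrements the tbb::atomic counter, releasing the row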
- -template -class LockM { -public: - // Constructors/destructors - LockM(tbb::atomic& row) - : locked_row_(row) - { - if (++locked_row_ != 1) { - unsigned counter = 0; - while(locked_row_ != 1) { - ++counter; - } - ++miniFE_num_matrix_conflicts; - } - } - ~LockM() - { --locked_row_; } - -private: - tbb::atomic& locked_row_; - LockM(const LockM&); - LockM& operator=(const LockM&); -}; - -template -class LockV { -public: - // Constructors/destructors - LockV(tbb::atomic& row) - : locked_row_(row) - { - if (++locked_row_ != 1) { - unsigned counter = 0; - while(locked_row_ != 1) { - ++counter; - } - ++miniFE_num_vector_conflicts; - } - } - ~LockV() - { --locked_row_; } - -private: - tbb::atomic& locked_row_; - LockV(const LockV&); - LockV& operator=(const LockV&); -}; - -}//namespace miniFE - -#else -#error "ERROR, this file shouldn't be compiled if MINIFE_HAVE_TBB isn't defined." -#endif - -#endif - diff --git a/kokkos/basic/LockingMatrix.hpp b/kokkos/basic/LockingMatrix.hpp deleted file mode 100644 index c278274..0000000 --- a/kokkos/basic/LockingMatrix.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef _LockingMatrix_hpp_ -#define _LockingMatrix_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include - -#include - -namespace miniFE { - -template -class LockingMatrix { -public: - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - - LockingMatrix(MatrixType& A) : A_(A), myFirstRow_(0), myLastRow_(0), numMyRows_(0), row_locks_() - { - if (A_.rows.size() > 0) { - myFirstRow_ = A_.rows[0]; - myLastRow_ = A_.rows[A_.rows.size()-1]; - } - numMyRows_ = myLastRow_-myFirstRow_+1; - row_locks_.resize(numMyRows_); - } - - void sum_in(GlobalOrdinal row, size_t row_len, const GlobalOrdinal* col_indices, const Scalar* values) - { - int local_row = row - myFirstRow_; - if (local_row >= 0 && local_row < numMyRows_) { - LockM lock(row_locks_[local_row]); - sum_into_row(row, row_len, col_indices, values, A_); - } - } - -private: - MatrixType& A_; - GlobalOrdinal myFirstRow_; - GlobalOrdinal myLastRow_; - size_t numMyRows_; - std::vector > row_locks_; -}; - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/LockingVector.hpp b/kokkos/basic/LockingVector.hpp deleted file mode 100644 index 60f7598..0000000 --- a/kokkos/basic/LockingVector.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _LockingVector_hpp_ -#define _LockingVector_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include - -#include - -namespace miniFE { - -template -class LockingVector { -public: - typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; - typedef typename VectorType::ScalarType Scalar; - - LockingVector(VectorType& x) : x_(x), myFirstRow_(0), myLastRow_(0), numMyRows_(0), row_locks_() - { - if (x_.local_size > 0) { - myFirstRow_ = x_.startIndex; - myLastRow_ = myFirstRow_ + x_.local_size - 1; - } - numMyRows_ = myLastRow_-myFirstRow_+1; - row_locks_.resize(numMyRows_); - } - - void sum_in(size_t num_indices, const GlobalOrdinal* indices, const Scalar* values) - { - for(int i=0; i= 0 && local_row < numMyRows_) { - LockV lock(row_locks_[local_row]); - sum_into_vector(1, &row, &values[i], x_); - } - } - } - -private: - VectorType& x_; - GlobalOrdinal myFirstRow_; - GlobalOrdinal myLastRow_; - size_t numMyRows_; - std::vector > row_locks_; -}; - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/MatrixCopyOp.hpp b/kokkos/basic/MatrixCopyOp.hpp deleted file mode 100644 index f6c300a..0000000 --- a/kokkos/basic/MatrixCopyOp.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _MatrixCopyOp_hpp_ -#define _MatrixCopyOp_hpp_ - -template -struct MatrixCopyOp { - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; - typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; - typedef typename MatrixType::ScalarType ScalarType; - - const GlobalOrdinalType* src_rows; - const LocalOrdinalType* src_rowoffsets; - const GlobalOrdinalType* src_cols; - const ScalarType* src_coefs; - - GlobalOrdinalType* dest_rows; - LocalOrdinalType* dest_rowoffsets; - GlobalOrdinalType* dest_cols; - ScalarType* dest_coefs; - int n; - - inline void operator()(int i) - { - dest_rows[i] = src_rows[i]; - dest_rowoffsets[i] = src_rowoffsets[i]; - for(int j=src_rowoffsets[i]; j -#include -#include - -#include -#include - -#include - -template -void sort_if_needed(GlobalOrdinal* list, - GlobalOrdinal list_len) -{ - bool need_to_sort = false; - for(GlobalOrdinal i=list_len-1; i>=1; --i) { - if (list[i] < list[i-1]) { - need_to_sort = true; - break; - } - } - - if (need_to_sort) { - std::sort(list,list+list_len); - } -} - -template -struct MatrixInitOp { -}; - -template<> -struct MatrixInitOp > { - MatrixInitOp(const std::vector& rows_vec, - const std::vector& row_offsets_vec, - const std::vector& row_coords_vec, - int global_nx, int global_ny, int global_nz, - MINIFE_GLOBAL_ORDINAL global_n_rows, - const miniFE::simple_mesh_description& input_mesh, - miniFE::CSRMatrix& matrix) - : rows(&rows_vec[0]), - row_offsets(&row_offsets_vec[0]), - row_coords(&row_coords_vec[0]), - global_nodes_x(global_nx), - global_nodes_y(global_ny), - global_nodes_z(global_nz), - global_nrows(global_n_rows), - mesh(&input_mesh), - dest_rows(&matrix.rows[0]), - dest_rowoffsets(&matrix.row_offsets[0]), - dest_cols(&matrix.packed_cols[0]), - dest_coefs(&matrix.packed_coefs[0]), - n(matrix.rows.size()) - { - matrix.packed_cols.resize(row_offsets_vec[n]); - matrix.packed_coefs.resize(row_offsets_vec[n]); - dest_rowoffsets[n] = row_offsets_vec[n]; - } - - typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; - typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; - typedef MINIFE_SCALAR ScalarType; - - const GlobalOrdinalType* rows; - const LocalOrdinalType* row_offsets; - const int* row_coords; - - int global_nodes_x; - int global_nodes_y; - int global_nodes_z; - - GlobalOrdinalType global_nrows; - - 
GlobalOrdinalType* dest_rows; - LocalOrdinalType* dest_rowoffsets; - GlobalOrdinalType* dest_cols; - ScalarType* dest_coefs; - int n; - - const miniFE::simple_mesh_description* mesh; - - inline void operator()(int i) - { - dest_rows[i] = rows[i]; - int offset = row_offsets[i]; - dest_rowoffsets[i] = offset; - int ix = row_coords[i*3]; - int iy = row_coords[i*3+1]; - int iz = row_coords[i*3+2]; - GlobalOrdinalType nnz = 0; - for(int sz=-1; sz<=1; ++sz) - for(int sy=-1; sy<=1; ++sy) - for(int sx=-1; sx<=1; ++sx) { - GlobalOrdinalType col_id = - miniFE::get_id(global_nodes_x, global_nodes_y, global_nodes_z, - ix+sx, iy+sy, iz+sz); - if (col_id >= 0 && col_id < global_nrows) { - GlobalOrdinalType col = mesh->map_id_to_row(col_id); - dest_cols[offset+nnz] = col; - dest_coefs[offset+nnz] = 0; - ++nnz; - } - } - - sort_if_needed(&dest_cols[offset], nnz); - } -}; - -template<> -struct MatrixInitOp > { - MatrixInitOp(const std::vector& rows_vec, - const std::vector& /*row_offsets_vec*/, - const std::vector& row_coords_vec, - int global_nx, int global_ny, int global_nz, - MINIFE_GLOBAL_ORDINAL global_n_rows, - const miniFE::simple_mesh_description& input_mesh, - miniFE::ELLMatrix& matrix) - : rows(&rows_vec[0]), - row_coords(&row_coords_vec[0]), - global_nodes_x(global_nx), - global_nodes_y(global_ny), - global_nodes_z(global_nz), - global_nrows(global_n_rows), - mesh(&input_mesh), - dest_rows(&matrix.rows[0]), - dest_cols(&matrix.cols[0]), - dest_coefs(&matrix.coefs[0]), - n(matrix.rows.size()), - ncols_per_row(matrix.num_cols_per_row) - { - } - - typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; - typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; - typedef MINIFE_SCALAR ScalarType; - - const GlobalOrdinalType* rows; - const int* row_coords; - - int global_nodes_x; - int global_nodes_y; - int global_nodes_z; - - GlobalOrdinalType global_nrows; - - GlobalOrdinalType* dest_rows; - GlobalOrdinalType* dest_cols; - ScalarType* dest_coefs; - int n; - int ncols_per_row; - - const miniFE::simple_mesh_description* mesh; - - inline void operator()(int i) - { - dest_rows[i] = rows[i]; - int offset = i*ncols_per_row; - int ix = row_coords[i*3]; - int iy = row_coords[i*3+1]; - int iz = row_coords[i*3+2]; - GlobalOrdinalType nnz = 0; - for(int sz=-1; sz<=1; ++sz) - for(int sy=-1; sy<=1; ++sy) - for(int sx=-1; sx<=1; ++sx) { - GlobalOrdinalType col_id = - miniFE::get_id(global_nodes_x, global_nodes_y, global_nodes_z, - ix+sx, iy+sy, iz+sz); - if (col_id >= 0 && col_id < global_nrows) { - GlobalOrdinalType col = mesh->map_id_to_row(col_id); - dest_cols[offset+nnz] = col; - dest_coefs[offset+nnz] = 0; - ++nnz; - } - } - - sort_if_needed(&dest_cols[offset], nnz); - } -}; - -#endif - diff --git a/kokkos/basic/MatvecOp.hpp b/kokkos/basic/MatvecOp.hpp deleted file mode 100644 index 9c5c8e4..0000000 --- a/kokkos/basic/MatvecOp.hpp +++ /dev/null @@ -1,99 +0,0 @@ -#ifndef _MatvecOp_hpp_ -#define _MatvecOp_hpp_ - -#ifndef KERNEL_PREFIX -#define KERNEL_PREFIX -#endif - -#include -#include -#include - -template -struct MatvecOp { -}; - -template<> -struct MatvecOp > { - MatvecOp(miniFE::CSRMatrix& A) - : n(A.rows.size()), - Arowoffsets(&A.row_offsets[0]), - Acols(&A.packed_cols[0]), - Acoefs(&A.packed_coefs[0]) - { - } - - size_t n; - - typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; - typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; - typedef MINIFE_SCALAR ScalarType; - - const LocalOrdinalType* Arowoffsets; - const GlobalOrdinalType* Acols; - const ScalarType* Acoefs; - - const ScalarType* x; - ScalarType* y; - ScalarType beta; 
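  //For reference, the CSR arrays consumed here, shown for a small
  //hypothetical 3x3 matrix (a sketch, not data from the benchmark):
  //
  //   [ 2 -1  0 ]        Arowoffsets = {0, 2, 5, 7}
  //   [-1  2 -1 ]  ==>   Acols       = {0,1,  0,1,2,  1,2}
  //   [ 0 -1  2 ]        Acoefs      = {2,-1, -1,2,-1, -1,2}
  //
  //operator() below reads the columns and coefficients for 'row' from the
  //half-open range [Arowoffsets[row], Arowoffsets[row+1]).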
- - inline KERNEL_PREFIX void operator()(int row) - { - //we count on the caller (ComputeNode) to pass in 'row' - //in range 0..n-1 - - ScalarType sum = beta*y[row]; - - for(LocalOrdinalType i=Arowoffsets[row]; i -struct MatvecOp > { - MatvecOp(miniFE::ELLMatrix& A) - : n(A.rows.size()), - Acols(&A.cols[0]), - Acoefs(&A.coefs[0]), - ncols_per_row(A.num_cols_per_row) - { - } - - size_t n; - - typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; - typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; - typedef MINIFE_SCALAR ScalarType; - - const GlobalOrdinalType* Acols; - const ScalarType* Acoefs; - int ncols_per_row; - - const ScalarType* x; - ScalarType* y; - ScalarType beta; - - inline KERNEL_PREFIX void operator()(int row) - { - //we count on the caller (ComputeNode) to pass in 'row' - //in range 0..n-1 - - ScalarType sum = beta*y[row]; - - for(LocalOrdinalType i=0; i -struct MemInitOp { - Scalar* ptr; - size_t n; - inline void operator()(size_t i) - { - ptr[i] = 0; - } -}; - -#endif diff --git a/kokkos/basic/NoOpMemoryModel.hpp b/kokkos/basic/NoOpMemoryModel.hpp deleted file mode 100644 index 92d1eb1..0000000 --- a/kokkos/basic/NoOpMemoryModel.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _NoOpMemoryModel_hpp_ -#define _NoOpMemoryModel_hpp_ - -class NoOpMemoryModel { - public: - NoOpMemoryModel(){} - virtual ~NoOpMemoryModel(){} - - template - T* get_buffer(const T* host_ptr, size_t buf_size) - { return const_cast(host_ptr); } - - template - void destroy_buffer(T*& device_ptr) - { } - - template - void copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr) - { } - - template - void copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr) - { } -}; - -#endif - diff --git a/kokkos/basic/SerialComputeNode.hpp b/kokkos/basic/SerialComputeNode.hpp deleted file mode 100644 index 1f45ed8..0000000 --- a/kokkos/basic/SerialComputeNode.hpp +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef SERIALCOMPUTENODE_HPP_ -#define SERIALCOMPUTENODE_HPP_ - -#include - -class SerialComputeNode : public NoOpMemoryModel { - public: - template - void parallel_for(unsigned int length, WDP wd) { - for(int i=0; i - void parallel_reduce(unsigned int length, WDP &wd) { - wd.result = wd.identity(); - for(int i=0; i -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef MINIFE_HAVE_TBB -#include -#endif - -#ifdef HAVE_MPI -#include -#endif - -namespace miniFE { - -template -void init_matrix(MatrixType& M, - const std::vector& rows, - const std::vector& row_offsets, - const std::vector& row_coords, - int global_nodes_x, - int global_nodes_y, - int global_nodes_z, - typename MatrixType::GlobalOrdinalType global_nrows, - const simple_mesh_description& mesh) -{ - MatrixInitOp mat_init(rows, row_offsets, row_coords, - global_nodes_x, global_nodes_y, global_nodes_z, - global_nrows, mesh, M); - -#ifdef MINIFE_HAVE_CUDA -//if on cuda, don't do this with parallel_for... 
- for(size_t i=0; i -void sort_with_companions(ptrdiff_t len, T* array, U* companions) -{ - ptrdiff_t i, j, index; - U companion; - - for (i=1; i < len; i++) { - index = array[i]; - companion = companions[i]; - j = i; - while ((j > 0) && (array[j-1] > index)) - { - array[j] = array[j-1]; - companions[j] = companions[j-1]; - j = j - 1; - } - array[j] = index; - companions[j] = companion; - } -} - -template -void write_matrix(const std::string& filename, - MatrixType& mat) -{ - typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; - typedef typename MatrixType::ScalarType ScalarType; - - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - std::ostringstream osstr; - osstr << filename << "." << numprocs << "." << myproc; - std::string full_name = osstr.str(); - std::ofstream ofs(full_name.c_str()); - - size_t nrows = mat.rows.size(); - size_t nnz = mat.num_nonzeros(); - - for(int p=0; p -void -sum_into_row(int row_len, - GlobalOrdinal* row_indices, - Scalar* row_coefs, - int num_inputs, - const GlobalOrdinal* input_indices, - const Scalar* input_coefs) -{ - for(size_t i=0; i -void -sum_into_row(typename MatrixType::GlobalOrdinalType row, - size_t num_indices, - const typename MatrixType::GlobalOrdinalType* col_inds, - const typename MatrixType::ScalarType* coefs, - MatrixType& mat) -{ - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - - size_t row_len = 0; - GlobalOrdinal* mat_row_cols = NULL; - Scalar* mat_row_coefs = NULL; - - mat.get_row_pointers(row, row_len, mat_row_cols, mat_row_coefs); - if (row_len == 0) return; - - sum_into_row(row_len, mat_row_cols, mat_row_coefs, num_indices, col_inds, coefs); -} - -template -void -sum_in_symm_elem_matrix(size_t num, - const typename MatrixType::GlobalOrdinalType* indices, - const typename MatrixType::ScalarType* coefs, - MatrixType& mat) -{ - typedef typename MatrixType::ScalarType Scalar; - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - -//indices is length num (which should be nodes-per-elem) -//coefs is the upper triangle of the element diffusion matrix -//which should be length num*(num+1)/2 -//std::cout< -void -sum_in_elem_matrix(size_t num, - const typename MatrixType::GlobalOrdinalType* indices, - const typename MatrixType::ScalarType* coefs, - MatrixType& mat) -{ - size_t offset = 0; - - for(size_t i=0; i -void -sum_into_global_linear_system(ElemData& elem_data, - MatrixType& A, VectorType& b) -{ - sum_in_symm_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, - elem_data.elem_diffusion_matrix, A); - sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, - elem_data.elem_source_vector, b); -} - -#ifdef MINIFE_HAVE_TBB -template -void -sum_in_elem_matrix(size_t num, - const typename MatrixType::GlobalOrdinalType* indices, - const typename MatrixType::ScalarType* coefs, - LockingMatrix& mat) -{ - size_t offset = 0; - - for(size_t i=0; i -void -sum_into_global_linear_system(ElemData& elem_data, - LockingMatrix& A, LockingVector& b) -{ - sum_in_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, - elem_data.elem_diffusion_matrix, A); - sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, - elem_data.elem_source_vector, b); -} -#endif - -template -void -add_to_diagonal(typename MatrixType::ScalarType value, MatrixType& mat) -{ - for(size_t i=0; i 
-double -parallel_memory_overhead_MB(const MatrixType& A) -{ - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - double mem_MB = 0; - -#ifdef HAVE_MPI - double invMB = 1.0/(1024*1024); - mem_MB = invMB*A.external_index.size()*sizeof(GlobalOrdinal); - mem_MB += invMB*A.external_local_index.size()*sizeof(GlobalOrdinal); - mem_MB += invMB*A.elements_to_send.size()*sizeof(GlobalOrdinal); - mem_MB += invMB*A.neighbors.size()*sizeof(int); - mem_MB += invMB*A.recv_length.size()*sizeof(LocalOrdinal); - mem_MB += invMB*A.send_length.size()*sizeof(LocalOrdinal); - - double tmp = mem_MB; - MPI_Allreduce(&tmp, &mem_MB, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); -#endif - - return mem_MB; -} - -template -void rearrange_matrix_local_external(MatrixType& A) -{ - //This function will rearrange A so that local entries are contiguous at the front - //of A's memory, and external entries are contiguous at the back of A's memory. - // - //A.row_offsets will describe where the local entries occur, and - //A.row_offsets_external will describe where the external entries occur. - - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - - size_t nrows = A.rows.size(); - std::vector tmp_row_offsets(nrows*2); - std::vector tmp_row_offsets_external(nrows*2); - - LocalOrdinal num_local_nz = 0; - LocalOrdinal num_extern_nz = 0; - - //First sort within each row of A, so that local entries come - //before external entries within each row. - //tmp_row_offsets describe the locations of the local entries, and - //tmp_row_offsets_external describe the locations of the external entries. - // - for(size_t i=0; i ext_cols(num_extern_nz); - std::vector ext_coefs(num_extern_nz); - std::vector ext_offsets(nrows+1); - LocalOrdinal offset = 0; - for(size_t i=0; i -void -zero_row_and_put_1_on_diagonal(MatrixType& A, typename MatrixType::GlobalOrdinalType row) -{ - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - - size_t row_len = 0; - GlobalOrdinal* cols = NULL; - Scalar* coefs = NULL; - A.get_row_pointers(row, row_len, cols, coefs); - - for(size_t i=0; i -void -impose_dirichlet(typename MatrixType::ScalarType prescribed_value, - MatrixType& A, - VectorType& b, - int global_nx, - int global_ny, - int global_nz, - const std::set& bc_rows) -{ - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - - GlobalOrdinal first_local_row = A.rows.size()>0 ? A.rows[0] : 0; - GlobalOrdinal last_local_row = A.rows.size()>0 ? 
A.rows[A.rows.size()-1] : -1; - - typename std::set::const_iterator - bc_iter = bc_rows.begin(), bc_end = bc_rows.end(); - for(; bc_iter!=bc_end; ++bc_iter) { - GlobalOrdinal row = *bc_iter; - if (row >= first_local_row && row <= last_local_row) { - size_t local_row = row - first_local_row; - b.coefs[local_row] = prescribed_value; - zero_row_and_put_1_on_diagonal(A, row); - } - } - - for(size_t i=0; i -typename TypeTraits::magnitude_type -matvec_and_dot(MatrixType& A, - VectorType& x, - VectorType& y) -{ - timer_type t0 = mytimer(); - exchange_externals(A, x); - exchtime += mytimer()-t0; - - typedef typename TypeTraits::magnitude_type magnitude; - typedef typename MatrixType::ScalarType ScalarType; - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; - typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; - typedef typename MatrixType::ComputeNodeType ComputeNodeType; - - ComputeNodeType& comp_node = A.compute_node; - - FusedMatvecDotOp mvdotop; - - mvdotop.n = A.rows.size(); - mvdotop.Arowoffsets = comp_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); - mvdotop.Acols = comp_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); - mvdotop.Acoefs = comp_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); - mvdotop.x = comp_node.get_buffer(&x.coefs[0], x.coefs.size()); - mvdotop.y = comp_node.get_buffer(&y.coefs[0], y.coefs.size()); - mvdotop.beta = 0; - - comp_node.parallel_reduce(mvdotop.n, mvdotop); - -#ifdef HAVE_MPI - magnitude local_dot = mvdotop.result, global_dot = 0; - MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); - MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); - return global_dot; -#else - return mvdotop.result; -#endif -} - -//------------------------------------------------------------------------ -//Compute matrix vector product y = A*x where: -// -// A - input matrix -// x - input vector -// y - result vector -// -template -struct matvec_std { -void operator()(MatrixType& A, - VectorType& x, - VectorType& y) -{ - exchange_externals(A, x); - - typedef typename MatrixType::ScalarType ScalarType; - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; - typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; - typedef typename MatrixType::ComputeNodeType ComputeNodeType; - - ComputeNodeType& comp_node = A.compute_node; - - MatvecOp mvop(A); - - mvop.x = comp_node.get_buffer(&x.coefs[0], x.coefs.size()); - mvop.y = comp_node.get_buffer(&y.coefs[0], y.coefs.size()); - mvop.beta = 0; - - comp_node.parallel_for(mvop.n, mvop); -} -}; - -template -void matvec(MatrixType& A, VectorType& x, VectorType& y) -{ - matvec_std mv; - mv(A, x, y); -} - -template -struct matvec_overlap { -void operator()(MatrixType& A, - VectorType& x, - VectorType& y) -{ -#ifdef HAVE_MPI - begin_exchange_externals(A, x); -#endif - - typedef typename MatrixType::ScalarType ScalarType; - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; - typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; - typedef typename MatrixType::ComputeNodeType ComputeNodeType; - - ComputeNodeType& comp_node = A.compute_node; - - MatvecOp mvop(A); - - mvop.x = comp_node.get_buffer(&x.coefs[0], x.coefs.size()); - mvop.y = comp_node.get_buffer(&y.coefs[0], y.coefs.size()); - mvop.beta = 0; - - comp_node.parallel_for(mvop.n, mvop); - -#ifdef HAVE_MPI - finish_exchange_externals(A.neighbors.size()); - - mvop.Arowoffsets = comp_node.get_buffer(&A.row_offsets_external[0], A.row_offsets_external.size()); - mvop.beta = 1; 
- - comp_node.parallel_for(A.rows.size(), mvop); -#endif -} -}; - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/SumInLinSys.hpp b/kokkos/basic/SumInLinSys.hpp deleted file mode 100644 index d5f6471..0000000 --- a/kokkos/basic/SumInLinSys.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _SUMINLINSYS_HPP_ -#define _SUMINLINSYS_HPP_ - -#include -#include -#include - -template -struct SumInLinSys { - GlobalOrdinal* node_ordinals; - Scalar* elem_diffusion_matrix; - Scalar* elem_source_vector; - miniFE::LockingMatrix* A; - miniFE::LockingVector* b; - -inline void operator()(int i) -{ - size_t nnodes = miniFE::Hex8::numNodesPerElem; - GlobalOrdinal* node_ords = node_ordinals+i*nnodes; - Scalar* diffusionMat = elem_diffusion_matrix+i*nnodes*nnodes; - Scalar* sourceVec = elem_source_vector+i*nnodes; - for(size_t ii=0; iisum_in(row, nnodes, node_ords, - &(diffusionMat[ii*nnodes])); - b->sum_in(1, &row, &(sourceVec[ii])); - } -} - -}; - -#endif diff --git a/kokkos/basic/TBBNode.cpp b/kokkos/basic/TBBNode.cpp deleted file mode 100644 index 20078fd..0000000 --- a/kokkos/basic/TBBNode.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#ifdef MINIFE_HAVE_TBB - -#include "TBBNode.hpp" - -tbb::task_scheduler_init TBBNode::tsi_(tbb::task_scheduler_init::deferred); - -#endif - diff --git a/kokkos/basic/TBBNode.hpp b/kokkos/basic/TBBNode.hpp deleted file mode 100644 index 6b1fe89..0000000 --- a/kokkos/basic/TBBNode.hpp +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef TBBNODE_HPP_ -#define TBBNODE_HPP_ - -#include -#include -#include -#include -#include - -#include - -#include // debug - -template -struct BlockedRangeWDP { - mutable WDPin wd; - BlockedRangeWDP(WDPin &in) : wd(in) {} - inline void operator()(tbb::blocked_range &rng) const - { - for(int i=rng.begin(); i -struct BlockedRangeWDPReducer { - WDPin wd; - BlockedRangeWDPReducer(WDPin &in) : wd(in) {} - BlockedRangeWDPReducer(BlockedRangeWDPReducer &in, tbb::split) : wd(in.wd) - { - wd.result = wd.identity(); - } - void operator()(tbb::blocked_range &rng) - { - for(int i=rng.begin(); i &other ) { - wd.result = wd.reduce( wd.result, other.wd.result ); - } -}; - -class TBBNode : public NoOpMemoryModel { - public: - - TBBNode(int numThreads=0) { - if (numThreads >= 1) { - tsi_.initialize(numThreads); - } - else { - tsi_.initialize(tbb::task_scheduler_init::automatic); - } - } - - ~TBBNode() {} - - template - void parallel_for(int length, WDP wd) { - BlockedRangeWDP tbb_wd(wd); - tbb::parallel_for(tbb::blocked_range(0,length), tbb_wd, tbb::auto_partitioner()); - } - - template - void parallel_reduce(int length, WDP &wd) { - BlockedRangeWDPReducer tbb_wd(wd); - tbb::parallel_reduce(tbb::blocked_range(0,length), tbb_wd, tbb::auto_partitioner()); - wd.result = tbb_wd.wd.result; // have to put result from final tbb_wd into orginal wd - } - - private: - static tbb::task_scheduler_init tsi_; -}; - -#endif diff --git a/kokkos/basic/TPINode.hpp b/kokkos/basic/TPINode.hpp deleted file mode 100644 index 66ec84f..0000000 --- a/kokkos/basic/TPINode.hpp +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef TPINODE_HPP_ -#define TPINODE_HPP_ - -#include - -#include - -#include // debug - -inline -void tpi_work_span(TPI_Work* work, int n, - size_t& ibeg, size_t& iend) -{ - const int chunk = ( n + work->count - 1 ) / work->count ; - - iend = chunk * ( work->rank + 1 ); - ibeg = chunk * ( work->rank ); - - if ( n < iend ) { iend = n; } -} - -template -void tpi_execute(TPI_Work * work) -{ - const WDP* const_wdp = static_cast(work->info); - WDP* wdp = const_cast(const_wdp); - size_t n = wdp->n; - size_t 
ibeg = 0, iend = n; - tpi_work_span(work, n, ibeg, iend); - for(size_t i=ibeg; i -void tpi_reduction_work(TPI_Work * work) -{ - const WDP* wdp = static_cast(work->info); - size_t n = wdp->n; - size_t ibeg = 0, iend = n; - tpi_work_span(work, n, ibeg, iend); - - typedef typename WDP::ReductionType ReductionType; - ReductionType tmpres = wdp->result, tmpi; - - for(size_t i=ibeg; igenerate(i); - tmpres = wdp->reduce(tmpres, tmpi); - } - *(static_cast(work->reduce)) = tmpres; -} - -template -void tpi_reduction_join(TPI_Work * work, const void* src) -{ - typedef typename WDP::ReductionType ReductionType; - - const WDP* wdp = static_cast(work->info); - - ReductionType& work_reduce = *(static_cast(work->reduce)); - - work_reduce = wdp->reduce(work_reduce, *(static_cast(src)) ); -} - -template -void tpi_reduction_init(TPI_Work * work) -{ - typedef typename WDP::ReductionType ReductionType; - - const WDP* wdp = static_cast(work->info); - - *(static_cast(work->reduce)) = wdp->identity(); -} - -class TPINode : public NoOpMemoryModel { - public: - - TPINode(int numThreads=0) - : numThreads_(numThreads) - { - if (numThreads >= 1) { - TPI_Init(numThreads); - } - } - - ~TPINode() - { - if (numThreads_ >= 1) { - TPI_Finalize(); - } - } - - template - void parallel_for(int length, WDP & wd ) { - TPI_Run_threads(tpi_execute, &wd, 0 ); - } - - template - void parallel_reduce(int length, WDP & wd ) { - typedef typename WDP::ReductionType ReductionType; - ReductionType result = 0; - TPI_Run_threads_reduce(tpi_reduction_work, &wd, - tpi_reduction_join, - tpi_reduction_init, sizeof(result), &result); - wd.result = result; - } - - private: - int numThreads_; -}; - -#endif - diff --git a/kokkos/basic/TypeTraits.hpp b/kokkos/basic/TypeTraits.hpp deleted file mode 100644 index 3ac472c..0000000 --- a/kokkos/basic/TypeTraits.hpp +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef _TypeTraits_hpp_ -#define _TypeTraits_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include - -#ifdef HAVE_MPI -#include -#endif - -namespace miniFE { - -template struct TypeTraits {}; - -template<> -struct TypeTraits { - typedef float magnitude_type; - - static const char* name() {return "float";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_FLOAT;} -#endif -}; - -template<> -struct TypeTraits { - typedef double magnitude_type; - - static const char* name() {return "double";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_DOUBLE;} -#endif -}; - -template<> -struct TypeTraits { - typedef int magnitude_type; - - static const char* name() {return "int";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_INT;} -#endif -}; - -template<> -struct TypeTraits { - typedef long int magnitude_type; - - static const char* name() {return "long int";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_LONG;} -#endif -}; - -#ifndef MINIFE_NO_LONG_LONG - -template<> -struct TypeTraits { - typedef long long magnitude_type; - - static const char* name() {return "long long";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_LONG_LONG;} -#endif -}; - -#endif - -template<> -struct TypeTraits { - typedef unsigned magnitude_type; - - static const char* name() {return "unsigned";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_UNSIGNED;} -#endif -}; - -template<> -struct TypeTraits > { - typedef float magnitude_type; - - static const char* name() {return "std::complex";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_COMPLEX;} -#endif -}; - -template<> -struct TypeTraits > { - typedef double magnitude_type; - - static const char* name() {return "std::complex";} - -#ifdef HAVE_MPI - static MPI_Datatype mpi_type() {return MPI_DOUBLE_COMPLEX;} -#endif -}; - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/Vector.hpp b/kokkos/basic/Vector.hpp deleted file mode 100644 index 4290ae4..0000000 --- a/kokkos/basic/Vector.hpp +++ /dev/null @@ -1,83 +0,0 @@ -#ifndef _Vector_hpp_ -#define _Vector_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include - -#include - -namespace miniFE { - - -template -struct Vector { - typedef ComputeNode ComputeNodeType; - typedef Scalar ScalarType; - typedef LocalOrdinal LocalOrdinalType; - typedef GlobalOrdinal GlobalOrdinalType; - - Vector(GlobalOrdinal startIdx, LocalOrdinal local_sz, ComputeNode& cn) - : startIndex(startIdx), - local_size(local_sz), - coefs(local_size), - compute_node(cn) - { - MemInitOp mem_init; - mem_init.ptr = &coefs[0]; - mem_init.n = local_size; -#ifdef MINIFE_HAVE_CUDA -//we don't want to run this mem-init kernel on cuda, we want -//to just run it locally on the host. - for(size_t i=0; i coefs; - ComputeNode& compute_node; -}; - - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/Vector_functions.hpp b/kokkos/basic/Vector_functions.hpp deleted file mode 100644 index f82866e..0000000 --- a/kokkos/basic/Vector_functions.hpp +++ /dev/null @@ -1,249 +0,0 @@ -#ifndef _Vector_functions_hpp_ -#define _Vector_functions_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -#ifdef MINIFE_HAVE_TBB -#include -#endif - -#include -#include -#include -#include - - -namespace miniFE { - - -template -void write_vector(const std::string& filename, - const VectorType& vec) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - std::ostringstream osstr; - osstr << filename << "." << numprocs << "." 
<< myproc; - std::string full_name = osstr.str(); - std::ofstream ofs(full_name.c_str()); - - typedef typename VectorType::ScalarType ScalarType; - - const std::vector& coefs = vec.coefs; - for(int p=0; p -void sum_into_vector(size_t num_indices, - const typename VectorType::GlobalOrdinalType* indices, - const typename VectorType::ScalarType* coefs, - VectorType& vec) -{ - typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; - typedef typename VectorType::ScalarType Scalar; - - GlobalOrdinal first = vec.startIndex; - GlobalOrdinal last = first + vec.local_size - 1; - - std::vector& vec_coefs = vec.coefs; - - for(size_t i=0; i last) continue; - size_t idx = indices[i] - first; - vec_coefs[idx] += coefs[i]; - } -} - -#ifdef MINIFE_HAVE_TBB -template -void sum_into_vector(size_t num_indices, - const typename VectorType::GlobalOrdinalType* indices, - const typename VectorType::ScalarType* coefs, - LockingVector& vec) -{ - vec.sum_in(num_indices, indices, coefs); -} -#endif - -//------------------------------------------------------------ -//Compute the update of a vector with the sum of two scaled vectors where: -// -// w = alpha*x + beta*y -// -// x,y - input vectors -// -// alpha,beta - scalars applied to x and y respectively -// -// w - output vector -// -template -void - waxpby(typename VectorType::ScalarType alpha, const VectorType& x, - typename VectorType::ScalarType beta, const VectorType& y, - VectorType& w) -{ - typedef typename VectorType::ScalarType ScalarType; - typedef typename VectorType::ComputeNodeType ComputeNodeType; - - ComputeNodeType& compute_node = x.compute_node; - - WaxpbyOp waxpbyop; - - waxpbyop.w = compute_node.get_buffer(&w.coefs[0], w.coefs.size()); - waxpbyop.x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - waxpbyop.y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); - waxpbyop.alpha = alpha; - waxpbyop.beta = beta; - waxpbyop.n = x.local_size; - -#ifdef MINIFE_DEBUG - if (y.local_size < x.local_size || w.local_size < x.local_size) { - std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." << std::endl; - return; - } -#endif - - compute_node.parallel_for(waxpbyop.n, waxpbyop); -} - -//Like waxpby above, except operates on two sets of arguments. -//In other words, performs two waxpby operations in one loop. 
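//(Fusing the two updates means a single parallel dispatch over the index
//range instead of two, which matters when each parallel_for launch carries
//overhead, e.g. on a GPU compute node. The FusedWaxpbyOp kernel in
//WaxpbyOp.hpp below simply applies both updates at each index i:
//  w[i]  = alpha*x[i]   + beta*y[i];
//  w2[i] = alpha2*x2[i] + beta2*y2[i]; )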
-template -void - fused_waxpby(typename VectorType::ScalarType alpha, const VectorType& x, - typename VectorType::ScalarType beta, const VectorType& y, - VectorType& w, - typename VectorType::ScalarType alpha2, const VectorType& x2, - typename VectorType::ScalarType beta2, const VectorType& y2, - VectorType& w2) -{ - typedef typename VectorType::ScalarType ScalarType; - typedef typename VectorType::ComputeNodeType ComputeNodeType; - - ComputeNodeType& compute_node = x.compute_node; - - FusedWaxpbyOp waxpbyop; - - waxpbyop.w = compute_node.get_buffer(&w.coefs[0], w.coefs.size()); - waxpbyop.x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - waxpbyop.y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); - waxpbyop.alpha = alpha; - waxpbyop.beta = beta; - waxpbyop.w2 = compute_node.get_buffer(&w2.coefs[0], w2.coefs.size()); - waxpbyop.x2 = compute_node.get_buffer(&x2.coefs[0], x2.coefs.size()); - waxpbyop.y2 = compute_node.get_buffer(&y2.coefs[0], y2.coefs.size()); - waxpbyop.alpha2 = alpha2; - waxpbyop.beta2 = beta2; - waxpbyop.n = x.local_size; - -#ifdef MINIFE_DEBUG - if (y.local_size < x.local_size || w.local_size < x.local_size) { - std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." << std::endl; - return; - } -#endif - - compute_node.parallel_for(waxpbyop.n, waxpbyop); -} - -//----------------------------------------------------------- -//Compute the dot product of two vectors where: -// -// x,y - input vectors -// -// result - return-value -// -template -typename TypeTraits::magnitude_type - dot(const Vector& x, - const Vector& y) -{ - size_t n = x.local_size; - -#ifdef MINIFE_DEBUG - if (y.local_size < n) { - std::cerr << "miniFE::dot ERROR, y must be at least as long as x."<::magnitude_type magnitude; - - typedef typename Vector::ComputeNodeType ComputeNodeType; - - ComputeNodeType& compute_node = x.compute_node; - - DotOp dotop; - dotop.x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - dotop.y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); - dotop.n = x.local_size; - - compute_node.parallel_reduce(n, dotop); - -#ifdef HAVE_MPI - magnitude local_dot = dotop.result, global_dot = 0; - MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); - MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); - return global_dot; -#else - return dotop.result; -#endif -} - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/WaxpbyOp.hpp b/kokkos/basic/WaxpbyOp.hpp deleted file mode 100644 index 6eaaa6e..0000000 --- a/kokkos/basic/WaxpbyOp.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef WAXPBYOP_HPP_ -#define WAXPBYOP_HPP_ - -#ifndef KERNEL_PREFIX -#define KERNEL_PREFIX -#endif - -template -struct WaxpbyOp { - Scalar* w; - const Scalar* x; - const Scalar* y; - Scalar alpha, beta; - size_t n; - KERNEL_PREFIX void operator()(size_t i) const - { - //here we count on the caller (ComputeNode) to pass in 'i' - //that is in the range 0..n-1 - w[i] = alpha*x[i] + beta*y[i]; - } -}; - -template -struct FusedWaxpbyOp { - Scalar* w; - const Scalar* x; - const Scalar* y; - Scalar alpha, beta; - Scalar* w2; - const Scalar* x2; - const Scalar* y2; - Scalar alpha2, beta2; - size_t n; - KERNEL_PREFIX void operator()(size_t i) const - { - //here we count on the caller (ComputeNode) to pass in 'i' - //that is in the range 0..n-1 - w[i] = alpha*x[i] + beta*y[i]; - w2[i] = alpha2*x2[i] + beta2*y2[i]; - } -}; - -#endif diff --git a/kokkos/basic/analytic_soln.hpp b/kokkos/basic/analytic_soln.hpp deleted file mode 100644 index 8dcdfad..0000000 --- 
a/kokkos/basic/analytic_soln.hpp +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef _analytic_soln_hpp_ -#define _analytic_soln_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include - -#ifndef MINIFE_SCALAR -#define MINIFE_SCALAR double; -#endif - -namespace miniFE { - -typedef MINIFE_SCALAR Scalar; - -// The 'soln' function below computes the analytic solution for -// steady state temperature in a brick-shaped domain (formally called -// a rectangular parallelepiped). The inputs to the function are -// the x,y,z coordinates of the point at which temperature is to be -// computed, and the number of terms p,q in the series expansion. -// -// The equations used for the temperature solution are equations 9 and 10 -// in section 6.2 of Carslaw & Jaeger, "Conduction of Heat in Solids". -// -// The paralellepiped being used is defined by this domain: -// 0 <= x <= 1.0 -// 0 <= y <= 1.0 -// 0 <= z <= 1.0 -// -// With boundary conditions prescribing the temperature to be 1.0 on -// the x==1.0 face, and 0.0 on all other faces. -// -// Thus, in the equations from Carslaw & Jaeger, the following constants -// are used: -// -// a == b == c == 1.0 (the extents of the domain) -// v1 == 0.0 (temperature at x == 0.0) -// v2 == 1.0 (temperature at x == 1.0) -// - -const Scalar PI = 3.141592653589793238462; -const Scalar PI_SQR = PI*PI; -const Scalar term0 = 16.0/(PI_SQR); - -inline Scalar fcn_l(int p, int q) -{ - return std::sqrt((2*p+1)*(2*p+1)*PI_SQR + (2*q+1)*(2*q+1)*PI_SQR); -} - -inline Scalar fcn(int n, Scalar u) -{ - return (2*n+1)*PI*u; -} - -inline Scalar soln(Scalar x, Scalar y, Scalar z, int max_p, int max_q) -{ - Scalar sum = 0; - for(int p=0; p<=max_p; ++p) { - const Scalar p21y = fcn(p, y); - const Scalar sin_py = std::sin(p21y)/(2*p+1); - for(int q=0; q<=max_q; ++q) { - const Scalar q21z = fcn(q, z); - const Scalar sin_qz = std::sin(q21z)/(2*q+1); - - const Scalar l = fcn_l(p, q); - - const Scalar sinh1 = std::sinh(l*x); - const Scalar sinh2 = std::sinh(l); - - const Scalar tmp = (sinh1*sin_py)*(sin_qz/sinh2); - - //if the scalar l gets too big, sinh(l) becomes inf. - //if that happens, tmp is a NaN. - //crude check for NaN: - //if tmp != tmp, tmp is NaN - if (tmp == tmp) { - sum += tmp; - } - else { - //if we got a NaN, break out of this inner loop and go to - //the next iteration of the outer loop. 
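        //(tmp == tmp is the portable pre-C++11 NaN test: IEEE arithmetic
        //guarantees a NaN never compares equal to itself. With C++11 the
        //same guard could be written as !std::isnan(tmp) from <cmath>.)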
- break; - } - } - } - return term0*sum; -} - -}//namespace miniFE - -#endif /* _analytic_soln_hpp_ */ diff --git a/kokkos/basic/assemble_FE_data.hpp b/kokkos/basic/assemble_FE_data.hpp deleted file mode 100644 index f34b14a..0000000 --- a/kokkos/basic/assemble_FE_data.hpp +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef _assemble_FE_data_hpp_ -#define _assemble_FE_data_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include - -#ifdef MINIFE_HAVE_TBB -//#include -#include -//#include -#else -#include -#endif - -namespace miniFE { - -template -void -assemble_FE_data(const simple_mesh_description& mesh, - MatrixType& A, - VectorType& b, - Parameters& params) -{ - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - - int global_elems_x = mesh.global_box[0][1]; - int global_elems_y = mesh.global_box[1][1]; - int global_elems_z = mesh.global_box[2][1]; - - Box local_elem_box; - copy_box(mesh.local_box, local_elem_box); - - if (get_num_ids(local_elem_box) < 1) { - return; - } - - // - //We want the element-loop to loop over our (processor-local) domain plus a - //ghost layer, so we can assemble the complete linear-system without doing - //any communication. - // - int ghost = 1; - if (local_elem_box[0][0] > 0) local_elem_box[0][0] -= ghost; - if (local_elem_box[1][0] > 0) local_elem_box[1][0] -= ghost; - if (local_elem_box[2][0] > 0) local_elem_box[2][0] -= ghost; - if (local_elem_box[0][1] < global_elems_x) local_elem_box[0][1] += ghost; - if (local_elem_box[1][1] < global_elems_y) local_elem_box[1][1] += ghost; - if (local_elem_box[2][1] < global_elems_z) local_elem_box[2][1] += ghost; - - perform_element_loop(mesh, local_elem_box, A, b, params); -} - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/box_utils.hpp b/kokkos/basic/box_utils.hpp deleted file mode 100644 index ee10975..0000000 --- a/kokkos/basic/box_utils.hpp +++ /dev/null @@ -1,199 +0,0 @@ -#ifndef _box_utils_hpp_ -#define _box_utils_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. 
-// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -#include -#include - -namespace miniFE { - -inline void copy_box(const Box& from_box, Box& to_box) -{ - for(int i=0; i<3; ++i) { - to_box[i][0] = from_box[i][0]; - to_box[i][1] = from_box[i][1]; - } -} - -template -void get_int_coords(GlobalOrdinal ID, int nx, int ny, int nz, - int& x, int& y, int& z) -{ - z = ID/(nx*ny); - y = (ID%(nx*ny))/nx; - x = ID%nx; -} - -template -void get_coords(GlobalOrdinal ID, int nx, int ny, int nz, - Scalar& x, Scalar& y, Scalar& z) -{ - const int xdiv = nx>1 ? nx-1 : 1; - const int ydiv = ny>1 ? ny-1 : 1; - const int zdiv = nz>1 ? nz-1 : 1; - -//This code assumes that ID is 0-based. -// -//compute coordinates that lie on (or in) the unit cube. -//that's why we're dividing by nz,ny,nx: - z = (1.0*(ID/(nx*ny)))/zdiv; - y = 1.0*((ID%(nx*ny))/nx)/ydiv; - x = 1.0*(ID%nx)/xdiv; -} - -template -GlobalOrdinal get_num_ids(const Box& box) -{ - int nx = box[0][1] - box[0][0]; - int ny = box[1][1] - box[1][0]; - int nz = box[2][1] - box[2][0]; - GlobalOrdinal tmp = nx*ny; - tmp *= nz; - return tmp; -} - -template -GlobalOrdinal get_id(int nx, int ny, int nz, - int x, int y, int z) -{ - if (x<0 || y<0 || z<0) return -1; - if (x>=nx || y>=ny || z>=nz) return -1; - - //form x + nx*y + nx*ny*z: - - GlobalOrdinal tmp = nx*ny; - tmp *= z; - tmp = x + nx * y + tmp; - return tmp; -} - -template -void get_ids(int nx, int ny, int nz, - const Box& box, - GlobalOrdinal* ids) -{ - unsigned offset = 0; - for(int z=box[2][0]; z(nx, ny, nz, x, y, z); - } - } - } -} - -template -void create_map_id_to_row(int global_nx, int global_ny, int global_nz, - const Box& box, - std::map& id_to_row) -{ - GlobalOrdinal num_my_ids = get_num_ids(box); - GlobalOrdinal my_first_row = 0; - -#ifdef HAVE_MPI - int numprocs = 1, myproc = 0; - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); - - typename std::vector tmp_buffer(numprocs, 0); - tmp_buffer[myproc] = num_my_ids; - typename std::vector global_offsets(numprocs); - MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); - MPI_Allreduce(&tmp_buffer[0], &global_offsets[0], numprocs, mpi_dtype, - MPI_SUM, MPI_COMM_WORLD); - GlobalOrdinal offset = 0; - for(int i=0; i all_my_ids(num_my_ids); - get_ids(global_nx, global_ny, global_nz, box, &all_my_ids[0]); - - typename std::vector ids; - typename std::vector rows; - - if (all_my_ids.size() > 0) { - ids.push_back(all_my_ids[0]); - rows.push_back(my_first_row); - } - - for(size_t i=1; i lengths(numprocs); - MPI_Allgather(&len, 1, MPI_INT, &lengths[0], 1, MPI_INT, MPI_COMM_WORLD); - - std::vector displs(lengths); - int displ = 
0; - for(int i=0; i global_ids(displ); - typename std::vector global_rows(displ); - - MPI_Allgatherv(&ids[0], len, mpi_dtype, &global_ids[0], - &lengths[0], &displs[0], mpi_dtype, MPI_COMM_WORLD); - MPI_Allgatherv(&rows[0], len, mpi_dtype, &global_rows[0], - &lengths[0], &displs[0], mpi_dtype, MPI_COMM_WORLD); - - ids = global_ids; - rows = global_rows; -#endif - - for(size_t i=0; i -#include - -#include -#include - -#include - -namespace miniFE { - -template -void print_vec(const std::vector& vec, const std::string& name) -{ - for(size_t i=0; i -bool breakdown(typename VectorType::ScalarType inner, - const VectorType& v, - const VectorType& w) -{ - typedef typename VectorType::ScalarType Scalar; - typedef typename TypeTraits::magnitude_type magnitude; - -//This is code that was copied from Aztec, and originally written -//by my hero, Ray Tuminaro. -// -//Assuming that inner = (inner product of v and w), -//v and w are considered orthogonal if -// |inner| < 100 * ||v||_2 * ||w||_2 * epsilon - - magnitude vnorm = std::sqrt(dot(v,v)); - magnitude wnorm = std::sqrt(dot(w,w)); - return std::abs(inner) <= 100*vnorm*wnorm*std::numeric_limits::epsilon(); -} - -template -void -cg_solve(OperatorType& A, - const VectorType& b, - VectorType& x, - Matvec matvec, - typename OperatorType::LocalOrdinalType max_iter, - typename TypeTraits::magnitude_type& tolerance, - typename OperatorType::LocalOrdinalType& num_iters, - typename TypeTraits::magnitude_type& normr, - timer_type* my_cg_times) -{ - typedef typename OperatorType::ScalarType ScalarType; - typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; - typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; - typedef typename TypeTraits::magnitude_type magnitude_type; - - timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; - timer_type total_time = mytimer(); - - int myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (!A.has_local_indices) { - std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " - << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." - << std::endl; - return; - } - - size_t nrows = A.rows.size(); - LocalOrdinalType ncols = A.num_cols; - - VectorType r(b.startIndex, nrows, b.compute_node); - VectorType p(0, ncols, b.compute_node); - VectorType Ap(b.startIndex, nrows, b.compute_node); - - normr = 0; - magnitude_type rtrans = 0; - magnitude_type oldrtrans = 0; - - LocalOrdinalType print_freq = max_iter/10; - if (print_freq>50) print_freq = 50; - if (print_freq<1) print_freq = 1; - - ScalarType one = 1.0; - ScalarType zero = 0.0; - - typedef typename VectorType::ComputeNodeType ComputeNodeType; - ComputeNodeType& compute_node = x.compute_node; - - //The following lines that create and initialize buffers are no-ops in many - //cases, but perform actual allocations and copies if an off-cpu device such - //as a GPU is being used by compute_node. 
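  //(Sketch of why these calls are often no-ops, assuming a host-only memory
  //model rather than the library's actual implementation: a host compute
  //node can implement the buffer API roughly as
  //  template<typename T> T*   get_buffer(T* host_ptr, size_t)      { return host_ptr; }
  //  template<typename T> void copy_to_buffer(const T*, size_t, T*) { /* nothing to copy */ }
  //so only a device-backed node such as CUDA pays for allocations and copies.)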
- - //Do any required allocations for buffers that will be needed during CG: - ScalarType* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - ScalarType* d_p = compute_node.get_buffer(&p.coefs[0], p.coefs.size()); - ScalarType* d_b = compute_node.get_buffer(&b.coefs[0], b.coefs.size()); - ScalarType* d_Ap = compute_node.get_buffer(&Ap.coefs[0], Ap.coefs.size()); - ScalarType* d_r = compute_node.get_buffer(&r.coefs[0], r.coefs.size()); -#ifdef MINIFE_CSR_MATRIX - LocalOrdinalType* d_Arowoff = compute_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); - GlobalOrdinalType* d_Acols = compute_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); - ScalarType* d_Acoefs = compute_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); -#endif -#ifdef MINIFE_ELL_MATRIX - GlobalOrdinalType* d_Acols = compute_node.get_buffer(&A.cols[0], A.cols.size()); - ScalarType* d_Acoefs = compute_node.get_buffer(&A.coefs[0], A.coefs.size()); -#endif - - //Copy data to buffers that need to be initialized from input data: - compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); - compute_node.copy_to_buffer(&b.coefs[0], b.coefs.size(), d_b); -#ifdef MINIFE_CSR_MATRIX - compute_node.copy_to_buffer(&A.row_offsets[0], A.row_offsets.size(), d_Arowoff); - compute_node.copy_to_buffer(&A.packed_cols[0], A.packed_cols.size(), d_Acols); - compute_node.copy_to_buffer(&A.packed_coefs[0], A.packed_coefs.size(), d_Acoefs); -#endif -#ifdef MINIFE_ELL_MATRIX - compute_node.copy_to_buffer(&A.cols[0], A.cols.size(), d_Acols); - compute_node.copy_to_buffer(&A.coefs[0], A.coefs.size(), d_Acoefs); -#endif - - TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); - - compute_node.copy_from_buffer(&p.coefs[0], p.coefs.size(), d_p); -// print_vec(p.coefs, "p"); - - TICK(); - matvec(A, p, Ap); - TOCK(tMATVEC); - - TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); - -// if (b.coefs.size() == r.coefs.size()) std::cout << "b.size == r.size" << std::endl; -// else std::cout << "b.size != r.size" << std::endl; -// if (b.coefs == r.coefs) std::cout << "b == r" << std::endl; -// else std::cout << "b != r" << std::endl; -// compute_node.copy_from_buffer(&r.coefs[0], r.coefs.size(), d_r); -// print_vec(b.coefs, "b"); -// print_vec(r.coefs, "r"); - - TICK(); rtrans = dot(r, r); TOCK(tDOT); - -//std::cout << "rtrans="<add("Global Nrows",global_nrows); - ydoc.get("Matrix attributes")->add("Global NNZ",global_nnz); - - //compute how much memory the matrix occupies: - //num-bytes = sizeof(GlobalOrdinal)*global_nrows for A.rows - // + sizeof(LocalOrdinal)*global_nrows for A.rows_offsets - // + sizeof(GlobalOrdinal)*global_nnz for A.packed_cols - // + sizeof(Scalar)*global_nnz for A.packed_coefs - - double invGB = 1.0/(1024*1024*1024); - double memGB = invGB*global_nrows*sizeof(GlobalOrdinal); - memGB += invGB*global_nrows*sizeof(LocalOrdinal); - memGB += invGB*global_nnz*sizeof(GlobalOrdinal); - memGB += invGB*global_nnz*sizeof(Scalar); - ydoc.get("Matrix attributes")->add("Global Memory (GB)",memGB); - - ydoc.get("Matrix attributes")->add("Pll Memory Overhead (MB)",mem_overhead_MB); - - ydoc.get("Matrix attributes")->add("Rows per proc MIN",min_nrows); - ydoc.get("Matrix attributes")->add("Rows per proc MAX",max_nrows); - ydoc.get("Matrix attributes")->add("Rows per proc AVG",avg_nrows); - ydoc.get("Matrix attributes")->add("NNZ per proc MIN",min_nnz); - ydoc.get("Matrix attributes")->add("NNZ per proc MAX",max_nnz); - ydoc.get("Matrix attributes")->add("NNZ per proc AVG",avg_nnz); - } - - return global_nnz; -} - 
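//Worked example of the memory estimate above, with hypothetical sizes
//(8-byte GlobalOrdinal and Scalar, 4-byte LocalOrdinal, one million rows,
//27 million nonzeros for a full 27-point stencil):
//  memGB ~= (8+4)*1e6/2^30 + (8+8)*27e6/2^30 ~= 0.011 + 0.402 ~= 0.41 GB
//The same arithmetic as a standalone sketch (illustrative helper, not part
//of the original header):
inline double estimate_matrix_GB(double nrows, double nnz,
                                 int GO_bytes = 8, int LO_bytes = 4,
                                 int scalar_bytes = 8)
{
  const double invGB = 1.0/(1024.0*1024.0*1024.0);
  return invGB*(nrows*(GO_bytes + LO_bytes) + nnz*(GO_bytes + scalar_bytes));
}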
-}//namespace miniFE - -#endif - diff --git a/kokkos/basic/driver.hpp b/kokkos/basic/driver.hpp deleted file mode 100644 index d3966eb..0000000 --- a/kokkos/basic/driver.hpp +++ /dev/null @@ -1,403 +0,0 @@ -#ifndef _driver_hpp_ -#define _driver_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifdef MINIFE_CSR_MATRIX -#include -#elif defined(MINIFE_ELL_MATRIX) -#include -#else -#include -#endif - -#include - -#include - -#include -#include - -#include - -#include -#include -#include -#include -#if MINIFE_KERNELS != 0 -#include -#endif -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -#define RUN_TIMED_FUNCTION(msg, fn, time_inc, time_total) \ -{ \ - if (myproc==0) { \ - std::cout.width(30); \ - std::cout << msg; \ - std::cout.flush(); \ - } \ - timer_type rtf_t0 = mytimer(); \ - fn; \ - time_inc = mytimer() - rtf_t0; \ - time_total += time_inc; \ - if (myproc==0) { \ - std::cout << time_inc << "s, total time: " << time_total << std::endl; \ - } \ -} - -//This program assembles finite-element matrices into a global matrix and -//vector, then solves the linear-system using Conjugate Gradients. -//Each finite-element is a hexahedron with 8 vertex-nodes. -// -//Notes: -//- In finite-element terms, the box dimensions are in elements, not nodes. -// In other words, a 2x2x2 box describes 8 elements, each of which has 8 nodes, -// so it is a 3x3x3 node domain (27 nodes). -// The assembled linear system will have 1 equation for each finite element node. -// -//- The coordinate origin is at the corner of the global box where x=0, -// y=0, z=0, and the box extends along the positive x-axis, positive y-axis, -// and the positive z-axis. -// -//- Some aspects of matrix-structure generation and finite-element assembly -// are convenient to do using global node identifiers. -// A global identifier for each node is obtained from coordinates plus -// global box dimensions. See the function 'get_id' in box_utils.hpp. -// -//- Each node corresponds to a row in the matrix. The RCB partitioning method -// we use to split the global box among processors results in some -// processors owning non-contiguous blocks of global node identifiers. 
-// Since it is convenient for matrices and vectors to store contiguously- -// numbered blocks of rows, we map global node identifiers to a separate -// space of row numbers such that each processor's nodes correspond to a -// contiguous block of row numbers. -// - -namespace miniFE { - -template -void -driver(const Box& global_box, Box& my_box, ComputeNodeType& compute_node, - Parameters& params, YAML_Doc& ydoc) -{ - int global_nx = global_box[0][1]; - int global_ny = global_box[1][1]; - int global_nz = global_box[2][1]; - - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (params.load_imbalance > 0) { - add_imbalance(global_box, my_box, params.load_imbalance, ydoc); - } - - float largest_imbalance = 0, std_dev = 0; - compute_imbalance(global_box, my_box, largest_imbalance, - std_dev, ydoc, true); - - - //Create a representation of the mesh: - //Note that 'simple_mesh_description' is a virtual or conceptual - //mesh that doesn't actually store mesh data. - - if (myproc==0) { - std::cout.width(30); - std::cout << "creating/filling mesh..."; - std::cout.flush(); - } - - timer_type t_start = mytimer(); - timer_type t0 = mytimer(); - - simple_mesh_description mesh(global_box, my_box); - - timer_type mesh_fill = mytimer() - t0; - timer_type t_total = mytimer() - t_start; - - if (myproc==0) { - std::cout << mesh_fill << "s, total time: " << t_total << std::endl; - } - - //next we will generate the matrix structure. - - //Declare matrix object: - -#ifdef MINIFE_CSR_MATRIX - typedef CSRMatrix MatrixType; -#elif defined(MINIFE_ELL_MATRIX) - typedef ELLMatrix MatrixType; -#else - typedef CSRMatrix MatrixType; -#endif - - MatrixType A(compute_node); - - timer_type gen_structure; - RUN_TIMED_FUNCTION("generating matrix structure...", - generate_matrix_structure(mesh, A), - gen_structure, t_total); - - GlobalOrdinal local_nrows = A.rows.size(); - GlobalOrdinal my_first_row = local_nrows > 0 ? A.rows[0] : -1; - - Vector b(my_first_row, local_nrows,compute_node); - Vector x(my_first_row, local_nrows,compute_node); - - //Assemble finite-element sub-matrices and sub-vectors into the global - //linear system: - - timer_type fe_assembly; - RUN_TIMED_FUNCTION("assembling FE data...", - assemble_FE_data(mesh, A, b, params), - fe_assembly, t_total); - - if (myproc == 0) { - ydoc.add("Matrix structure generation",""); - ydoc.get("Matrix structure generation")->add("Mat-struc-gen Time",gen_structure); - ydoc.add("FE assembly",""); - ydoc.get("FE assembly")->add("FE assembly Time",fe_assembly); - } - -#ifdef MINIFE_DEBUG - write_matrix("A_prebc.mtx", A); - write_vector("b_prebc.vec", b); -#endif - - //Now apply dirichlet boundary-conditions - //(Apply the 0-valued surfaces first, then the 1-valued surface last.) 
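  //Note on the node counts passed below: the box dimensions are in elements,
  //so the node grid is (global_nx+1) x (global_ny+1) x (global_nz+1); e.g. a
  //2x2x2-element box has a 3x3x3 node grid and hence 27 matrix rows.
  //bc_rows_0 (the 0-valued faces) is imposed first and bc_rows_1 (the
  //1-valued x==1 face) last, matching the boundary conditions assumed in
  //analytic_soln.hpp.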
- - timer_type dirbc_time; - RUN_TIMED_FUNCTION("imposing Dirichlet BC...", - impose_dirichlet(0.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_0), dirbc_time, t_total); - RUN_TIMED_FUNCTION("imposing Dirichlet BC...", - impose_dirichlet(1.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_1), dirbc_time, t_total); - -#ifdef MINIFE_DEBUG - write_matrix("A.mtx", A); - write_vector("b.vec", b); -#endif - - //Transform global indices to local, set up communication information: - - timer_type make_local_time; - RUN_TIMED_FUNCTION("making matrix indices local...", - make_local_matrix(A), - make_local_time, t_total); - -#ifdef MINIFE_DEBUG - write_matrix("A_local.mtx", A); - write_vector("b_local.vec", b); -#endif - - size_t global_nnz = compute_matrix_stats(A, myproc, numprocs, ydoc); - - //Prepare to perform conjugate gradient solve: - - LocalOrdinal max_iters = 50; - LocalOrdinal num_iters = 0; - typedef typename TypeTraits::magnitude_type magnitude; - magnitude rnorm = 0; - magnitude tol = std::numeric_limits::epsilon(); - - timer_type cg_times[NUM_TIMERS]; - - typedef Vector VectorType; - - t_total = mytimer() - t_start; - - bool matvec_with_comm_overlap = params.mv_overlap_comm_comp==1; - -#if MINIFE_KERNELS != 0 - if (myproc==0) { - std::cout.width(30); - std::cout << "Starting kernel timing loops ..." << std::endl; - } - - max_iters = 500; - x.coefs[0] = 0.9; - if (matvec_with_comm_overlap) { - time_kernels(A, b, x, matvec_overlap(), max_iters, rnorm, cg_times); - } - else { - time_kernels(A, b, x, matvec_std(), max_iters, rnorm, cg_times); - } - num_iters = max_iters; - std::string title("Kernel timings"); -#else - if (myproc==0) { - std::cout << "Starting CG solver ... " << std::endl; - } - - if (matvec_with_comm_overlap) { -#ifdef MINIFE_CSR_MATRIX - rearrange_matrix_local_external(A); - cg_solve(A, b, x, matvec_overlap(), max_iters, tol, - num_iters, rnorm, cg_times); -#else - std::cout << "ERROR, matvec with overlapping comm/comp only works with CSR matrix."<(), max_iters, tol, - num_iters, rnorm, cg_times); - if (myproc == 0) { - std::cout << "Final Resid Norm: " << rnorm << std::endl; - } - -#ifdef MINIFE_DEBUG - if (myproc == 0) { - std::cout << "verifying solution..." << std::endl; - } - verify_solution(mesh, x); -#endif - } - -#ifdef MINIFE_DEBUG - write_vector("x.vec", x); -#endif - std::string title("CG solve"); -#endif - - if (myproc == 0) { - ydoc.get("Global Run Parameters")->add("ScalarType",TypeTraits::name()); - ydoc.get("Global Run Parameters")->add("GlobalOrdinalType",TypeTraits::name()); - ydoc.get("Global Run Parameters")->add("LocalOrdinalType",TypeTraits::name()); - ydoc.add(title,""); - ydoc.get(title)->add("Iterations",num_iters); - ydoc.get(title)->add("Final Resid Norm",rnorm); - - GlobalOrdinal global_nrows = global_nx; - global_nrows *= global_ny*global_nz; - - //flops-per-mv, flops-per-dot, flops-per-waxpy: - double mv_flops = global_nnz*2.0; - double dot_flops = global_nrows*2.0; - double waxpy_flops = global_nrows*3.0; - -#if MINIFE_KERNELS == 0 -//if MINIFE_KERNELS == 0 then we did a CG solve, and in that case -//there were num_iters+1 matvecs, num_iters*2 dots, and num_iters*3+2 waxpys. - mv_flops *= (num_iters+1); - dot_flops *= (2*num_iters); - waxpy_flops *= (3*num_iters+2); -#else -//if MINIFE_KERNELS then we did one of each operation per iteration. 
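//Worked example with hypothetical numbers: for global_nnz = 27e6 a matvec is
//2*27e6 = 5.4e7 flops; with num_iters = 500 kernel-timing iterations that is
//2.7e10 flops, so a measured MATVEC time of 10 seconds would be reported as
//1.e-6 * 2.7e10 / 10 = 2700 Mflops.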
- mv_flops *= num_iters; - dot_flops *= num_iters; - waxpy_flops *= num_iters; -#endif - - double total_flops = mv_flops + dot_flops + waxpy_flops; - - double mv_mflops = -1; - if (cg_times[MATVEC] > 1.e-4) - mv_mflops = 1.e-6 * (mv_flops/cg_times[MATVEC]); - - double dot_mflops = -1; - if (cg_times[DOT] > 1.e-4) - dot_mflops = 1.e-6 * (dot_flops/cg_times[DOT]); - - double waxpy_mflops = -1; - if (cg_times[WAXPY] > 1.e-4) - waxpy_mflops = 1.e-6 * (waxpy_flops/cg_times[WAXPY]); - - double total_mflops = -1; - if (cg_times[TOTAL] > 1.e-4) - total_mflops = 1.e-6 * (total_flops/cg_times[TOTAL]); - - ydoc.get(title)->add("WAXPY Time",cg_times[WAXPY]); - ydoc.get(title)->add("WAXPY Flops",waxpy_flops); - if (waxpy_mflops >= 0) - ydoc.get(title)->add("WAXPY Mflops",waxpy_mflops); - else - ydoc.get(title)->add("WAXPY Mflops","inf"); - - ydoc.get(title)->add("DOT Time",cg_times[DOT]); - ydoc.get(title)->add("DOT Flops",dot_flops); - if (dot_mflops >= 0) - ydoc.get(title)->add("DOT Mflops",dot_mflops); - else - ydoc.get(title)->add("DOT Mflops","inf"); - - ydoc.get(title)->add("MATVEC Time",cg_times[MATVEC]); - ydoc.get(title)->add("MATVEC Flops",mv_flops); - if (mv_mflops >= 0) - ydoc.get(title)->add("MATVEC Mflops",mv_mflops); - else - ydoc.get(title)->add("MATVEC Mflops","inf"); - -#ifdef MINIFE_FUSED - ydoc.get(title)->add("MATVECDOT Time",cg_times[MATVECDOT]); - ydoc.get(title)->add("MATVECDOT Flops",mv_flops); - if (mv_mflops >= 0) - ydoc.get(title)->add("MATVECDOT Mflops",mv_mflops); - else - ydoc.get(title)->add("MATVECDOT Mflops","inf"); -#endif - -#if MINIFE_KERNELS == 0 - ydoc.get(title)->add("Total",""); - ydoc.get(title)->get("Total")->add("Total CG Time",cg_times[TOTAL]); - ydoc.get(title)->get("Total")->add("Total CG Flops",total_flops); - if (total_mflops >= 0) - ydoc.get(title)->get("Total")->add("Total CG Mflops",total_mflops); - else - ydoc.get(title)->get("Total")->add("Total CG Mflops","inf"); - ydoc.get(title)->add("Time per iteration",cg_times[TOTAL]/num_iters); -#endif - } -} - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/exchange_externals.hpp b/kokkos/basic/exchange_externals.hpp deleted file mode 100644 index 167ba1b..0000000 --- a/kokkos/basic/exchange_externals.hpp +++ /dev/null @@ -1,270 +0,0 @@ -#ifndef _exchange_externals_hpp_ -#define _exchange_externals_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -#include - -#include - -namespace miniFE { - -template -void -exchange_externals(MatrixType& A, - VectorType& x) -{ -#ifdef HAVE_MPI -#ifdef MINIFE_DEBUG - std::ostream& os = outstream(); - os << "entering exchange_externals\n"; -#endif - - int numprocs = 1; - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - - if (numprocs < 2) return; - - typedef typename MatrixType::ScalarType Scalar; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - - // Extract Matrix pieces - - int local_nrow = A.rows.size(); - int num_neighbors = A.neighbors.size(); - const std::vector& recv_length = A.recv_length; - const std::vector& send_length = A.send_length; - const std::vector& neighbors = A.neighbors; - const std::vector& elements_to_send = A.elements_to_send; - - std::vector& send_buffer = A.send_buffer; - - // - // first post receives, these are immediate receives - // Do not wait for result to come, will do that at the - // wait call below. - // - - int MPI_MY_TAG = 99; - - std::vector& request = A.request; - - // - // Externals are at end of locals - // - - std::vector& x_coefs = x.coefs; - Scalar* x_external = &(x_coefs[local_nrow]); - - MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); - - // Post receives first - for(int i=0; i x.coefs.size()) { - os << "error, out-of-range. x.coefs.size()=="< exch_ext_requests; -#endif - -template -void -begin_exchange_externals(MatrixType& A, - VectorType& x) -{ -#ifdef HAVE_MPI - - int numprocs = 1, myproc = 0; - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); - - if (numprocs < 2) return; - - typedef typename MatrixType::ScalarType Scalar; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - - // Extract Matrix pieces - - int local_nrow = A.rows.size(); - int num_neighbors = A.neighbors.size(); - const std::vector& recv_length = A.recv_length; - const std::vector& send_length = A.send_length; - const std::vector& neighbors = A.neighbors; - const std::vector& elements_to_send = A.elements_to_send; - - std::vector send_buffer(elements_to_send.size(), 0); - - // - // first post receives, these are immediate receives - // Do not wait for result to come, will do that at the - // wait call below. 
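  //(Posting the receives before any sends is the usual MPI pattern here: a
  //pre-posted MPI_Irecv lets each neighbor's data land directly in
  //x_external instead of sitting in MPI's unexpected-message buffers.)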
- // - - int MPI_MY_TAG = 99; - - exch_ext_requests.resize(num_neighbors); - - // - // Externals are at end of locals - // - - std::vector& x_coefs = x.coefs; - Scalar* x_external = &(x_coefs[local_nrow]); - - MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); - - // Post receives first - for(int i=0; i -#include -#include -#include - -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -namespace miniFE { - -template -int -generate_matrix_structure(const simple_mesh_description& mesh, - MatrixType& A) -{ - int myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - int threw_exc = 0; - try { - - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - - int global_nodes_x = mesh.global_box[0][1]+1; - int global_nodes_y = mesh.global_box[1][1]+1; - int global_nodes_z = mesh.global_box[2][1]+1; - Box box; - copy_box(mesh.local_box, box); - - //num-owned-nodes in each dimension is num-elems+1 - //only if num-elems > 0 in that dimension *and* - //we are at the high end of the global range in that dimension: - if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; - if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; - if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; - - GlobalOrdinal global_nrows = global_nodes_x; - global_nrows *= global_nodes_y*global_nodes_z; - - GlobalOrdinal nrows = get_num_ids(box); - try { - A.reserve_space(nrows, 27); - } - catch(std::exception& exc) { - std::ostringstream osstr; - osstr << "One of A.rows.resize, A.row_offsets.resize, A.packed_cols.reserve or A.packed_coefs.reserve: nrows=" < rows(nrows); - std::vector row_offsets(nrows+1); - std::vector row_coords(nrows*3); - - unsigned roffset = 0; - GlobalOrdinal nnz = 0; - - for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, - ix, iy, iz); - rows[roffset] = mesh.map_id_to_row(row_id); - row_coords[roffset*3] = ix; - row_coords[roffset*3+1] = iy; - row_coords[roffset*3+2] = iz; - row_offsets[roffset++] = nnz; - - GlobalOrdinal row_begin_offset = nnz; - for(int sz=-1; sz<=1; ++sz) { - for(int sy=-1; sy<=1; ++sy) { - for(int sx=-1; sx<=1; ++sx) { - GlobalOrdinal col_id = - get_id(global_nodes_x, global_nodes_y, global_nodes_z, - ix+sx, iy+sy, iz+sz); - if (col_id >= 0 && col_id < global_nrows) { - ++nnz; - } - } - } - } - } - } - } - row_offsets[roffset] = nnz; - init_matrix(A, rows, row_offsets, row_coords, - global_nodes_x, global_nodes_y, global_nodes_z, global_nrows, mesh); - } - catch(...) { - std::cout << "proc " << myproc << " threw an exception in generate_matrix_structure, probably due to running out of memory." << std::endl; - threw_exc = 1; - } -#ifdef HAVE_MPI - int global_throw = 0; - MPI_Allreduce(&threw_exc, &global_throw, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - threw_exc = global_throw; -#endif - if (threw_exc) { - return 1; - } - - return 0; -} - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/get_common_files b/kokkos/basic/get_common_files deleted file mode 100755 index dec46a7..0000000 --- a/kokkos/basic/get_common_files +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -dir=../../common - -cp ${dir}/YAML_Doc.cpp . -cp ${dir}/YAML_Doc.hpp . -cp ${dir}/YAML_Element.cpp . -cp ${dir}/YAML_Element.hpp . - -cp ${dir}/generate_info_header . 
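The matrix-structure generation above reserves 27 entries per row because each node couples to at most a 3x3x3 block of neighboring nodes; nodes on the global boundary get fewer columns since get_id returns -1 for out-of-range coordinates. A minimal standalone sketch of that counting logic (illustrative helper name, same bounds convention as get_id):

inline int count_stencil_cols(int nx, int ny, int nz, int x, int y, int z)
{
  //Count how many of the 27 stencil positions around node (x,y,z) fall
  //inside an nx-by-ny-by-nz node grid.
  int count = 0;
  for(int sz=-1; sz<=1; ++sz) {
    for(int sy=-1; sy<=1; ++sy) {
      for(int sx=-1; sx<=1; ++sx) {
        const int cx = x+sx, cy = y+sy, cz = z+sz;
        if (cx >= 0 && cy >= 0 && cz >= 0 && cx < nx && cy < ny && cz < nz) {
          ++count; //mirrors the 'col_id >= 0 && col_id < global_nrows' test
        }
      }
    }
  }
  return count; //interior: 27, face: 18, edge: 12, corner: 8
}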
- diff --git a/kokkos/basic/imbalance.hpp b/kokkos/basic/imbalance.hpp deleted file mode 100644 index f801efc..0000000 --- a/kokkos/basic/imbalance.hpp +++ /dev/null @@ -1,271 +0,0 @@ -#ifndef _imbalance_hpp_ -#define _imbalance_hpp_ - -#include - -#ifdef HAVE_MPI -#include -#endif - -#include -#include -#include - -namespace miniFE { - -const int X = 0; -const int Y = 1; -const int Z = 2; -const int NONE = 3; - -const int LOWER = 0; -const int UPPER = 1; - -template -void -compute_imbalance(const Box& global_box, - const Box& local_box, - float& largest_imbalance, - float& std_dev, - YAML_Doc& doc, - bool record_in_doc) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - GlobalOrdinal local_nrows = get_num_ids(local_box); - GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; - int min_proc = myproc, max_proc = myproc; - get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, - max_nrows, max_proc); - - float avg_nrows = global_nrows; - avg_nrows /= numprocs; - - //largest_imbalance will be the difference between the min (or max) - //rows-per-processor and avg_nrows, represented as a percentage: - largest_imbalance = percentage_difference(min_nrows, avg_nrows); - - float tmp = percentage_difference(max_nrows, avg_nrows); - if (tmp > largest_imbalance) largest_imbalance = tmp; - - std_dev = compute_std_dev_as_percentage(local_nrows, avg_nrows); - - if (myproc == 0 && record_in_doc) { - doc.add("Rows-per-proc Load Imbalance",""); - doc.get("Rows-per-proc Load Imbalance")->add("Largest (from avg, %)",largest_imbalance); - doc.get("Rows-per-proc Load Imbalance")->add("Std Dev (%)",std_dev); - } -} - -std::pair -decide_how_to_grow(const Box& global_box, const Box& local_box) -{ - std::pair result(NONE,UPPER); - - if (local_box[Z][UPPER] < global_box[Z][UPPER]) { - result.first = Z; - result.second = UPPER; - return result; - } - if (local_box[Z][LOWER] > global_box[Z][LOWER]) { - result.first = Z; - result.second = LOWER; - return result; - } - if (local_box[Y][UPPER] < global_box[Y][UPPER]) { - result.first = Y; - result.second = UPPER; - return result; - } - if (local_box[Y][LOWER] > global_box[Y][LOWER]) { - result.first = Y; - result.second = LOWER; - return result; - } - if (local_box[X][UPPER] < global_box[X][UPPER]) { - result.first = X; - result.second = UPPER; - return result; - } - if (local_box[X][LOWER] > global_box[X][LOWER]) { - result.first = X; - result.second = LOWER; - return result; - } - return result; -} - -std::pair -decide_how_to_shrink(const Box& global_box, const Box& local_box) -{ - std::pair result(NONE,UPPER); - - if (local_box[Z][UPPER] < global_box[Z][UPPER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { - result.first = Z; - result.second = UPPER; - return result; - } - if (local_box[Z][LOWER] > global_box[Z][LOWER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { - result.first = Z; - result.second = LOWER; - return result; - } - if (local_box[Y][UPPER] < global_box[Y][UPPER] && local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { - result.first = Y; - result.second = UPPER; - return result; - } - if (local_box[Y][LOWER] > global_box[Y][LOWER] && local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { - result.first = Y; - result.second = LOWER; - return result; - } - if (local_box[X][UPPER] < global_box[X][UPPER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { - result.first = X; - result.second = UPPER; - return result; - } - if (local_box[X][LOWER] > 
global_box[X][LOWER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { - result.first = X; - result.second = LOWER; - return result; - } - return result; -} - -template -void -add_imbalance(const Box& global_box, - Box& local_box, - float imbalance, - YAML_Doc& doc) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (numprocs == 1) { - return; - } - - float cur_imbalance = 0, cur_std_dev = 0; - compute_imbalance(global_box, local_box, - cur_imbalance, cur_std_dev, doc, false); - - while (cur_imbalance < imbalance) { - GlobalOrdinal local_nrows = get_num_ids(local_box); - GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; - int min_proc = myproc, max_proc = myproc; - get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, - max_nrows, max_proc); - - std::pair grow(NONE,UPPER); - int grow_axis_val = -1; - std::pair shrink(NONE,UPPER); - int shrink_axis_val = -1; - - if (myproc == max_proc) { - grow = decide_how_to_grow(global_box, local_box); - if (grow.first != NONE) { - grow_axis_val = local_box[grow.first][grow.second]; - } - } - if (myproc == min_proc) { - shrink = decide_how_to_shrink(global_box, local_box); - if (shrink.first != NONE) { - shrink_axis_val = local_box[shrink.first][shrink.second]; - } - } - - int grow_info[8] = {grow.first, grow.second, - local_box[X][0], local_box[X][1], - local_box[Y][0], local_box[Y][1], - local_box[Z][0], local_box[Z][1]}; - - int shrink_info[8] = {shrink.first, shrink.second, - local_box[X][0], local_box[X][1], - local_box[Y][0], local_box[Y][1], - local_box[Z][0], local_box[Z][1]}; -#ifdef HAVE_MPI - MPI_Bcast(&grow_info[0], 8, MPI_INT, max_proc, MPI_COMM_WORLD); - MPI_Bcast(&shrink_info[0], 8, MPI_INT, min_proc, MPI_COMM_WORLD); -#endif - - int grow_axis = grow_info[0]; - int grow_end = grow_info[1]; - int shrink_axis = shrink_info[0]; - int shrink_end = shrink_info[1]; - int grow_incr = 1; - if (grow_end == LOWER) grow_incr = -1; - int shrink_incr = -1; - if (shrink_end == LOWER) shrink_incr = 1; - if (grow_axis != NONE) grow_axis_val = grow_info[2+grow_axis*2+grow_end]; - if (shrink_axis != NONE) shrink_axis_val = shrink_info[2+shrink_axis*2+shrink_end]; - - if (grow_axis == NONE && shrink_axis == NONE) break; - - bool grow_status = grow_axis==NONE ? false : true; - if (grow_axis != NONE) { - if ((grow_incr == 1 && local_box[grow_axis][0] == grow_axis_val) || - (grow_incr == -1 && local_box[grow_axis][1] == grow_axis_val)) { - if (local_box[grow_axis][1] - local_box[grow_axis][0] < 2) { - grow_status = false; - } - } - } - - bool shrink_status = shrink_axis==NONE ? false : true; - if (shrink_axis != NONE) { - if ((shrink_incr == 1 && local_box[shrink_axis][0] == shrink_axis_val) || - (shrink_incr == -1 && local_box[shrink_axis][1] == shrink_axis_val)) { - if (local_box[shrink_axis][1] - local_box[shrink_axis][0] < 2) { - shrink_status = false; - } - } - } - -#ifdef HAVE_MPI - int statusints[2] = { grow_status ? 0 : 1, shrink_status ? 0 : 1 }; - int globalstatus[2] = { 0, 0 }; - MPI_Allreduce(&statusints, &globalstatus, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - grow_status = globalstatus[0]>0 ? false : true; - shrink_status = globalstatus[1]>0 ? 
false : true; -#endif - - if (grow_status == false && shrink_status == false) break; - - if (grow_status && grow_axis != NONE) { - if (local_box[grow_axis][0] == grow_axis_val) { - local_box[grow_axis][0] += grow_incr; - } - - if (local_box[grow_axis][1] == grow_axis_val) { - local_box[grow_axis][1] += grow_incr; - } - } - - if (shrink_status && shrink_axis != NONE) { - if (local_box[shrink_axis][0] == shrink_axis_val) { - local_box[shrink_axis][0] += shrink_incr; - } - - if (local_box[shrink_axis][1] == shrink_axis_val) { - local_box[shrink_axis][1] += shrink_incr; - } - } - - compute_imbalance(global_box, local_box, - cur_imbalance, cur_std_dev, doc, false); - } -} - -}//namespace miniFE - -#endif - diff --git a/kokkos/basic/main.cpp b/kokkos/basic/main.cpp deleted file mode 100644 index ed3753f..0000000 --- a/kokkos/basic/main.cpp +++ /dev/null @@ -1,247 +0,0 @@ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER -#include -#include -#include -#include - -#include - -#include - -#ifdef HAVE_MPI -#include -#endif - -//-------------------------------------------------------------------- -#include -//-------------------------------------------------------------------- - -#include -#include -#include -#include -#include -#include -#include - -#if MINIFE_INFO != 0 -#include -#else -#include -#endif - -//The following macros should be specified as compile-macros in the -//makefile. They are defaulted here just in case... -#ifndef MINIFE_SCALAR -#define MINIFE_SCALAR double -#endif -#ifndef MINIFE_LOCAL_ORDINAL -#define MINIFE_LOCAL_ORDINAL int -#endif -#ifndef MINIFE_GLOBAL_ORDINAL -#define MINIFE_GLOBAL_ORDINAL int -#endif - -// ************************************************************************ - -void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params); -void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads); -void add_timestring_to_yaml(YAML_Doc& doc); - -inline void print_box(int myproc, const char* name, const Box& box, - const char* name2, const Box& box2) -{ - std::cout << "proc " << myproc << " "< local_boxes(numprocs); - - box_partition(0, numprocs, 2, global_box, &local_boxes[0]); - - Box& my_box = local_boxes[myproc]; - -//print_box(myproc, "global-box", global_box, "local-box", my_box); - - std::ostringstream osstr; - osstr << "miniFE." 
<< params.nx << "x" << params.ny << "x" << params.nz; -#ifdef HAVE_MPI - osstr << ".P"<. - //To run miniFE with float instead of double, or 'long long' instead of int, - //etc., change these template-parameters by changing the macro definitions in - //the makefile or on the make command-line. - - miniFE::driver< MINIFE_SCALAR, MINIFE_LOCAL_ORDINAL, MINIFE_GLOBAL_ORDINAL, - ComputeNodeType>(global_box, my_box, compute_node, params, doc); - - miniFE::timer_type total_time = miniFE::mytimer() - start_time; - - if (myproc == 0) { - doc.add("Total Program Time",total_time); - std::cout << doc.generateYAML() << std::endl; - } - - miniFE::finalize_mpi(); - - return 0; -} - -void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params) -{ - doc.add("Global Run Parameters",""); - doc.get("Global Run Parameters")->add("dimensions",""); - doc.get("Global Run Parameters")->get("dimensions")->add("nx",params.nx); - doc.get("Global Run Parameters")->get("dimensions")->add("ny",params.ny); - doc.get("Global Run Parameters")->get("dimensions")->add("nz",params.nz); - doc.get("Global Run Parameters")->add("load_imbalance", params.load_imbalance); - if (params.mv_overlap_comm_comp == 1) { - std::string val("1 (yes)"); - doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); - } - else { - std::string val("0 (no)"); - doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); - } -} - -void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads) -{ - doc.get("Global Run Parameters")->add("number of processors", numprocs); - std::string threading("none"); - -#ifdef MINIFE_HAVE_TPI - threading = "TPI"; -#endif -#ifdef MINIFE_HAVE_TBB - threading = "TBB"; -#endif -#ifdef MINIFE_HAVE_CUDA - threading = "CUDA"; -#endif - if (threading != "none") { - doc.get("Global Run Parameters")->add("(per proc) numthreads",numthreads); - } - - doc.add("Platform",""); - doc.get("Platform")->add("hostname",MINIFE_HOSTNAME); - doc.get("Platform")->add("kernel name",MINIFE_KERNEL_NAME); - doc.get("Platform")->add("kernel release",MINIFE_KERNEL_RELEASE); - doc.get("Platform")->add("processor",MINIFE_PROCESSOR); - - doc.add("Build",""); - doc.get("Build")->add("CXX",MINIFE_CXX); - doc.get("Build")->add("compiler version",MINIFE_CXX_VERSION); - doc.get("Build")->add("CXXFLAGS",MINIFE_CXXFLAGS); - std::string using_mpi("no"); -#ifdef HAVE_MPI - using_mpi = "yes"; -#endif - doc.get("Build")->add("using MPI",using_mpi); - doc.get("Build")->add("Threading",threading.c_str()); -} - -void add_timestring_to_yaml(YAML_Doc& doc) -{ - std::time_t rawtime; - struct tm * timeinfo; - std::time(&rawtime); - timeinfo = std::localtime(&rawtime); - std::ostringstream osstr; - osstr.fill('0'); - osstr << timeinfo->tm_year+1900 << "-"; - osstr.width(2); osstr << timeinfo->tm_mon+1 << "-"; - osstr.width(2); osstr << timeinfo->tm_mday << ", "; - osstr.width(2); osstr << timeinfo->tm_hour << "-"; - osstr.width(2); osstr << timeinfo->tm_min << "-"; - osstr.width(2); osstr << timeinfo->tm_sec; - std::string timestring = osstr.str(); - doc.add("Run Date/Time",timestring); -} - diff --git a/kokkos/basic/make_local_matrix.hpp b/kokkos/basic/make_local_matrix.hpp deleted file mode 100644 index 99c2cf7..0000000 --- a/kokkos/basic/make_local_matrix.hpp +++ /dev/null @@ -1,440 +0,0 @@ -#ifndef _make_local_matrix_hpp_ -#define _make_local_matrix_hpp_ - -//@HEADER -// ************************************************************************ -// -// MiniFE: Simple Finite Element Assembly and Solve -// Copyright (2006-2013) 
Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// -// ************************************************************************ -//@HEADER - -#include - -#ifdef HAVE_MPI -#include -#endif - -namespace miniFE { - -template -void -make_local_matrix(MatrixType& A) -{ -#ifdef HAVE_MPI - int numprocs = 1, myproc = 0; - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); - - if (numprocs < 2) { - A.num_cols = A.rows.size(); - A.has_local_indices = true; - return; - } - - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::LocalOrdinalType LocalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - - std::map externals; - LocalOrdinal num_external = 0; - - //Extract Matrix pieces - - size_t local_nrow = A.rows.size(); - GlobalOrdinal start_row = local_nrow>0 ? A.rows[0] : -1; - GlobalOrdinal stop_row = local_nrow>0 ? A.rows[local_nrow-1] : -1; - - // We need to convert the index values for the rows on this processor - // to a local index space. We need to: - // - Determine if each index reaches to a local value or external value - // - If local, subtract start_row from index value to get local index - // - If external, find out if it is already accounted for. - // - If so, then do nothing, - // - otherwise - // - add it to the list of external indices, - // - find out which processor owns the value. - // - Set up communication for sparse MV operation - - /////////////////////////////////////////// - // Scan the indices and transform to local - /////////////////////////////////////////// - - std::vector& external_index = A.external_index; - - for(size_t i=0; i tmp_buffer(numprocs, 0); // Temp buffer space needed below - - // Build list of global index offset - - std::vector global_index_offsets(numprocs, 0); - - tmp_buffer[myproc] = start_row; // This is my start row - - // This call sends the start_row of each ith processor to the ith - // entry of global_index_offsets on all processors. - // Thus, each processor knows the range of indices owned by all - // other processors. - // Note: There might be a better algorithm for doing this, but this - // will work... 
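The comment above describes the one trick in make_local_matrix that is easy to miss: each rank writes its start_row into its own slot of a zero-filled buffer of length numprocs, and the summing MPI_Allreduce that follows turns that into a table of every rank's first owned row, available on every rank. The standalone demo below reproduces the idea; it is an invented illustration (the 100-rows-per-rank sizing and the hard-coded long long ordinal are not from miniFE, which picks the MPI datatype via TypeTraits), and the commented-out MPI_Allgather shows one simpler alternative in the spirit of the "better algorithm" the comment mentions.

    // Assumed demo of the "Allreduce as all-gather" trick, not part of miniFE.
    #include <mpi.h>
    #include <vector>
    #include <cstdio>

    int main(int argc, char** argv)
    {
      MPI_Init(&argc, &argv);
      int numprocs = 1, myproc = 0;
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myproc);

      long long start_row = 100LL * myproc;      // pretend each rank owns 100 rows

      std::vector<long long> tmp(numprocs, 0), offsets(numprocs, 0);
      tmp[myproc] = start_row;                   // only my own slot is non-zero
      MPI_Allreduce(tmp.data(), offsets.data(), numprocs,
                    MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);

      // MPI_Allgather(&start_row, 1, MPI_LONG_LONG,
      //               offsets.data(), 1, MPI_LONG_LONG, MPI_COMM_WORLD);
      // would build the same table while sending one value per rank.

      if (myproc == 0)
        for (int p = 0; p < numprocs; ++p)
          std::printf("rank %d starts at global row %lld\n", p, offsets[p]);

      MPI_Finalize();
      return 0;
    }

The Allreduce call in the deleted code, shown next, is exactly this operation with the buffers and MPI datatype taken from the matrix's ordinal type.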
- - MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); - MPI_Allreduce(&tmp_buffer[0], &global_index_offsets[0], numprocs, mpi_dtype, - MPI_SUM, MPI_COMM_WORLD); - - // Go through list of externals and find the processor that owns each - std::vector external_processor(num_external); - - for(LocalOrdinal i=0; i=0; --j) { - if (global_index_offsets[j] <= cur_ind && global_index_offsets[j] >= 0) { - external_processor[i] = j; - break; - } - } - } - - ///////////////////////////////////////////////////////////////////////// - // Sift through the external elements. For each newly encountered external - // point assign it the next index in the sequence. Then look for other - // external elements who are updated by the same node and assign them the next - // set of index numbers in the sequence (ie. elements updated by the same node - // have consecutive indices). - ///////////////////////////////////////////////////////////////////////// - - size_t count = local_nrow; - std::vector& external_local_index = A.external_local_index; - external_local_index.assign(num_external, -1); - - for(LocalOrdinal i=0; i new_external_processor(num_external, 0); - - for(int i=0; i No external elements are updated by - // processor i. - // tmp_neighbors[i] = x ==> (x-1)/numprocs elements are updated from - // processor i. - /// - //////////////////////////////////////////////////////////////////////// - - std::vector tmp_neighbors(numprocs, 0); - - int num_recv_neighbors = 0; - int length = 1; - - for(LocalOrdinal i=0; i recv_list; - recv_list.push_back(new_external_processor[0]); - for(LocalOrdinal i=1; i send_list(num_send_neighbors, 0); - - // - // first post receives, these are immediate receives - // Do not wait for result to come, will do that at the - // wait call below. - // - int MPI_MY_TAG = 99; - - std::vector request(num_send_neighbors); - for(int i=0; i new_external(num_external); - for(LocalOrdinal i=0; i lengths(num_recv_neighbors); - - ++MPI_MY_TAG; - - // First post receives - - for(int i=0; i& neighbors = A.neighbors; - std::vector& recv_length = A.recv_length; - std::vector& send_length = A.send_length; - - neighbors.resize(num_recv_neighbors, 0); - A.request.resize(num_recv_neighbors); - recv_length.resize(num_recv_neighbors, 0); - send_length.resize(num_recv_neighbors, 0); - - LocalOrdinal j = 0; - for(int i=0; i $(DESTDIR)$(includedir)/Makefile.export.threadpool.macros - -uninstall-hook: - rm -f $(includedir)/Makefile.export.threadpool - rm -f $(includedir)/Makefile.export.threadpool.macros - -else - -install-exec-hook: - -uninstall-hook: - -endif - -## ####################################################################### -## Subdirectories to be make'd recursively -## ####################################################################### -#We now build tests and examples through separate make targets, rather than -#during "make". We still need to conditionally include the test and example -#in SUBDIRS, even though BUILD_TESTS and BUILD_EXAMPLES will never be -#defined, so that the tests and examples are included in the distribution -#tarball. - -if SUB_TEST -TEST_SUBDIR=test -endif - -#if SUB_EXAMPLE -#EXAMPLE_SUBDIR=example -#endif - -# #np# - The following make targets must be defined for all packages. -# #np# - If the package does not have tests or examples, replace the -# #np# - corresponding rules with something like: -# #np# - @echo "new_package does not have any tests yet" -if BUILD_TESTS -tests: - @echo "" - @echo "Now building ThreadPool tests." 
- @echo "" - cd $(top_builddir)/test && $(MAKE) - @echo "" - @echo "Finished building ThreadPool tests." - @echo "" -else -tests: - @echo "ThreadPool tests were disabled at configure time" -endif - -examples: - @echo "ThreadPool does not have any examples yet" - -install-examples: - @echo "ThreadPool does not have any examples yet" - -clean-tests: - cd $(top_builddir)/test && $(MAKE) clean - -clean-examples: - @echo "ThreadPool does not have any examples yet" - -everything: - $(MAKE) && $(MAKE) examples && $(MAKE) tests - -clean-everything: - $(MAKE) clean-examples && $(MAKE) clean-tests && $(MAKE) clean - -install-everything: - $(MAKE) install && $(MAKE) install-examples - -SUBDIRS = src $(TEST_SUBDIR) - -## ####################################################################### -## The below targets allow you to use the new -## testharness to run the test suite as make targets -## ####################################################################### - -TRILINOS_HOME_DIR=@abs_top_srcdir@/../.. -TRILINOS_BUILD_DIR=@abs_top_builddir@/../.. -TRILINOS_TEST_CATEGORY=INSTALL - -runtests-serial : - $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ - --trilinos-dir=$(TRILINOS_HOME_DIR) \ - --comm=serial \ - --build-dir=$(TRILINOS_BUILD_DIR) \ - --category=$(TRILINOS_TEST_CATEGORY) \ - --output-dir=@abs_top_builddir@/test/runtests-results \ - --verbosity=1 \ - --packages=ThreadPool - -runtests-mpi : - $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ - --trilinos-dir=$(TRILINOS_HOME_DIR) \ - --comm=mpi \ - --mpi-go=$(TRILINOS_MPI_GO) \ - --build-dir=$(TRILINOS_BUILD_DIR) \ - --category=$(TRILINOS_TEST_CATEGORY) \ - --output-dir=@abs_top_builddir@/test/runtests-results \ - --verbosity=1 \ - --packages=ThreadPool - -if HAVE_MPI -THREADPOOL_CHECK_COMM=mpi -else -THREADPOOL_CHECK_COMM=serial -endif - diff --git a/kokkos/basic/optional/ThreadPool/Makefile.export.threadpool.in b/kokkos/basic/optional/ThreadPool/Makefile.export.threadpool.in deleted file mode 100644 index 66bfda9..0000000 --- a/kokkos/basic/optional/ThreadPool/Makefile.export.threadpool.in +++ /dev/null @@ -1,9 +0,0 @@ -_THREADPOOL_INCLUDES = -I@abs_top_srcdir@/include -I@abs_top_builddir@/include - -_THREADPOOL_LIBS = @LDFLAGS@ -L@abs_top_builddir@/src -ltpi $(LIBS) - -@USING_GNUMAKE_TRUE@THREADPOOL_INCLUDES = $(shell @PERL_EXE@ @abs_top_srcdir@/config/strip_dup_incl_paths.pl $(_THREADPOOL_INCLUDES)) -@USING_GNUMAKE_TRUE@THREADPOOL_LIBS = $(shell @PERL_EXE@ @abs_top_srcdir@/config/strip_dup_libs.pl $(_THREADPOOL_LIBS)) - -@USING_GNUMAKE_FALSE@THREADPOOL_INCLUDES = $(_THREADPOOL_INCLUDES) -@USING_GNUMAKE_FALSE@THREADPOOL_LIBS = $(_THREADPOOL_LIBS) diff --git a/kokkos/basic/optional/ThreadPool/Makefile.in b/kokkos/basic/optional/ThreadPool/Makefile.in deleted file mode 100644 index 3e4abfd..0000000 --- a/kokkos/basic/optional/ThreadPool/Makefile.in +++ /dev/null @@ -1,777 +0,0 @@ -# Makefile.in generated by automake 1.10 from Makefile.am. -# @configure_input@ - -# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -# @HEADER -# ************************************************************************ -# -# ThreadPool Package -# Copyright (2008) Sandia Corporation -# -# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the U.S. Government. -# -# This library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as -# published by the Free Software Foundation; either version 2.1 of the -# License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -# USA -# Questions? Contact Carter Edwards (hcedwar@sandia.gov) -# -# ************************************************************************ -# @HEADER -VPATH = @srcdir@ -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -target_triplet = @target@ -subdir = . 
-DIST_COMMON = $(am__configure_deps) $(srcdir)/Makefile.am \ - $(srcdir)/Makefile.export.threadpool.in $(srcdir)/Makefile.in \ - $(top_srcdir)/configure config/config.guess config/config.sub \ - config/depcomp config/install-sh config/missing -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/config/acx_pthread.m4 \ - $(top_srcdir)/config/tac_arg_check_mpi.m4 \ - $(top_srcdir)/config/tac_arg_config_mpi.m4 \ - $(top_srcdir)/config/tac_arg_enable_export-makefiles.m4 \ - $(top_srcdir)/config/tac_arg_enable_feature.m4 \ - $(top_srcdir)/config/tac_arg_enable_feature_sub_check.m4 \ - $(top_srcdir)/config/tac_arg_with_ar.m4 \ - $(top_srcdir)/config/tac_arg_with_flags.m4 \ - $(top_srcdir)/config/tac_arg_with_incdirs.m4 \ - $(top_srcdir)/config/tac_arg_with_libdirs.m4 \ - $(top_srcdir)/config/tac_arg_with_libs.m4 \ - $(top_srcdir)/config/tac_arg_with_perl.m4 \ - $(top_srcdir)/configure.ac -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ - configure.lineno config.status.lineno -mkinstalldirs = $(install_sh) -d -CONFIG_HEADER = $(top_builddir)/src/ThreadPool_config.h -CONFIG_CLEAN_FILES = Makefile.export.threadpool -SOURCES = -DIST_SOURCES = -RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ - html-recursive info-recursive install-data-recursive \ - install-dvi-recursive install-exec-recursive \ - install-html-recursive install-info-recursive \ - install-pdf-recursive install-ps-recursive install-recursive \ - installcheck-recursive installdirs-recursive pdf-recursive \ - ps-recursive uninstall-recursive -RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ - distclean-recursive maintainer-clean-recursive -ETAGS = etags -CTAGS = ctags -DIST_SUBDIRS = src test -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -distdir = $(PACKAGE)-$(VERSION) -top_distdir = $(distdir) -am__remove_distdir = \ - { test ! -d $(distdir) \ - || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \ - && rm -fr $(distdir); }; } -DIST_ARCHIVES = $(distdir).tar.gz -GZIP_ENV = --best -distuninstallcheck_listfiles = find . -type f -print -distcleancheck_listfiles = find . 
-type f -print -ACLOCAL = @ACLOCAL@ -ALTERNATE_AR = @ALTERNATE_AR@ -AMTAR = @AMTAR@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CPPFLAGS = @CPPFLAGS@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -EXEEXT = @EXEEXT@ -GREP = @GREP@ -HAVE_PERL = @HAVE_PERL@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LDFLAGS = @LDFLAGS@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MKDIR_P = @MKDIR_P@ -MPI_CC_EXISTS = @MPI_CC_EXISTS@ -MPI_CXX = @MPI_CXX@ -MPI_CXX_EXISTS = @MPI_CXX_EXISTS@ -MPI_F77_EXISTS = @MPI_F77_EXISTS@ -MPI_TEMP_CXX = @MPI_TEMP_CXX@ -OBJEXT = @OBJEXT@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PERL_EXE = @PERL_EXE@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -RANLIB = @RANLIB@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -STRIP = @STRIP@ -VERSION = @VERSION@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_aux_dir = @ac_aux_dir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = @build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target = @target@ -target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ -AUTOMAKE_OPTIONS = foreign -ACLOCAL_AMFLAGS = -I config - -# -# I believe that by switching to AUX_DIR(../../config) one -# could get rid of these. -# -#np# For a typical package, there is no reason to distribute these files -#np# because users should not have to bootstrap. We distribute them with -#np# new package so that the files can be used in creating the -#np# configure script for other packages. 
-EXTRA_DIST = \ -config/generate-makeoptions.pl \ -config/replace-install-prefix.pl config/string-replace.pl \ -config/strip_dup_incl_paths.pl config/strip_dup_libs.pl \ -config/token-replace.pl - -AUX_DIST = config/install-sh config/missing config/mkinstalldirs -# -# Again, I hope that AUX_DIR(../../config) eliminates these -# config/install-sh config/missing config/mkinstalldirs -MAINTAINERCLEANFILES = Makefile.in aclocal.m4 autom4te.cache/* \ - configure config.status config.log \ - src/common/config-h.in src/common/stamp-h.in \ - $(AUX_DIST) - - -#We now build tests and examples through separate make targets, rather than -#during "make". We still need to conditionally include the test and example -#in SUBDIRS, even though BUILD_TESTS and BUILD_EXAMPLES will never be -#defined, so that the tests and examples are included in the distribution -#tarball. -@SUB_TEST_TRUE@TEST_SUBDIR = test -SUBDIRS = src $(TEST_SUBDIR) -TRILINOS_HOME_DIR = @abs_top_srcdir@/../.. -TRILINOS_BUILD_DIR = @abs_top_builddir@/../.. -TRILINOS_TEST_CATEGORY = INSTALL -@HAVE_MPI_FALSE@THREADPOOL_CHECK_COMM = serial -@HAVE_MPI_TRUE@THREADPOOL_CHECK_COMM = mpi -all: all-recursive - -.SUFFIXES: -am--refresh: - @: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \ - cd $(srcdir) && $(AUTOMAKE) --foreign \ - && exit 0; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ - cd $(top_srcdir) && \ - $(AUTOMAKE) --foreign Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' in \ - *config.status*) \ - echo ' $(SHELL) ./config.status'; \ - $(SHELL) ./config.status;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - $(SHELL) ./config.status --recheck - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(srcdir) && $(AUTOCONF) -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) -Makefile.export.threadpool: $(top_builddir)/config.status $(srcdir)/Makefile.export.threadpool.in - cd $(top_builddir) && $(SHELL) ./config.status $@ - -# This directory's subdirectories are mostly independent; you can cd -# into them and run `make' without going through this Makefile. -# To change the values of `make' variables: instead of editing Makefiles, -# (1) if the variable is set in `config.status', edit `config.status' -# (which will cause the Makefiles to be regenerated when you run `make'); -# (2) otherwise, pass the desired values on the `make' command line. 
-$(RECURSIVE_TARGETS): - @failcom='exit 1'; \ - for f in x $$MAKEFLAGS; do \ - case $$f in \ - *=* | --[!k]*);; \ - *k*) failcom='fail=yes';; \ - esac; \ - done; \ - dot_seen=no; \ - target=`echo $@ | sed s/-recursive//`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - dot_seen=yes; \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || eval $$failcom; \ - done; \ - if test "$$dot_seen" = "no"; then \ - $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ - fi; test -z "$$fail" - -$(RECURSIVE_CLEAN_TARGETS): - @failcom='exit 1'; \ - for f in x $$MAKEFLAGS; do \ - case $$f in \ - *=* | --[!k]*);; \ - *k*) failcom='fail=yes';; \ - esac; \ - done; \ - dot_seen=no; \ - case "$@" in \ - distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ - *) list='$(SUBDIRS)' ;; \ - esac; \ - rev=''; for subdir in $$list; do \ - if test "$$subdir" = "."; then :; else \ - rev="$$subdir $$rev"; \ - fi; \ - done; \ - rev="$$rev ."; \ - target=`echo $@ | sed s/-recursive//`; \ - for subdir in $$rev; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || eval $$failcom; \ - done && test -z "$$fail" -tags-recursive: - list='$(SUBDIRS)'; for subdir in $$list; do \ - test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ - done -ctags-recursive: - list='$(SUBDIRS)'; for subdir in $$list; do \ - test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ - done - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ - include_option=--etags-include; \ - empty_fix=.; \ - else \ - include_option=--include; \ - empty_fix=; \ - fi; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = .; then :; else \ - test ! 
-f $$subdir/TAGS || \ - tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ - fi; \ - done; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$tags $$unique; \ - fi -ctags: CTAGS -CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(CTAGS_ARGS)$$tags$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$tags $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && cd $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) $$here - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags - -distdir: $(DISTFILES) - $(am__remove_distdir) - test -d $(distdir) || mkdir $(distdir) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ - fi; \ - cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ - else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done - list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = .; then :; else \ - test -d "$(distdir)/$$subdir" \ - || $(MKDIR_P) "$(distdir)/$$subdir" \ - || exit 1; \ - distdir=`$(am__cd) $(distdir) && pwd`; \ - top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ - (cd $$subdir && \ - $(MAKE) $(AM_MAKEFLAGS) \ - top_distdir="$$top_distdir" \ - distdir="$$distdir/$$subdir" \ - am__remove_distdir=: \ - am__skip_length_check=: \ - distdir) \ - || exit 1; \ - fi; \ - done - -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ - ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ - ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ - ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ - || chmod -R a+r $(distdir) -dist-gzip: distdir - tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz - $(am__remove_distdir) - -dist-bzip2: distdir - tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2 - $(am__remove_distdir) - -dist-tarZ: distdir - tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z - $(am__remove_distdir) - -dist-shar: distdir - shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz - $(am__remove_distdir) - -dist-zip: distdir - -rm -f $(distdir).zip - zip -rq $(distdir).zip $(distdir) - $(am__remove_distdir) - -dist dist-all: distdir - tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz - $(am__remove_distdir) - -# This target untars the dist file and tries a VPATH configuration. Then -# it guarantees that the distribution is self-contained by making another -# tarfile. -distcheck: dist - case '$(DIST_ARCHIVES)' in \ - *.tar.gz*) \ - GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\ - *.tar.bz2*) \ - bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\ - *.tar.Z*) \ - uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ - *.shar.gz*) \ - GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\ - *.zip*) \ - unzip $(distdir).zip ;;\ - esac - chmod -R a-w $(distdir); chmod a+w $(distdir) - mkdir $(distdir)/_build - mkdir $(distdir)/_inst - chmod a-w $(distdir) - dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ - && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ - && cd $(distdir)/_build \ - && ../configure --srcdir=.. --prefix="$$dc_install_base" \ - $(DISTCHECK_CONFIGURE_FLAGS) \ - && $(MAKE) $(AM_MAKEFLAGS) \ - && $(MAKE) $(AM_MAKEFLAGS) dvi \ - && $(MAKE) $(AM_MAKEFLAGS) check \ - && $(MAKE) $(AM_MAKEFLAGS) install \ - && $(MAKE) $(AM_MAKEFLAGS) installcheck \ - && $(MAKE) $(AM_MAKEFLAGS) uninstall \ - && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ - distuninstallcheck \ - && chmod -R a-w "$$dc_install_base" \ - && ({ \ - (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ - && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ - && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ - && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ - distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ - } || { rm -rf "$$dc_destdir"; exit 1; }) \ - && rm -rf "$$dc_destdir" \ - && $(MAKE) $(AM_MAKEFLAGS) dist \ - && rm -rf $(DIST_ARCHIVES) \ - && $(MAKE) $(AM_MAKEFLAGS) distcleancheck - $(am__remove_distdir) - @(echo "$(distdir) archives ready for distribution: "; \ - list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ - sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' -distuninstallcheck: - @cd $(distuninstallcheck_dir) \ - && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \ - || { echo "ERROR: files left after uninstall:" ; \ - if test -n "$(DESTDIR)"; then \ - echo " (check DESTDIR support)"; \ - fi ; \ - $(distuninstallcheck_listfiles) ; \ - exit 1; } >&2 -distcleancheck: distclean - @if test '$(srcdir)' = . 
; then \ - echo "ERROR: distcleancheck can only run from a VPATH build" ; \ - exit 1 ; \ - fi - @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ - || { echo "ERROR: files left in build directory after distclean:" ; \ - $(distcleancheck_listfiles) ; \ - exit 1; } >&2 -check-am: all-am -check: check-recursive -all-am: Makefile all-local -installdirs: installdirs-recursive -installdirs-am: -install: install-recursive -install-exec: install-exec-recursive -install-data: install-data-recursive -uninstall: uninstall-recursive - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-recursive -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." - -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) -clean: clean-recursive - -clean-am: clean-generic mostlyclean-am - -distclean: distclean-recursive - -rm -f $(am__CONFIG_DISTCLEAN_FILES) - -rm -f Makefile -distclean-am: clean-am distclean-generic distclean-tags - -dvi: dvi-recursive - -dvi-am: - -html: html-recursive - -info: info-recursive - -info-am: - -install-data-am: - -install-dvi: install-dvi-recursive - -install-exec-am: - @$(NORMAL_INSTALL) - $(MAKE) $(AM_MAKEFLAGS) install-exec-hook - -install-html: install-html-recursive - -install-info: install-info-recursive - -install-man: - -install-pdf: install-pdf-recursive - -install-ps: install-ps-recursive - -installcheck-am: - -maintainer-clean: maintainer-clean-recursive - -rm -f $(am__CONFIG_DISTCLEAN_FILES) - -rm -rf $(top_srcdir)/autom4te.cache - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-recursive - -mostlyclean-am: mostlyclean-generic - -pdf: pdf-recursive - -pdf-am: - -ps: ps-recursive - -ps-am: - -uninstall-am: - @$(NORMAL_INSTALL) - $(MAKE) $(AM_MAKEFLAGS) uninstall-hook - -.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ - install-exec-am install-strip uninstall-am - -.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ - all all-am all-local am--refresh check check-am clean \ - clean-generic ctags ctags-recursive dist dist-all dist-bzip2 \ - dist-gzip dist-shar dist-tarZ dist-zip distcheck distclean \ - distclean-generic distclean-tags distcleancheck distdir \ - distuninstallcheck dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-exec-hook \ - install-html install-html-am install-info install-info-am \ - install-man install-pdf install-pdf-am install-ps \ - install-ps-am install-strip installcheck installcheck-am \ - installdirs installdirs-am maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ - pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \ - uninstall-hook - - -#The following line helps the test harness recover from build errors. 
- -all-local: - @echo "Trilinos package ThreadPool built successfully" - -@USING_EXPORT_MAKEFILES_TRUE@install-exec-hook: -@USING_EXPORT_MAKEFILES_TRUE@ mkdir -p $(DESTDIR)$(includedir) -@USING_EXPORT_MAKEFILES_TRUE@ cp $(top_builddir)/Makefile.export.threadpool $(DESTDIR)$(includedir)/. -@USING_EXPORT_MAKEFILES_TRUE@ $(PERL_EXE) $(top_srcdir)/config/replace-install-prefix.pl \ -@USING_EXPORT_MAKEFILES_TRUE@ --exec-prefix=$(exec_prefix) \ -@USING_EXPORT_MAKEFILES_TRUE@ --my-export-makefile=Makefile.export.threadpool \ -@USING_EXPORT_MAKEFILES_TRUE@ --my-abs-top-srcdir=@abs_top_srcdir@ \ -@USING_EXPORT_MAKEFILES_TRUE@ --my-abs-incl-dirs=@abs_top_builddir@/src:@abs_top_srcdir@/src \ -@USING_EXPORT_MAKEFILES_TRUE@ --my-abs-lib-dirs=@abs_top_builddir@/src -@USING_EXPORT_MAKEFILES_TRUE@ $(PERL_EXE) $(top_srcdir)/config/generate-makeoptions.pl $(top_builddir)/src/Makefile \ -@USING_EXPORT_MAKEFILES_TRUE@ THREADPOOL > $(DESTDIR)$(includedir)/Makefile.export.threadpool.macros - -@USING_EXPORT_MAKEFILES_TRUE@uninstall-hook: -@USING_EXPORT_MAKEFILES_TRUE@ rm -f $(includedir)/Makefile.export.threadpool -@USING_EXPORT_MAKEFILES_TRUE@ rm -f $(includedir)/Makefile.export.threadpool.macros - -@USING_EXPORT_MAKEFILES_FALSE@install-exec-hook: - -@USING_EXPORT_MAKEFILES_FALSE@uninstall-hook: - -#if SUB_EXAMPLE -#EXAMPLE_SUBDIR=example -#endif - -# #np# - The following make targets must be defined for all packages. -# #np# - If the package does not have tests or examples, replace the -# #np# - corresponding rules with something like: -# #np# - @echo "new_package does not have any tests yet" -@BUILD_TESTS_TRUE@tests: -@BUILD_TESTS_TRUE@ @echo "" -@BUILD_TESTS_TRUE@ @echo "Now building ThreadPool tests." -@BUILD_TESTS_TRUE@ @echo "" -@BUILD_TESTS_TRUE@ cd $(top_builddir)/test && $(MAKE) -@BUILD_TESTS_TRUE@ @echo "" -@BUILD_TESTS_TRUE@ @echo "Finished building ThreadPool tests." -@BUILD_TESTS_TRUE@ @echo "" -@BUILD_TESTS_FALSE@tests: -@BUILD_TESTS_FALSE@ @echo "ThreadPool tests were disabled at configure time" - -examples: - @echo "ThreadPool does not have any examples yet" - -install-examples: - @echo "ThreadPool does not have any examples yet" - -clean-tests: - cd $(top_builddir)/test && $(MAKE) clean - -clean-examples: - @echo "ThreadPool does not have any examples yet" - -everything: - $(MAKE) && $(MAKE) examples && $(MAKE) tests - -clean-everything: - $(MAKE) clean-examples && $(MAKE) clean-tests && $(MAKE) clean - -install-everything: - $(MAKE) install && $(MAKE) install-examples - -runtests-serial : - $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ - --trilinos-dir=$(TRILINOS_HOME_DIR) \ - --comm=serial \ - --build-dir=$(TRILINOS_BUILD_DIR) \ - --category=$(TRILINOS_TEST_CATEGORY) \ - --output-dir=@abs_top_builddir@/test/runtests-results \ - --verbosity=1 \ - --packages=ThreadPool - -runtests-mpi : - $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ - --trilinos-dir=$(TRILINOS_HOME_DIR) \ - --comm=mpi \ - --mpi-go=$(TRILINOS_MPI_GO) \ - --build-dir=$(TRILINOS_BUILD_DIR) \ - --category=$(TRILINOS_TEST_CATEGORY) \ - --output-dir=@abs_top_builddir@/test/runtests-results \ - --verbosity=1 \ - --packages=ThreadPool -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/kokkos/basic/optional/ThreadPool/ThreadPool_config.h b/kokkos/basic/optional/ThreadPool/ThreadPool_config.h deleted file mode 100644 index b941069..0000000 --- a/kokkos/basic/optional/ThreadPool/ThreadPool_config.h +++ /dev/null @@ -1,3 +0,0 @@ -#ifndef HAVE_PTHREAD -#define HAVE_PTHREAD -#endif diff --git a/kokkos/basic/optional/ThreadPool/aclocal.m4 b/kokkos/basic/optional/ThreadPool/aclocal.m4 deleted file mode 100644 index e1f57a9..0000000 --- a/kokkos/basic/optional/ThreadPool/aclocal.m4 +++ /dev/null @@ -1,932 +0,0 @@ -# generated automatically by aclocal 1.10 -*- Autoconf -*- - -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, -# 2005, 2006 Free Software Foundation, Inc. -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -m4_if(m4_PACKAGE_VERSION, [2.61],, -[m4_fatal([this file was generated for autoconf 2.61. -You have another version of autoconf. If you want to use that, -you should regenerate the build system entirely.], [63])]) - -# Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_AUTOMAKE_VERSION(VERSION) -# ---------------------------- -# Automake X.Y traces this macro to ensure aclocal.m4 has been -# generated from the m4 files accompanying Automake X.Y. -# (This private macro should not be called outside this file.) -AC_DEFUN([AM_AUTOMAKE_VERSION], -[am__api_version='1.10' -dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to -dnl require some minimum version. Point them to the right macro. -m4_if([$1], [1.10], [], - [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl -]) - -# _AM_AUTOCONF_VERSION(VERSION) -# ----------------------------- -# aclocal traces this macro to find the Autoconf version. -# This is a private macro too. Using m4_define simplifies -# the logic in aclocal, which can simply ignore this definition. -m4_define([_AM_AUTOCONF_VERSION], []) - -# AM_SET_CURRENT_AUTOMAKE_VERSION -# ------------------------------- -# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. -# This function is AC_REQUIREd by AC_INIT_AUTOMAKE. -AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], -[AM_AUTOMAKE_VERSION([1.10])dnl -_AM_AUTOCONF_VERSION(m4_PACKAGE_VERSION)]) - -# AM_AUX_DIR_EXPAND -*- Autoconf -*- - -# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets -# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to -# `$srcdir', `$srcdir/..', or `$srcdir/../..'. -# -# Of course, Automake must honor this variable whenever it calls a -# tool from the auxiliary directory. The problem is that $srcdir (and -# therefore $ac_aux_dir as well) can be either absolute or relative, -# depending on how configure is run. 
This is pretty annoying, since -# it makes $ac_aux_dir quite unusable in subdirectories: in the top -# source directory, any form will work fine, but in subdirectories a -# relative path needs to be adjusted first. -# -# $ac_aux_dir/missing -# fails when called from a subdirectory if $ac_aux_dir is relative -# $top_srcdir/$ac_aux_dir/missing -# fails if $ac_aux_dir is absolute, -# fails when called from a subdirectory in a VPATH build with -# a relative $ac_aux_dir -# -# The reason of the latter failure is that $top_srcdir and $ac_aux_dir -# are both prefixed by $srcdir. In an in-source build this is usually -# harmless because $srcdir is `.', but things will broke when you -# start a VPATH build or use an absolute $srcdir. -# -# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, -# iff we strip the leading $srcdir from $ac_aux_dir. That would be: -# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` -# and then we would define $MISSING as -# MISSING="\${SHELL} $am_aux_dir/missing" -# This will work as long as MISSING is not called from configure, because -# unfortunately $(top_srcdir) has no meaning in configure. -# However there are other variables, like CC, which are often used in -# configure, and could therefore not use this "fixed" $ac_aux_dir. -# -# Another solution, used here, is to always expand $ac_aux_dir to an -# absolute PATH. The drawback is that using absolute paths prevent a -# configured tree to be moved without reconfiguration. - -AC_DEFUN([AM_AUX_DIR_EXPAND], -[dnl Rely on autoconf to set up CDPATH properly. -AC_PREREQ([2.50])dnl -# expand $ac_aux_dir to an absolute path -am_aux_dir=`cd $ac_aux_dir && pwd` -]) - -# AM_CONDITIONAL -*- Autoconf -*- - -# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 8 - -# AM_CONDITIONAL(NAME, SHELL-CONDITION) -# ------------------------------------- -# Define a conditional. -AC_DEFUN([AM_CONDITIONAL], -[AC_PREREQ(2.52)dnl - ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], - [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl -AC_SUBST([$1_TRUE])dnl -AC_SUBST([$1_FALSE])dnl -_AM_SUBST_NOTMAKE([$1_TRUE])dnl -_AM_SUBST_NOTMAKE([$1_FALSE])dnl -if $2; then - $1_TRUE= - $1_FALSE='#' -else - $1_TRUE='#' - $1_FALSE= -fi -AC_CONFIG_COMMANDS_PRE( -[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then - AC_MSG_ERROR([[conditional "$1" was never defined. -Usually this means the macro was only invoked conditionally.]]) -fi])]) - -# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 9 - -# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be -# written in clear, in which case automake, when reading aclocal.m4, -# will think it sees a *use*, and therefore will trigger all it's -# C support machinery. Also note that it means that autoscan, seeing -# CC etc. in the Makefile, will ask for an AC_PROG_CC use... - - -# _AM_DEPENDENCIES(NAME) -# ---------------------- -# See how the compiler implements dependency checking. -# NAME is "CC", "CXX", "GCJ", or "OBJC". 
-# We try a few techniques and use that to set a single cache variable. -# -# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was -# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular -# dependency, and given that the user is not expected to run this macro, -# just rely on AC_PROG_CC. -AC_DEFUN([_AM_DEPENDENCIES], -[AC_REQUIRE([AM_SET_DEPDIR])dnl -AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl -AC_REQUIRE([AM_MAKE_INCLUDE])dnl -AC_REQUIRE([AM_DEP_TRACK])dnl - -ifelse([$1], CC, [depcc="$CC" am_compiler_list=], - [$1], CXX, [depcc="$CXX" am_compiler_list=], - [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'], - [$1], UPC, [depcc="$UPC" am_compiler_list=], - [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'], - [depcc="$$1" am_compiler_list=]) - -AC_CACHE_CHECK([dependency style of $depcc], - [am_cv_$1_dependencies_compiler_type], -[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then - # We make a subdir and do the tests there. Otherwise we can end up - # making bogus files that we don't know about and never remove. For - # instance it was reported that on HP-UX the gcc test will end up - # making a dummy file named `D' -- because `-MD' means `put the output - # in D'. - mkdir conftest.dir - # Copy depcomp to subdir because otherwise we won't find it if we're - # using a relative directory. - cp "$am_depcomp" conftest.dir - cd conftest.dir - # We will build objects and dependencies in a subdirectory because - # it helps to detect inapplicable dependency modes. For instance - # both Tru64's cc and ICC support -MD to output dependencies as a - # side effect of compilation, but ICC will put the dependencies in - # the current directory while Tru64 will put them in the object - # directory. - mkdir sub - - am_cv_$1_dependencies_compiler_type=none - if test "$am_compiler_list" = ""; then - am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` - fi - for depmode in $am_compiler_list; do - # Setup a source with many dependencies, because some compilers - # like to wrap large dependency lists on column 80 (with \), and - # we should not choose a depcomp mode which is confused by this. - # - # We need to recreate these files for each test, as the compiler may - # overwrite some of them when testing with obscure command lines. - # This happens at least with the AIX C compiler. - : > sub/conftest.c - for i in 1 2 3 4 5 6; do - echo '#include "conftst'$i'.h"' >> sub/conftest.c - # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with - # Solaris 8's {/usr,}/bin/sh. - touch sub/conftst$i.h - done - echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf - - case $depmode in - nosideeffect) - # after this tag, mechanisms are not by side-effect, so they'll - # only be used when explicitly requested - if test "x$enable_dependency_tracking" = xyes; then - continue - else - break - fi - ;; - none) break ;; - esac - # We check with `-c' and `-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle `-M -o', and we need to detect this. 
- if depmode=$depmode \ - source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ - depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ - $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ - >/dev/null 2>conftest.err && - grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && - ${MAKE-make} -s -f confmf > /dev/null 2>&1; then - # icc doesn't choke on unknown options, it will just issue warnings - # or remarks (even with -Werror). So we grep stderr for any message - # that says an option was ignored or not supported. - # When given -MP, icc 7.0 and 7.1 complain thusly: - # icc: Command line warning: ignoring option '-M'; no argument required - # The diagnosis changed in icc 8.0: - # icc: Command line remark: option '-MP' not supported - if (grep 'ignoring option' conftest.err || - grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else - am_cv_$1_dependencies_compiler_type=$depmode - break - fi - fi - done - - cd .. - rm -rf conftest.dir -else - am_cv_$1_dependencies_compiler_type=none -fi -]) -AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) -AM_CONDITIONAL([am__fastdep$1], [ - test "x$enable_dependency_tracking" != xno \ - && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) -]) - - -# AM_SET_DEPDIR -# ------------- -# Choose a directory name for dependency files. -# This macro is AC_REQUIREd in _AM_DEPENDENCIES -AC_DEFUN([AM_SET_DEPDIR], -[AC_REQUIRE([AM_SET_LEADING_DOT])dnl -AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl -]) - - -# AM_DEP_TRACK -# ------------ -AC_DEFUN([AM_DEP_TRACK], -[AC_ARG_ENABLE(dependency-tracking, -[ --disable-dependency-tracking speeds up one-time build - --enable-dependency-tracking do not reject slow dependency extractors]) -if test "x$enable_dependency_tracking" != xno; then - am_depcomp="$ac_aux_dir/depcomp" - AMDEPBACKSLASH='\' -fi -AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) -AC_SUBST([AMDEPBACKSLASH])dnl -_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl -]) - -# Generate code to set up dependency tracking. -*- Autoconf -*- - -# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -#serial 3 - -# _AM_OUTPUT_DEPENDENCY_COMMANDS -# ------------------------------ -AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], -[for mf in $CONFIG_FILES; do - # Strip MF so we end up with the name of the file. - mf=`echo "$mf" | sed -e 's/:.*$//'` - # Check whether this is an Automake generated Makefile or not. - # We used to match only the files named `Makefile.in', but - # some people rename them; so instead we look at the file content. - # Grep'ing the first line is not enough: some people post-process - # each Makefile.in and add a new line on top of each file to say so. - # Grep'ing the whole file is not good either: AIX grep has a line - # limit of 2048, but all sed's we know have understand at least 4000. - if sed 10q "$mf" | grep '^#.*generated by automake' > /dev/null 2>&1; then - dirpart=`AS_DIRNAME("$mf")` - else - continue - fi - # Extract the definition of DEPDIR, am__include, and am__quote - # from the Makefile without running `make'. 
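The Makefile lines that the sed commands below pull out look like this in a generated Makefile once config.status has run (foo.Po is illustrative; with a BSD-style make, am__include becomes `.include' and am__quote supplies the surrounding double quotes):

    DEPDIR = .deps
    am__include = include
    am__quote =
    include ./$(DEPDIR)/foo.Po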
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` - test -z "$DEPDIR" && continue - am__include=`sed -n 's/^am__include = //p' < "$mf"` - test -z "am__include" && continue - am__quote=`sed -n 's/^am__quote = //p' < "$mf"` - # When using ansi2knr, U may be empty or an underscore; expand it - U=`sed -n 's/^U = //p' < "$mf"` - # Find all dependency output files, they are included files with - # $(DEPDIR) in their names. We invoke sed twice because it is the - # simplest approach to changing $(DEPDIR) to its actual value in the - # expansion. - for file in `sed -n " - s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ - sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do - # Make sure the directory exists. - test -f "$dirpart/$file" && continue - fdir=`AS_DIRNAME(["$file"])` - AS_MKDIR_P([$dirpart/$fdir]) - # echo "creating $dirpart/$file" - echo '# dummy' > "$dirpart/$file" - done -done -])# _AM_OUTPUT_DEPENDENCY_COMMANDS - - -# AM_OUTPUT_DEPENDENCY_COMMANDS -# ----------------------------- -# This macro should only be invoked once -- use via AC_REQUIRE. -# -# This code is only required when automatic dependency tracking -# is enabled. FIXME. This creates each `.P' file that we will -# need in order to bootstrap the dependency handling code. -AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], -[AC_CONFIG_COMMANDS([depfiles], - [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], - [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) -]) - -# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 8 - -# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS. -AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)]) - -# Do all the work for Automake. -*- Autoconf -*- - -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, -# 2005, 2006 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 12 - -# This macro actually does too much. Some checks are only needed if -# your package does certain things. But this isn't really a big deal. - -# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) -# AM_INIT_AUTOMAKE([OPTIONS]) -# ----------------------------------------------- -# The call with PACKAGE and VERSION arguments is the old style -# call (pre autoconf-2.50), which is being phased out. PACKAGE -# and VERSION should now be passed to AC_INIT and removed from -# the call to AM_INIT_AUTOMAKE. -# We support both call styles for the transition. After -# the next Automake release, Autoconf can make the AC_INIT -# arguments mandatory, and then we can depend on a new Autoconf -# release and drop the old call support. -AC_DEFUN([AM_INIT_AUTOMAKE], -[AC_PREREQ([2.60])dnl -dnl Autoconf wants to disallow AM_ names. We explicitly allow -dnl the ones we care about. -m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl -AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl -AC_REQUIRE([AC_PROG_INSTALL])dnl -if test "`cd $srcdir && pwd`" != "`pwd`"; then - # Use -I$(srcdir) only when $(srcdir) != ., so that make's output - # is not polluted with repeated "-I." 
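The two calling conventions described above look like this in a configure.ac (mypackage and 1.0 are placeholders):

    dnl Old style (being phased out): package and version passed here.
    AM_INIT_AUTOMAKE([mypackage], [1.0])

    dnl New style: package and version belong to AC_INIT; only options here.
    AC_INIT([mypackage], [1.0])
    AM_INIT_AUTOMAKE([foreign])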
- AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl - # test to see if srcdir already configured - if test -f $srcdir/config.status; then - AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) - fi -fi - -# test whether we have cygpath -if test -z "$CYGPATH_W"; then - if (cygpath --version) >/dev/null 2>/dev/null; then - CYGPATH_W='cygpath -w' - else - CYGPATH_W=echo - fi -fi -AC_SUBST([CYGPATH_W]) - -# Define the identity of the package. -dnl Distinguish between old-style and new-style calls. -m4_ifval([$2], -[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl - AC_SUBST([PACKAGE], [$1])dnl - AC_SUBST([VERSION], [$2])], -[_AM_SET_OPTIONS([$1])dnl -dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. -m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,, - [m4_fatal([AC_INIT should be called with package and version arguments])])dnl - AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl - AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl - -_AM_IF_OPTION([no-define],, -[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) - AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl - -# Some tools Automake needs. -AC_REQUIRE([AM_SANITY_CHECK])dnl -AC_REQUIRE([AC_ARG_PROGRAM])dnl -AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version}) -AM_MISSING_PROG(AUTOCONF, autoconf) -AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version}) -AM_MISSING_PROG(AUTOHEADER, autoheader) -AM_MISSING_PROG(MAKEINFO, makeinfo) -AM_PROG_INSTALL_SH -AM_PROG_INSTALL_STRIP -AC_REQUIRE([AM_PROG_MKDIR_P])dnl -# We need awk for the "check" target. The system "awk" is bad on -# some platforms. -AC_REQUIRE([AC_PROG_AWK])dnl -AC_REQUIRE([AC_PROG_MAKE_SET])dnl -AC_REQUIRE([AM_SET_LEADING_DOT])dnl -_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], - [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], - [_AM_PROG_TAR([v7])])]) -_AM_IF_OPTION([no-dependencies],, -[AC_PROVIDE_IFELSE([AC_PROG_CC], - [_AM_DEPENDENCIES(CC)], - [define([AC_PROG_CC], - defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [_AM_DEPENDENCIES(CXX)], - [define([AC_PROG_CXX], - defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl -AC_PROVIDE_IFELSE([AC_PROG_OBJC], - [_AM_DEPENDENCIES(OBJC)], - [define([AC_PROG_OBJC], - defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl -]) -]) - - -# When config.status generates a header, we must update the stamp-h file. -# This file resides in the same directory as the config header -# that is generated. The stamp files are numbered to have different names. - -# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the -# loop where config.status creates the headers, so we can generate -# our stamp files there. -AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], -[# Compute $1's index in $config_headers. -_am_stamp_count=1 -for _am_header in $config_headers :; do - case $_am_header in - $1 | $1:* ) - break ;; - * ) - _am_stamp_count=`expr $_am_stamp_count + 1` ;; - esac -done -echo "timestamp for $1" >`AS_DIRNAME([$1])`/stamp-h[]$_am_stamp_count]) - -# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_PROG_INSTALL_SH -# ------------------ -# Define $install_sh. 
-AC_DEFUN([AM_PROG_INSTALL_SH], -[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl -install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"} -AC_SUBST(install_sh)]) - -# Copyright (C) 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 2 - -# Check whether the underlying file-system supports filenames -# with a leading dot. For instance MS-DOS doesn't. -AC_DEFUN([AM_SET_LEADING_DOT], -[rm -rf .tst 2>/dev/null -mkdir .tst 2>/dev/null -if test -d .tst; then - am__leading_dot=. -else - am__leading_dot=_ -fi -rmdir .tst 2>/dev/null -AC_SUBST([am__leading_dot])]) - -# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- -# From Jim Meyering - -# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 4 - -AC_DEFUN([AM_MAINTAINER_MODE], -[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) - dnl maintainer-mode is disabled by default - AC_ARG_ENABLE(maintainer-mode, -[ --enable-maintainer-mode enable make rules and dependencies not useful - (and sometimes confusing) to the casual installer], - USE_MAINTAINER_MODE=$enableval, - USE_MAINTAINER_MODE=no) - AC_MSG_RESULT([$USE_MAINTAINER_MODE]) - AM_CONDITIONAL(MAINTAINER_MODE, [test $USE_MAINTAINER_MODE = yes]) - MAINT=$MAINTAINER_MODE_TRUE - AC_SUBST(MAINT)dnl -] -) - -AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE]) - -# Check to see how 'make' treats includes. -*- Autoconf -*- - -# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 3 - -# AM_MAKE_INCLUDE() -# ----------------- -# Check to see how make treats includes. -AC_DEFUN([AM_MAKE_INCLUDE], -[am_make=${MAKE-make} -cat > confinc << 'END' -am__doit: - @echo done -.PHONY: am__doit -END -# If we don't find an include directive, just comment out the code. -AC_MSG_CHECKING([for style of include used by $am_make]) -am__include="#" -am__quote= -_am_result=none -# First try GNU make style include. -echo "include confinc" > confmf -# We grep out `Entering directory' and `Leaving directory' -# messages which can occur if `w' ends up in MAKEFLAGS. -# In particular we don't look at `^make:' because GNU make might -# be invoked under some other name (usually "gmake"), in which -# case it prints its new name instead of `make'. -if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then - am__include=include - am__quote= - _am_result=GNU -fi -# Now try BSD make style include. -if test "$am__include" = "#"; then - echo '.include "confinc"' > confmf - if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then - am__include=.include - am__quote="\"" - _am_result=BSD - fi -fi -AC_SUBST([am__include]) -AC_SUBST([am__quote]) -AC_MSG_RESULT([$_am_result]) -rm -f confinc confmf -]) - -# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- - -# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005 -# Free Software Foundation, Inc. 
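For context on AM_MAINTAINER_MODE just above: it defaults the rules that regenerate configure, Makefile.in and friends to off, so only a developer who asks for them gets them. A typical use (sketch):

    dnl configure.ac
    AM_MAINTAINER_MODE

    # developer checkout
    ./configure --enable-maintainer-mode
    # plain user build (the default): the regeneration rules stay disabled, so a
    # skewed timestamp on configure.ac cannot trigger a rerun of the autotools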
-# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 5 - -# AM_MISSING_PROG(NAME, PROGRAM) -# ------------------------------ -AC_DEFUN([AM_MISSING_PROG], -[AC_REQUIRE([AM_MISSING_HAS_RUN]) -$1=${$1-"${am_missing_run}$2"} -AC_SUBST($1)]) - - -# AM_MISSING_HAS_RUN -# ------------------ -# Define MISSING if not defined so far and test if it supports --run. -# If it does, set am_missing_run to use it, otherwise, to nothing. -AC_DEFUN([AM_MISSING_HAS_RUN], -[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl -AC_REQUIRE_AUX_FILE([missing])dnl -test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" -# Use eval to expand $SHELL -if eval "$MISSING --run true"; then - am_missing_run="$MISSING --run " -else - am_missing_run= - AC_MSG_WARN([`missing' script is too old or missing]) -fi -]) - -# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_PROG_MKDIR_P -# --------------- -# Check for `mkdir -p'. -AC_DEFUN([AM_PROG_MKDIR_P], -[AC_PREREQ([2.60])dnl -AC_REQUIRE([AC_PROG_MKDIR_P])dnl -dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P, -dnl while keeping a definition of mkdir_p for backward compatibility. -dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile. -dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of -dnl Makefile.ins that do not define MKDIR_P, so we do our own -dnl adjustment using top_builddir (which is defined more often than -dnl MKDIR_P). -AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl -case $mkdir_p in - [[\\/$]]* | ?:[[\\/]]*) ;; - */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;; -esac -]) - -# Helper functions for option handling. -*- Autoconf -*- - -# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 3 - -# _AM_MANGLE_OPTION(NAME) -# ----------------------- -AC_DEFUN([_AM_MANGLE_OPTION], -[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) - -# _AM_SET_OPTION(NAME) -# ------------------------------ -# Set option NAME. Presently that only means defining a flag for this option. -AC_DEFUN([_AM_SET_OPTION], -[m4_define(_AM_MANGLE_OPTION([$1]), 1)]) - -# _AM_SET_OPTIONS(OPTIONS) -# ---------------------------------- -# OPTIONS is a space-separated list of Automake options. -AC_DEFUN([_AM_SET_OPTIONS], -[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) - -# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) -# ------------------------------------------- -# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. -AC_DEFUN([_AM_IF_OPTION], -[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) - -# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_RUN_LOG(COMMAND) -# ------------------- -# Run COMMAND, save the exit status in ac_status, and log it. -# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) 
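For reference, the AM_MISSING_PROG calls earlier in this file turn into Makefile substitutions that route each maintainer tool through the `missing' wrapper, so a user who lacks the tool gets a warning instead of a hard failure. Roughly (the absolute path is whatever am_aux_dir expanded to, and the -1.10 suffix stands in for ${am__api_version}):

    ACLOCAL = ${SHELL} /abs/path/to/config/missing --run aclocal-1.10
    AUTOMAKE = ${SHELL} /abs/path/to/config/missing --run automake-1.10
    AUTOCONF = ${SHELL} /abs/path/to/config/missing --run autoconf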
-AC_DEFUN([AM_RUN_LOG], -[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD - ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD - (exit $ac_status); }]) - -# Check to make sure that the build environment is sane. -*- Autoconf -*- - -# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005 -# Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 4 - -# AM_SANITY_CHECK -# --------------- -AC_DEFUN([AM_SANITY_CHECK], -[AC_MSG_CHECKING([whether build environment is sane]) -# Just in case -sleep 1 -echo timestamp > conftest.file -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` - if test "$[*]" = "X"; then - # -L didn't work. - set X `ls -t $srcdir/configure conftest.file` - fi - rm -f conftest.file - if test "$[*]" != "X $srcdir/configure conftest.file" \ - && test "$[*]" != "X conftest.file $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken -alias in your environment]) - fi - - test "$[2]" = conftest.file - ) -then - # Ok. - : -else - AC_MSG_ERROR([newly created file is older than distributed files! -Check your system clock]) -fi -AC_MSG_RESULT(yes)]) - -# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_PROG_INSTALL_STRIP -# --------------------- -# One issue with vendor `install' (even GNU) is that you can't -# specify the program used to strip binaries. This is especially -# annoying in cross-compiling environments, where the build's strip -# is unlikely to handle the host's binaries. -# Fortunately install-sh will honor a STRIPPROG variable, so we -# always use install-sh in `make install-strip', and initialize -# STRIPPROG with the value of the STRIP variable (set by the user). -AC_DEFUN([AM_PROG_INSTALL_STRIP], -[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl -# Installed binaries are usually stripped using `strip' when the user -# run `make install-strip'. However `strip' might not be the right -# tool to use in cross-compilation environments, therefore Automake -# will honor the `STRIP' environment variable to overrule this program. -dnl Don't test for $cross_compiling = yes, because it might be `maybe'. -if test "$cross_compiling" != no; then - AC_CHECK_TOOL([STRIP], [strip], :) -fi -INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" -AC_SUBST([INSTALL_STRIP_PROGRAM])]) - -# Copyright (C) 2006 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
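The AM_PROG_INSTALL_STRIP arrangement above is what makes stripping work through install-sh's STRIPPROG hook, including in a cross build where the host binaries need the cross strip. Sketch (the tool name is illustrative; AC_CHECK_TOOL normally finds it, but it can also be given by hand):

    # native build
    make install-strip
    # cross build, overriding the strip program explicitly
    make install-strip STRIP=arm-linux-gnueabi-strip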
- -# _AM_SUBST_NOTMAKE(VARIABLE) -# --------------------------- -# Prevent Automake from outputing VARIABLE = @VARIABLE@ in Makefile.in. -# This macro is traced by Automake. -AC_DEFUN([_AM_SUBST_NOTMAKE]) - -# Check how to create a tarball. -*- Autoconf -*- - -# Copyright (C) 2004, 2005 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 2 - -# _AM_PROG_TAR(FORMAT) -# -------------------- -# Check how to create a tarball in format FORMAT. -# FORMAT should be one of `v7', `ustar', or `pax'. -# -# Substitute a variable $(am__tar) that is a command -# writing to stdout a FORMAT-tarball containing the directory -# $tardir. -# tardir=directory && $(am__tar) > result.tar -# -# Substitute a variable $(am__untar) that extract such -# a tarball read from stdin. -# $(am__untar) < result.tar -AC_DEFUN([_AM_PROG_TAR], -[# Always define AMTAR for backward compatibility. -AM_MISSING_PROG([AMTAR], [tar]) -m4_if([$1], [v7], - [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'], - [m4_case([$1], [ustar],, [pax],, - [m4_fatal([Unknown tar format])]) -AC_MSG_CHECKING([how to create a $1 tar archive]) -# Loop over all known methods to create a tar archive until one works. -_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' -_am_tools=${am_cv_prog_tar_$1-$_am_tools} -# Do not fold the above two line into one, because Tru64 sh and -# Solaris sh will not grok spaces in the rhs of `-'. -for _am_tool in $_am_tools -do - case $_am_tool in - gnutar) - for _am_tar in tar gnutar gtar; - do - AM_RUN_LOG([$_am_tar --version]) && break - done - am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' - am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' - am__untar="$_am_tar -xf -" - ;; - plaintar) - # Must skip GNU tar: if it does not support --format= it doesn't create - # ustar tarball either. - (tar --version) >/dev/null 2>&1 && continue - am__tar='tar chf - "$$tardir"' - am__tar_='tar chf - "$tardir"' - am__untar='tar xf -' - ;; - pax) - am__tar='pax -L -x $1 -w "$$tardir"' - am__tar_='pax -L -x $1 -w "$tardir"' - am__untar='pax -r' - ;; - cpio) - am__tar='find "$$tardir" -print | cpio -o -H $1 -L' - am__tar_='find "$tardir" -print | cpio -o -H $1 -L' - am__untar='cpio -i -H $1 -d' - ;; - none) - am__tar=false - am__tar_=false - am__untar=false - ;; - esac - - # If the value was cached, stop now. We just wanted to have am__tar - # and am__untar set. 
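Putting the two substituted commands in context: the generated dist and distcheck rules drive them essentially as the comment earlier in this macro describes, shown here for the common .tar.gz case (a sketch; the exact rule text varies between Automake versions):

    # dist
    tardir=$(distdir) && $(am__tar) | gzip -c > $(distdir).tar.gz
    # distcheck unpacking
    gzip -dc $(distdir).tar.gz | $(am__untar)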
- test -n "${am_cv_prog_tar_$1}" && break - - # tar/untar a dummy directory, and stop if the command works - rm -rf conftest.dir - mkdir conftest.dir - echo GrepMe > conftest.dir/file - AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) - rm -rf conftest.dir - if test -s conftest.tar; then - AM_RUN_LOG([$am__untar /dev/null 2>&1 && break - fi -done -rm -rf conftest.dir - -AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) -AC_MSG_RESULT([$am_cv_prog_tar_$1])]) -AC_SUBST([am__tar]) -AC_SUBST([am__untar]) -]) # _AM_PROG_TAR - -m4_include([config/acx_pthread.m4]) -m4_include([config/tac_arg_check_mpi.m4]) -m4_include([config/tac_arg_config_mpi.m4]) -m4_include([config/tac_arg_enable_export-makefiles.m4]) -m4_include([config/tac_arg_enable_feature.m4]) -m4_include([config/tac_arg_enable_feature_sub_check.m4]) -m4_include([config/tac_arg_with_ar.m4]) -m4_include([config/tac_arg_with_flags.m4]) -m4_include([config/tac_arg_with_incdirs.m4]) -m4_include([config/tac_arg_with_libdirs.m4]) -m4_include([config/tac_arg_with_libs.m4]) -m4_include([config/tac_arg_with_perl.m4]) diff --git a/kokkos/basic/optional/ThreadPool/bootstrap b/kokkos/basic/optional/ThreadPool/bootstrap deleted file mode 100755 index 8706e9e..0000000 --- a/kokkos/basic/optional/ThreadPool/bootstrap +++ /dev/null @@ -1,9 +0,0 @@ -#! /bin/sh -#np# This file does not need to be edited, other than removing this line. -set -x -# Only run aclocal if we need to create aclocal.m4 -aclocal -I config -# autoheader is smart and doesn't change anything unless it's necessary -autoheader -automake --foreign --add-missing --copy -autoconf diff --git a/kokkos/basic/optional/ThreadPool/cmake/Dependencies.cmake b/kokkos/basic/optional/ThreadPool/cmake/Dependencies.cmake deleted file mode 100644 index 746d066..0000000 --- a/kokkos/basic/optional/ThreadPool/cmake/Dependencies.cmake +++ /dev/null @@ -1,11 +0,0 @@ -SET(LIB_REQUIRED_DEP_PACKAGES) -SET(LIB_OPTIONAL_DEP_PACKAGES) -SET(TEST_REQUIRED_DEP_PACKAGES) -SET(TEST_OPTIONAL_DEP_PACKAGES) -SET(LIB_REQUIRED_DEP_TPLS) -SET(LIB_OPTIONAL_DEP_TPLS Pthread MPI) -SET(TEST_REQUIRED_DEP_TPLS) -SET(TEST_OPTIONAL_DEP_TPLS) - -TPL_TENTATIVELY_ENABLE(Pthread) - diff --git a/kokkos/basic/optional/ThreadPool/cmake/ThreadPool_config.h.in b/kokkos/basic/optional/ThreadPool/cmake/ThreadPool_config.h.in deleted file mode 100644 index 55614b9..0000000 --- a/kokkos/basic/optional/ThreadPool/cmake/ThreadPool_config.h.in +++ /dev/null @@ -1,2 +0,0 @@ -#cmakedefine HAVE_MPI -#cmakedefine HAVE_PTHREAD diff --git a/kokkos/basic/optional/ThreadPool/config/acx_pthread.m4 b/kokkos/basic/optional/ThreadPool/config/acx_pthread.m4 deleted file mode 100644 index 3bd3ec2..0000000 --- a/kokkos/basic/optional/ThreadPool/config/acx_pthread.m4 +++ /dev/null @@ -1,224 +0,0 @@ -dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -dnl -dnl This macro figures out how to build C programs using POSIX -dnl threads. It sets the PTHREAD_LIBS output variable to the threads -dnl library and linker flags, and the PTHREAD_CFLAGS output variable -dnl to any special C compiler flags that are needed. (The user can also -dnl force certain compiler flags/libs to be tested by setting these -dnl environment variables.) -dnl -dnl Also sets PTHREAD_CC to any special C compiler that is needed for -dnl multi-threaded programs (defaults to the value of CC otherwise). -dnl (This is necessary on AIX to use the special cc_r compiler alias.) 
-dnl -dnl If you are only building threads programs, you may wish to -dnl use these variables in your default LIBS, CFLAGS, and CC: -dnl -dnl LIBS="$PTHREAD_LIBS $LIBS" -dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -dnl CC="$PTHREAD_CC" -dnl -dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute -dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE -dnl to that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). -dnl -dnl ACTION-IF-FOUND is a list of shell commands to run if a threads -dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands -dnl to run it if it is not found. If ACTION-IF-FOUND is not specified, -dnl the default action will define HAVE_PTHREAD. -dnl -dnl Please let the authors know if this macro fails on any platform, -dnl or if you have any other suggestions or comments. This macro was -dnl based on work by SGJ on autoconf scripts for FFTW (www.fftw.org) -dnl (with help from M. Frigo), as well as ac_pthread and hb_pthread -dnl macros posted by AFC to the autoconf macro repository. We are also -dnl grateful for the helpful feedback of numerous users. -dnl -dnl @version $Id$ -dnl @author Steven G. Johnson and Alejandro Forero Cuervo - -AC_DEFUN([ACX_PTHREAD], [ -AC_REQUIRE([AC_CANONICAL_HOST]) -acx_pthread_ok=no - -# First, check if the POSIX threads header, pthread.h, is available. -# If it isn't, don't bother looking for the threads libraries. -AC_CHECK_HEADER(pthread.h, , acx_pthread_ok=noheader) - -# We must check for the threads library under a number of different -# names; the ordering is very important because some systems -# (e.g. DEC) have both -lpthread and -lpthreads, where one of the -# libraries is broken (non-POSIX). - -# First of all, check if the user has set any of the PTHREAD_LIBS, -# etcetera environment variables, and if threads linking works using -# them: -if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) - AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes) - AC_MSG_RESULT($acx_pthread_ok) - if test x"$acx_pthread_ok" = xno; then - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" - fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" -fi - -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all. - -acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt" - -# The ordering *is* (sometimes) important. 
Some notes on the -# individual items follow: - -# pthreads: AIX (must check this before -lpthread) -# none: in case threads are in libc; should be tried before -Kthread and -# other compiler flags to prevent continual compiler warnings -# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) -# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) -# -pthreads: Solaris/gcc -# -mthreads: Mingw32/gcc, Lynx/gcc -# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads too; -# also defines -D_REENTRANT) -# pthread: Linux, etcetera -# --thread-safe: KAI C++ - -case "${host_cpu}-${host_os}" in - *solaris*) - - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthread or - # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) So, - # we'll just look for -pthreads and -lpthread first: - - acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" - ;; -esac - -if test x"$acx_pthread_ok" = xno; then -for flag in $acx_pthread_flags; do - - case $flag in - none) - AC_MSG_CHECKING([whether pthreads work without any flags]) - ;; - - -*) - AC_MSG_CHECKING([whether pthreads work with $flag]) - PTHREAD_CFLAGS="$flag" - ;; - - *) - AC_MSG_CHECKING([for the pthreads library -l$flag]) - PTHREAD_LIBS="-l$flag" - ;; - esac - - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Check for various functions. We must include pthread.h, - # since some functions may be macros. (On the Sequent, we - # need a special flag -Kthread to make this header compile.) - # We check for pthread_join because it is in -lpthread on IRIX - # while pthread_create is in libc. We check for pthread_attr_init - # due to DEC craziness with -lpthreads. We check for - # pthread_cleanup_push because it is one of the few pthread - # functions on Solaris that doesn't have a non-functional libc stub. - # We try pthread_create on general principles. - AC_TRY_LINK([#include ], - [pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], - [acx_pthread_ok=yes]) - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - AC_MSG_RESULT($acx_pthread_ok) - if test "x$acx_pthread_ok" = xyes; then - break; - fi - - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" -done -fi - -# Various other checks: -if test "x$acx_pthread_ok" = xyes; then - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Detect AIX lossage: threads are created detached by default - # and the JOINABLE attribute has a nonstandard name (UNDETACHED). 
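For reference, a configure.ac normally consumes this macro the way its header documents: call it, then fold the results into the build flags. A minimal sketch (the error message is illustrative):

    ACX_PTHREAD([
      LIBS="$PTHREAD_LIBS $LIBS"
      CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
      CC="$PTHREAD_CC"
      AC_DEFINE([HAVE_PTHREAD], [1], [Define if POSIX threads are available.])
    ], [
      AC_MSG_ERROR([POSIX threads are required to build this package])
    ])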
- AC_MSG_CHECKING([for joinable pthread attribute]) - AC_TRY_LINK([#include ], - [int attr=PTHREAD_CREATE_JOINABLE;], - ok=PTHREAD_CREATE_JOINABLE, ok=unknown) - if test x"$ok" = xunknown; then - AC_TRY_LINK([#include ], - [int attr=PTHREAD_CREATE_UNDETACHED;], - ok=PTHREAD_CREATE_UNDETACHED, ok=unknown) - fi - if test x"$ok" != xPTHREAD_CREATE_JOINABLE; then - AC_DEFINE(PTHREAD_CREATE_JOINABLE, $ok, - [Define to the necessary symbol if this constant - uses a non-standard name on your system.]) - fi - AC_MSG_RESULT(${ok}) - if test x"$ok" = xunknown; then - AC_MSG_WARN([we do not know how to create joinable pthreads]) - fi - - AC_MSG_CHECKING([if more special flags are required for pthreads]) - flag=no - case "${host_cpu}-${host_os}" in - *-aix* | *-freebsd*) flag="-D_THREAD_SAFE";; - *solaris* | alpha*-osf*) flag="-D_REENTRANT";; - esac - AC_MSG_RESULT(${flag}) - if test "x$flag" != xno; then - PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" - fi - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - # More AIX lossage: must compile with cc_r - AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC}) -else - PTHREAD_CC="$CC" -fi - -AC_SUBST(PTHREAD_LIBS) -AC_SUBST(PTHREAD_CFLAGS) -AC_SUBST(PTHREAD_CC) - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test x"$acx_pthread_ok" = xyes; then - ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) - : -else - acx_pthread_ok=no - $2 -fi - -])dnl ACX_PTHREAD diff --git a/kokkos/basic/optional/ThreadPool/config/config.guess b/kokkos/basic/optional/ThreadPool/config/config.guess deleted file mode 100755 index 396482d..0000000 --- a/kokkos/basic/optional/ThreadPool/config/config.guess +++ /dev/null @@ -1,1500 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, -# Inc. - -timestamp='2006-07-02' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA -# 02110-1301, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - - -# Originally written by Per Bothner . -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. -# -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. -# -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] - -Output the configuration name of the system \`$me' is run on. 
- -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.guess ($timestamp) - -Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - * ) - break ;; - esac -done - -if test $# != 0; then - echo "$me: too many arguments$help" >&2 - exit 1 -fi - -trap 'exit 1' 1 2 15 - -# CC_FOR_BUILD -- compiler used by this script. Note that the use of a -# compiler to aid in system detection is discouraged as it requires -# temporary files to be created and, as you can see below, it is a -# headache to deal with in a portable fashion. - -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. - -# Portable tmp directory creation inspired by the Autoconf team. - -set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ; set_cc_for_build= ;' - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -# Note: order is significant - the case branches are not exclusive. - -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in - *:NetBSD:*:*) - # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, - # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently - # switched to ELF, *-*-netbsd* would select the old - # object file format. 
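For orientation, the script is normally run without arguments, either by configure or by hand when debugging a platform-detection problem, and prints a single CPU-VENDOR-OS triplet. On an x86_64 GNU/Linux host this vintage of the script reports, for example:

    $ ./config/config.guess
    x86_64-unknown-linux-gnu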
This provides both forward - # compatibility and a consistent mechanism for selecting the - # object file format. - # - # Note: NetBSD doesn't particularly care about the vendor - # portion of the name. We always set it to "unknown". - sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` - case "${UNAME_MACHINE_ARCH}" in - armeb) machine=armeb-unknown ;; - arm*) machine=arm-unknown ;; - sh3el) machine=shl-unknown ;; - sh3eb) machine=sh-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; - esac - # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. - case "${UNAME_MACHINE_ARCH}" in - arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build - if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null - then - # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). - # Return netbsd for either. FIX? - os=netbsd - else - os=netbsdelf - fi - ;; - *) - os=netbsd - ;; - esac - # The OS release - # Debian GNU/NetBSD machines have a different userland, and - # thus, need a distinct triplet. However, they do not need - # kernel version information, so it can be replaced with a - # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in - Debian*) - release='-gnu' - ;; - *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - ;; - esac - # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: - # contains redundant information, the shorter form: - # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}" - exit ;; - *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} - exit ;; - *:ekkoBSD:*:*) - echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} - exit ;; - *:SolidBSD:*:*) - echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} - exit ;; - macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd${UNAME_RELEASE} - exit ;; - *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} - exit ;; - alpha:OSF1:*:*) - case $UNAME_RELEASE in - *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - ;; - *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` - ;; - esac - # According to Compaq, /usr/sbin/psrinfo has been available on - # OSF/1 and Tru64 systems produced since 1995. I hope that - # covers most systems running today. This code pipes the CPU - # types through head -n 1, so we only detect the type of CPU 0. - ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` - case "$ALPHA_CPU_TYPE" in - "EV4 (21064)") - UNAME_MACHINE="alpha" ;; - "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; - "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; - "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; - "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; - "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; - "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; - "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; - "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; - "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; - "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; - "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; - "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; - esac - # A Pn.n version is a patched version. - # A Vn.n version is a released version. 
- # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? - echo alpha-pc-interix - exit ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit ;; - *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos - exit ;; - *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos - exit ;; - *:OS/390:*:*) - echo i370-ibm-openedition - exit ;; - *:z/VM:*:*) - echo s390-ibm-zvmoe - exit ;; - *:OS400:*:*) - echo powerpc-ibm-os400 - exit ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} - exit ;; - arm:riscos:*:*|arm:RISCOS:*:*) - echo arm-unknown-riscos - exit ;; - SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit ;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit ;; - NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit ;; - DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit ;; - DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7; exit ;; - esac ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} - exit ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in - sun3) - echo m68k-sun-sunos${UNAME_RELEASE} - ;; - sun4) - echo sparc-sun-sunos${UNAME_RELEASE} - ;; - esac - exit ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} - exit ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. 
- atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit ;; - m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} - exit ;; - powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} - exit ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} - exit ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} - exit ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} - exit ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c -#ifdef __cplusplus -#include /* for printf() prototype */ - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && - dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`$dummy $dummyarg` && - { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos${UNAME_RELEASE} - exit ;; - Motorola:PowerMAX_OS:*:*) - echo powerpc-motorola-powermax - exit ;; - Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] - then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] - then - echo m88k-dg-dgux${UNAME_RELEASE} - else - echo m88k-dg-dguxbcs${UNAME_RELEASE} - fi - else - echo i586-dg-dgux${UNAME_RELEASE} - fi - exit ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit ;; - *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i*86:AIX:*:*) - echo i386-ibm-aix - exit ;; - ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` - then - echo "$SYSTEM_NAME" - else - echo rs6000-ibm-aix3.2.5 - fi - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit ;; - *:AIX:*:[45]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) - echo romp-ibm-bsd4.4 - exit ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit ;; - 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit ;; - 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac - fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - - #define _HPUX_SOURCE - #include - #include - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` - test -z "$HP_ARCH" && HP_ARCH=hppa - fi ;; - esac - if [ ${HP_ARCH} = "hppa2.0w" ] - then - eval $set_cc_for_build - - # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating - # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler - # generating 64-bit code. 
GNU and HP use different nomenclature: - # - # $ CC_FOR_BUILD=cc ./config.guess - # => hppa2.0w-hp-hpux11.23 - # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess - # => hppa64-hp-hpux11.23 - - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | - grep __LP64__ >/dev/null - then - HP_ARCH="hppa2.0w" - else - HP_ARCH="hppa64" - fi - fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit ;; - ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} - exit ;; - 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. */ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - echo unknown-hitachi-hiuxwe2 - exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) - echo hppa1.1-hp-bsd - exit ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit ;; - *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) - echo hppa1.1-hp-osf - exit ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit ;; - i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk - else - echo ${UNAME_MACHINE}-unknown-osf1 - fi - exit ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit ;; - C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ - -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 
's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} - exit ;; - *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit ;; - *:FreeBSD:*:*) - case ${UNAME_MACHINE} in - pc98) - echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - esac - exit ;; - i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin - exit ;; - i*:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 - exit ;; - i*:windows32*:*) - # uname -m includes "-pc" on this system. - echo ${UNAME_MACHINE}-mingw32 - exit ;; - i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 - exit ;; - x86:Interix*:[3456]*) - echo i586-pc-interix${UNAME_RELEASE} - exit ;; - EM64T:Interix*:[3456]*) - echo x86_64-unknown-interix${UNAME_RELEASE} - exit ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? - echo i586-pc-interix - exit ;; - i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin - exit ;; - amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-unknown-cygwin - exit ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - *:GNU:*:*) - # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit ;; - *:GNU/*:*:*) - # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu - exit ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix - exit ;; - arm*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - cris:Linux:*:*) - echo cris-axis-linux-gnu - exit ;; - crisv32:Linux:*:*) - echo crisv32-axis-linux-gnu - exit ;; - frv:Linux:*:*) - echo frv-unknown-linux-gnu - exit ;; - ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - mips:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips - #undef mipsel - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips - #else - CPU= - #endif - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) 
|| defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= - #endif - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - or32:Linux:*:*) - echo or32-unknown-linux-gnu - exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null - if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit ;; - parisc:Linux:*:* | hppa:Linux:*:*) - # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) echo hppa2.0-unknown-linux-gnu ;; - *) echo hppa-unknown-linux-gnu ;; - esac - exit ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit ;; - s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux - exit ;; - sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-gnu - exit ;; - x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu - exit ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. - ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit ;; - coff-i386) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" - exit ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^LIBC/{ - s: ::g - p - }'`" - test x"${LIBC}" != x && { - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" - exit - } - test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } - ;; - i*86:DYNIX/ptx:4*:*) - # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. 
- # earlier versions are messed up and put the nodename in both - # sysname and nodename. - echo i386-sequent-sysv4 - exit ;; - i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit ;; - i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility - # is probably installed. - echo ${UNAME_MACHINE}-pc-os2-emx - exit ;; - i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop - exit ;; - i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos - exit ;; - i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable - exit ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit ;; - i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} - fi - exit ;; - i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. - case `/bin/uname -X | grep "^Machine"` in - *486*) UNAME_MACHINE=i486 ;; - *Pentium) UNAME_MACHINE=i586 ;; - *Pent*|*Celeron) UNAME_MACHINE=i686 ;; - esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit ;; - i*86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` - (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL - else - echo ${UNAME_MACHINE}-pc-sysv32 - fi - exit ;; - pc:*:*:*) - # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i386. - echo i386-pc-msdosdjgpp - exit ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 - fi - exit ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit ;; - mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit ;; - M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit ;; - M68*:*:R3V[5678]*:*) - test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; - m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} - exit ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit ;; - TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} - exit ;; - rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} - exit ;; - RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - *:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes . - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit ;; - i*86:VOS:*:*) - # From Paul.Green@stratus.com. - echo ${UNAME_MACHINE}-stratus-vos - exit ;; - *:VOS:*:*) - # From Paul.Green@stratus.com. - echo hppa1.1-stratus-vos - exit ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} - exit ;; - news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} - else - echo mips-unknown-sysv${UNAME_RELEASE} - fi - exit ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
- echo i586-pc-beos - exit ;; - SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} - exit ;; - SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} - exit ;; - SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} - exit ;; - Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit ;; - *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit ;; - *:Darwin:*:*) - UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - case $UNAME_PROCESSOR in - unknown) UNAME_PROCESSOR=powerpc ;; - esac - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit ;; - *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then - UNAME_PROCESSOR=i386 - UNAME_MACHINE=pc - fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit ;; - *:QNX:*:4*) - echo i386-pc-qnx - exit ;; - NSE-?:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk${UNAME_RELEASE} - exit ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} - exit ;; - *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit ;; - BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit ;; - DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit ;; - *:Plan9:*:*) - # "uname -m" is not consistent, so use $cputype instead. 386 - # is converted to i386 for consistency with other x86 - # operating systems. - if test "$cputype" = "386"; then - UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" - fi - echo ${UNAME_MACHINE}-unknown-plan9 - exit ;; - *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit ;; - *:TENEX:*:*) - echo pdp10-unknown-tenex - exit ;; - KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit ;; - XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit ;; - *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit ;; - *:ITS:*:*) - echo pdp10-unknown-its - exit ;; - SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} - exit ;; - *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit ;; - *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` - case "${UNAME_MACHINE}" in - A*) echo alpha-dec-vms ; exit ;; - I*) echo ia64-dec-vms ; exit ;; - V*) echo vax-dec-vms ; exit ;; - esac ;; - *:XENIX:*:SysV) - echo i386-pc-xenix - exit ;; - i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' - exit ;; - i*86:rdos:*:*) - echo ${UNAME_MACHINE}-pc-rdos - exit ;; -esac - -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - -eval $set_cc_for_build -cat >$dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix\n"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -# if !defined (ultrix) -# include -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - c34*) - echo c34-convex-bsd - exit ;; - c38*) - echo c38-convex-bsd - exit ;; - c4*) - echo c4-convex-bsd - exit ;; - esac -fi - -cat >&2 < in order to provide the needed -information to handle your system. 
- -config.guess timestamp = $timestamp - -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` - -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` - -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} -EOF - -exit 1 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/kokkos/basic/optional/ThreadPool/config/config.sub b/kokkos/basic/optional/ThreadPool/config/config.sub deleted file mode 100755 index fab0aa3..0000000 --- a/kokkos/basic/optional/ThreadPool/config/config.sub +++ /dev/null @@ -1,1616 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, -# Inc. - -timestamp='2006-09-20' - -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA -# 02110-1301, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - - -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. -# -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. 
- -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS - -Canonicalize a configuration name. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.sub ($timestamp) - -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" - exit 1 ;; - - *local*) - # First pass through any local machine types. - echo $1 - exit ;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ - uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ - storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac - -### Let's recognize common machines as not being operating systems so -### that things like config.sub decstation-3100 work. We also -### recognize some manufacturers as not being operating systems, so we -### can provide default operating systems below. -case $os in - -sun*os*) - # Prevent following clause from handling this invalid input. 
- ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis | -knuth | -cray) - os= - basic_machine=$1 - ;; - -sim | -cisco | -oki | -wec | -winbond) - os= - basic_machine=$1 - ;; - -scout) - ;; - -wrs) - os=-vxworks - basic_machine=$1 - ;; - -chorusos*) - os=-chorusos - basic_machine=$1 - ;; - -chorusrdb) - os=-chorusrdb - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco6) - os=-sco5v6 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5) - os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5v6*) - # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; -esac - -# Decode aliases for certain CPU-COMPANY combinations. -case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. 
- 1750a | 580 \ - | a29k \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ - | bfin \ - | c4x | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | fr30 | frv \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | mcore \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64vr | mips64vrel \ - | mips64orion | mips64orionel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | mt \ - | msp430 \ - | nios | nios2 \ - | ns16k | ns32k \ - | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ - | pyramid \ - | score \ - | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ - | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ - | spu | strongarm \ - | tahoe | thumb | tic4x | tic80 | tron \ - | v850 | v850e \ - | we32k \ - | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ - | z8k) - basic_machine=$basic_machine-unknown - ;; - m6811 | m68hc11 | m6812 | m68hc12) - # Motorola 68HC11/12. - basic_machine=$basic_machine-unknown - os=-none - ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) - ;; - ms1) - basic_machine=mt-unknown - ;; - - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i*86 | x86_64) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. 
- 580-* \ - | a29k-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* | avr32-* \ - | bfin-* | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ - | clipper-* | craynv-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ - | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | m32c-* | m32r-* | m32rle-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mips64vr5900-* | mips64vr5900el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipstx39-* | mipstx39el-* \ - | mmix-* \ - | mt-* \ - | msp430-* \ - | nios-* | nios2-* \ - | none-* | np1-* | ns16k-* | ns32k-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ - | pyramid-* \ - | romp-* | rs6000-* \ - | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ - | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ - | tahoe-* | thumb-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tron-* \ - | v850-* | v850e-* | vax-* \ - | we32k-* \ - | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ - | xstormy16-* | xtensa-* \ - | ymp-* \ - | z8k-*) - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. 
- 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - abacus) - basic_machine=abacus-unknown - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - craynv) - basic_machine=craynv-cray - os=-unicosmp - ;; - cr16c) - basic_machine=cr16c-unknown - os=-elf - ;; - crds | unos) - basic_machine=m68k-crds - ;; - crisv32 | crisv32-* | etraxfs*) - basic_machine=crisv32-axis - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - crx) - basic_machine=crx-unknown - os=-elf - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - decsystem10* | dec10*) - basic_machine=pdp10-dec - os=-tops10 - ;; - decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - djgpp) - basic_machine=i586-pc - os=-msdosdjgpp - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore | umax | mmax) - basic_machine=ns32k-encore - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - 
basic_machine=hppa1.0-hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - ;; -# I'm not sure what "Sysv32" means. Should this be sysv3.2? - i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach - ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - mingw32) - basic_machine=i386-pc - os=-mingw32 - ;; - miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - ms1-*) - basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - os=-nextstep3 - ;; - esac - ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; - np1) - basic_machine=np1-gould - ;; - nsr-tandem) - basic_machine=nsr-tandem - ;; - op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - openrisc | openrisc-*) - basic_machine=or32-unknown - ;; - os400) - basic_machine=powerpc-ibm - os=-os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - 
basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pc98) - basic_machine=i386-pc - ;; - pc98-*) - basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc - ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc - ;; - pentium4) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pn) - basic_machine=pn-gould - ;; - power) basic_machine=power-ibm - ;; - ppc) basic_machine=powerpc-unknown - ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) - basic_machine=powerpc64le-unknown - ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rdos) - basic_machine=i386-pc - os=-rdos - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff - ;; - rm[46]00) - basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sb1) - basic_machine=mipsisa64sb1-unknown - ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown - ;; - sde) - basic_machine=mipsisa32-sde - os=-elf - ;; - sei) - basic_machine=mips-sei - os=-seiux - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - st2000) - basic_machine=m68k-tandem - ;; - stratus) - basic_machine=i860-stratus - os=-sysv4 - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - sv1) - basic_machine=sv1-cray - os=-unicos - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - t3e) - basic_machine=alphaev5-cray - os=-unicos - ;; - t90) - basic_machine=t90-cray - os=-unicos - ;; - tic54x | c54x*) - basic_machine=tic54x-unknown - os=-coff - ;; - tic55x | c55x*) - 
basic_machine=tic55x-unknown - os=-coff - ;; - tic6x | c6x*) - basic_machine=tic6x-unknown - os=-coff - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - toad1) - basic_machine=pdp10-xkl - os=-tops20 - ;; - tower | tower-32) - basic_machine=m68k-ncr - ;; - tpf) - basic_machine=s390x-ibm - os=-tpf - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - os=-none - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - w65*) - basic_machine=w65-wdc - os=-none - ;; - w89k-*) - basic_machine=hppa1.1-winbond - os=-proelf - ;; - xbox) - basic_machine=i686-pc - os=-mingw32 - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - ymp) - basic_machine=ymp-cray - os=-unicos - ;; - z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - none) - basic_machine=none-none - os=-none - ;; - -# Here we handle the default manufacturer of certain CPU types. It is in -# some cases the only manufacturer, in others, it is the most popular. - w89k) - basic_machine=hppa1.1-winbond - ;; - op50n) - basic_machine=hppa1.1-oki - ;; - op60c) - basic_machine=hppa1.1-oki - ;; - romp) - basic_machine=romp-ibm - ;; - mmix) - basic_machine=mmix-knuth - ;; - rs6000) - basic_machine=rs6000-ibm - ;; - vax) - basic_machine=vax-dec - ;; - pdp10) - # there are many clones, so DEC is not a safe bet - basic_machine=pdp10-unknown - ;; - pdp11) - basic_machine=pdp11-dec - ;; - we32k) - basic_machine=we32k-att - ;; - sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) - basic_machine=sh-unknown - ;; - sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) - basic_machine=sparc-sun - ;; - cydra) - basic_machine=cydra-cydrome - ;; - orion) - basic_machine=orion-highlevel - ;; - orion105) - basic_machine=clipper-highlevel - ;; - mac | mpw | mac-mpw) - basic_machine=m68k-apple - ;; - pmac | pmac-mpw) - basic_machine=powerpc-apple - ;; - *-unknown) - # Make sure to match an already-canonicalized machine name. - ;; - *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` - ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x"$os" != x"" ] -then -case $os in - # First match some system type aliases - # that might get confused with valid system types. - # -solaris* is a basic system type, with this one exception. - -solaris1 | -solaris1.*) - os=`echo $os | sed -e 's|solaris1|sunos4|'` - ;; - -solaris) - os=-solaris2 - ;; - -svr4*) - os=-sysv4 - ;; - -unixware*) - os=-sysv4.2uw - ;; - -gnu/linux*) - os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` - ;; - # First accept the basic system types. - # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. - # -sysv* is not here because it comes later, after sysvr4. 
- -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ - | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ - | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ - | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -openbsd* | -solidbsd* \ - | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ - | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ - | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ - | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ - | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ - | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ - | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ - | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ - | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers*) - # Remember, each alternative MUST END IN *, to match a version number. - ;; - -qnx*) - case $basic_machine in - x86-* | i*86-*) - ;; - *) - os=-nto$os - ;; - esac - ;; - -nto-qnx*) - ;; - -nto*) - os=`echo $os | sed -e 's|nto|nto-qnx|'` - ;; - -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ - | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) - ;; - -mac*) - os=`echo $os | sed -e 's|mac|macos|'` - ;; - -linux-dietlibc) - os=-linux-dietlibc - ;; - -linux*) - os=`echo $os | sed -e 's|linux|linux-gnu|'` - ;; - -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` - ;; - -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` - ;; - -opened*) - os=-openedition - ;; - -os400*) - os=-os400 - ;; - -wince*) - os=-wince - ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; - -utek*) - os=-bsd - ;; - -dynix*) - os=-bsd - ;; - -acis*) - os=-aos - ;; - -atheos*) - os=-atheos - ;; - -syllable*) - os=-syllable - ;; - -386bsd) - os=-bsd - ;; - -ctix* | -uts*) - os=-sysv - ;; - -nova*) - os=-rtmk-nova - ;; - -ns2 ) - os=-nextstep2 - ;; - -nsk*) - os=-nsk - ;; - # Preserve the version number of sinix5. - -sinix5.*) - os=`echo $os | sed -e 's|sinix|sysv|'` - ;; - -sinix*) - os=-sysv4 - ;; - -tpf*) - os=-tpf - ;; - -triton*) - os=-sysv3 - ;; - -oss*) - os=-sysv3 - ;; - -svr4) - os=-sysv4 - ;; - -svr3) - os=-sysv3 - ;; - -sysvr4) - os=-sysv4 - ;; - # This must come after -sysvr4. - -sysv*) - ;; - -ose*) - os=-ose - ;; - -es1800*) - os=-ose - ;; - -xenix) - os=-xenix - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - os=-mint - ;; - -aros*) - os=-aros - ;; - -kaos*) - os=-kaos - ;; - -zvmoe) - os=-zvmoe - ;; - -none) - ;; - *) - # Get rid of the `-' at the beginning of $os. - os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 - exit 1 - ;; -esac -else - -# Here we handle the default operating systems that come with various machines. -# The value should be what the vendor currently ships out the door with their -# machine or put another way, the most popular os provided with the machine. 
- -# Note that if you're going to try to match "-MANUFACTURER" here (say, -# "-sun"), then you have to tell the case statement up towards the top -# that MANUFACTURER isn't an operating system. Otherwise, code above -# will signal an error saying that MANUFACTURER isn't an operating -# system, and we'll never get to this point. - -case $basic_machine in - score-*) - os=-elf - ;; - spu-*) - os=-elf - ;; - *-acorn) - os=-riscix1.2 - ;; - arm*-rebel) - os=-linux - ;; - arm*-semi) - os=-aout - ;; - c4x-* | tic4x-*) - os=-coff - ;; - # This must come before the *-dec entry. - pdp10-*) - os=-tops20 - ;; - pdp11-*) - os=-none - ;; - *-dec | vax-*) - os=-ultrix4.2 - ;; - m68*-apollo) - os=-domain - ;; - i386-sun) - os=-sunos4.0.2 - ;; - m68000-sun) - os=-sunos3 - # This also exists in the configure program, but was not the - # default. - # os=-sunos4 - ;; - m68*-cisco) - os=-aout - ;; - mips*-cisco) - os=-elf - ;; - mips*-*) - os=-elf - ;; - or32-*) - os=-coff - ;; - *-tti) # must be before sparc entry or we get the wrong os. - os=-sysv3 - ;; - sparc-* | *-sun) - os=-sunos4.1.1 - ;; - *-be) - os=-beos - ;; - *-haiku) - os=-haiku - ;; - *-ibm) - os=-aix - ;; - *-knuth) - os=-mmixware - ;; - *-wec) - os=-proelf - ;; - *-winbond) - os=-proelf - ;; - *-oki) - os=-proelf - ;; - *-hp) - os=-hpux - ;; - *-hitachi) - os=-hiux - ;; - i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv - ;; - *-cbm) - os=-amigaos - ;; - *-dg) - os=-dgux - ;; - *-dolphin) - os=-sysv3 - ;; - m68k-ccur) - os=-rtu - ;; - m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 - ;; - *-gould) - os=-sysv - ;; - *-highlevel) - os=-bsd - ;; - *-encore) - os=-bsd - ;; - *-sgi) - os=-irix - ;; - *-siemens) - os=-sysv4 - ;; - *-masscomp) - os=-rtu - ;; - f30[01]-fujitsu | f700-fujitsu) - os=-uxpv - ;; - *-rom68k) - os=-coff - ;; - *-*bug) - os=-coff - ;; - *-apple) - os=-macos - ;; - *-atari*) - os=-mint - ;; - *) - os=-none - ;; -esac -fi - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. -vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) - vendor=acorn - ;; - -sunos*) - vendor=sun - ;; - -aix*) - vendor=ibm - ;; - -beos*) - vendor=be - ;; - -hpux*) - vendor=hp - ;; - -mpeix*) - vendor=hp - ;; - -hiux*) - vendor=hitachi - ;; - -unos*) - vendor=crds - ;; - -dgux*) - vendor=dg - ;; - -luna*) - vendor=omron - ;; - -genix*) - vendor=ns - ;; - -mvs* | -opened*) - vendor=ibm - ;; - -os400*) - vendor=ibm - ;; - -ptx*) - vendor=sequent - ;; - -tpf*) - vendor=ibm - ;; - -vxsim* | -vxworks* | -windiss*) - vendor=wrs - ;; - -aux*) - vendor=apple - ;; - -hms*) - vendor=hitachi - ;; - -mpw* | -macos*) - vendor=apple - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - vendor=atari - ;; - -vos*) - vendor=stratus - ;; - esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` - ;; -esac - -echo $basic_machine$os -exit - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/kokkos/basic/optional/ThreadPool/config/depcomp b/kokkos/basic/optional/ThreadPool/config/depcomp deleted file mode 100755 index ca5ea4e..0000000 --- a/kokkos/basic/optional/ThreadPool/config/depcomp +++ /dev/null @@ -1,584 +0,0 @@ -#! 
/bin/sh -# depcomp - compile a program generating dependencies as side-effects - -scriptversion=2006-10-15.18 - -# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006 Free Software -# Foundation, Inc. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Originally written by Alexandre Oliva . - -case $1 in - '') - echo "$0: No command. Try \`$0 --help' for more information." 1>&2 - exit 1; - ;; - -h | --h*) - cat <<\EOF -Usage: depcomp [--help] [--version] PROGRAM [ARGS] - -Run PROGRAMS ARGS to compile a file, generating dependencies -as side-effects. - -Environment variables: - depmode Dependency tracking mode. - source Source file read by `PROGRAMS ARGS'. - object Object file output by `PROGRAMS ARGS'. - DEPDIR directory where to store dependencies. - depfile Dependency file to output. - tmpdepfile Temporary file to use when outputing dependencies. - libtool Whether libtool is used (yes/no). - -Report bugs to . -EOF - exit $? - ;; - -v | --v*) - echo "depcomp $scriptversion" - exit $? - ;; -esac - -if test -z "$depmode" || test -z "$source" || test -z "$object"; then - echo "depcomp: Variables source, object and depmode must be set" 1>&2 - exit 1 -fi - -# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. -depfile=${depfile-`echo "$object" | - sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} -tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} - -rm -f "$tmpdepfile" - -# Some modes work just like other modes, but use different flags. We -# parameterize here, but still list the modes in the big case below, -# to make depend.m4 easier to write. Note that we *cannot* use a case -# here, because this file can only contain one case statement. -if test "$depmode" = hp; then - # HP compiler uses -M and no extra arg. - gccflag=-M - depmode=gcc -fi - -if test "$depmode" = dashXmstdout; then - # This is just like dashmstdout with a different argument. - dashmflag=-xM - depmode=dashmstdout -fi - -case "$depmode" in -gcc3) -## gcc 3 implements dependency tracking that does exactly what -## we want. Yay! Note: for some reason libtool 1.4 doesn't like -## it if -MD -MP comes after the -MF stuff. Hmm. -## Unfortunately, FreeBSD c89 acceptance of flags depends upon -## the command line argument order; so add the flags where they -## appear in depend2.am. Note that the slowdown incurred here -## affects only configure: in makefiles, %FASTDEP% shortcuts this. 
- for arg - do - case $arg in - -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; - *) set fnord "$@" "$arg" ;; - esac - shift # fnord - shift # $arg - done - "$@" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - mv "$tmpdepfile" "$depfile" - ;; - -gcc) -## There are various ways to get dependency output from gcc. Here's -## why we pick this rather obscure method: -## - Don't want to use -MD because we'd like the dependencies to end -## up in a subdir. Having to rename by hand is ugly. -## (We might end up doing this anyway to support other compilers.) -## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like -## -MM, not -M (despite what the docs say). -## - Using -M directly means running the compiler twice (even worse -## than renaming). - if test -z "$gccflag"; then - gccflag=-MD, - fi - "$@" -Wp,"$gccflag$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - echo "$object : \\" > "$depfile" - alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -## The second -e expression handles DOS-style file names with drive letters. - sed -e 's/^[^:]*: / /' \ - -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" -## This next piece of magic avoids the `deleted header file' problem. -## The problem is that when a header file which appears in a .P file -## is deleted, the dependency causes make to die (because there is -## typically no way to rebuild the header). We avoid this by adding -## dummy dependencies for each header file. Too bad gcc doesn't do -## this for us directly. - tr ' ' ' -' < "$tmpdepfile" | -## Some versions of gcc put a space before the `:'. On the theory -## that the space means something, we add a space to the output as -## well. -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -hp) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -sgi) - if test "$libtool" = yes; then - "$@" "-Wp,-MDupdate,$tmpdepfile" - else - "$@" -MDupdate "$tmpdepfile" - fi - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - - if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files - echo "$object : \\" > "$depfile" - - # Clip off the initial element (the dependent). Don't try to be - # clever and replace this with sed code, as IRIX sed won't handle - # lines with more than a fixed number of characters (4096 in - # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; - # the IRIX cc adds comments like `#:fec' to the end of the - # dependency line. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ - tr ' -' ' ' >> $depfile - echo >> $depfile - - # The second pass generates a dummy entry for each header file. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ - >> $depfile - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. 
- echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -aix) - # The C for AIX Compiler uses -M and outputs the dependencies - # in a .u file. In older versions, this file always lives in the - # current directory. Also, the AIX compiler puts `$object:' at the - # start of each line; $object doesn't have directory information. - # Version 6 uses the directory in both cases. - stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` - tmpdepfile="$stripped.u" - if test "$libtool" = yes; then - "$@" -Wc,-M - else - "$@" -M - fi - stat=$? - - if test -f "$tmpdepfile"; then : - else - stripped=`echo "$stripped" | sed 's,^.*/,,'` - tmpdepfile="$stripped.u" - fi - - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - - if test -f "$tmpdepfile"; then - outname="$stripped.o" - # Each line is of the form `foo.o: dependent.h'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" - sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -icc) - # Intel's C compiler understands `-MD -MF file'. However on - # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c - # ICC 7.0 will fill foo.d with something like - # foo.o: sub/foo.c - # foo.o: sub/foo.h - # which is wrong. We want: - # sub/foo.o: sub/foo.c - # sub/foo.o: sub/foo.h - # sub/foo.c: - # sub/foo.h: - # ICC 7.1 will output - # foo.o: sub/foo.c sub/foo.h - # and will wrap long lines using \ : - # foo.o: sub/foo.c ... \ - # sub/foo.h ... \ - # ... - - "$@" -MD -MF "$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - # Each line is of the form `foo.o: dependent.h', - # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" - # Some versions of the HPUX 10.20 sed can't process this invocation - # correctly. Breaking it into two sed invocations is a workaround. - sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | - sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -hp2) - # The "hp" stanza above does not work with aCC (C++) and HP's ia64 - # compilers, which have integrated preprocessors. The correct option - # to use with these is +Maked; it writes dependencies to a file named - # 'foo.d', which lands next to the object file, wherever that - # happens to be. - # Much of this is similar to the tru64 case; see comments there. - dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` - test "x$dir" = "x$object" && dir= - base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` - if test "$libtool" = yes; then - tmpdepfile1=$dir$base.d - tmpdepfile2=$dir.libs/$base.d - "$@" -Wc,+Maked - else - tmpdepfile1=$dir$base.d - tmpdepfile2=$dir$base.d - "$@" +Maked - fi - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile1" "$tmpdepfile2" - exit $stat - fi - - for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" - do - test -f "$tmpdepfile" && break - done - if test -f "$tmpdepfile"; then - sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile" - # Add `dependent.h:' lines. 
- sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile" - else - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" "$tmpdepfile2" - ;; - -tru64) - # The Tru64 compiler uses -MD to generate dependencies as a side - # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. - # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put - # dependencies in `foo.d' instead, so we check for that too. - # Subdirectories are respected. - dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` - test "x$dir" = "x$object" && dir= - base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` - - if test "$libtool" = yes; then - # With Tru64 cc, shared objects can also be used to make a - # static library. This mechanism is used in libtool 1.4 series to - # handle both shared and static libraries in a single compilation. - # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d. - # - # With libtool 1.5 this exception was removed, and libtool now - # generates 2 separate objects for the 2 libraries. These two - # compilations output dependencies in $dir.libs/$base.o.d and - # in $dir$base.o.d. We have to check for both files, because - # one of the two compilations can be disabled. We should prefer - # $dir$base.o.d over $dir.libs/$base.o.d because the latter is - # automatically cleaned when .libs/ is deleted, while ignoring - # the former would cause a distcleancheck panic. - tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4 - tmpdepfile2=$dir$base.o.d # libtool 1.5 - tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5 - tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504 - "$@" -Wc,-MD - else - tmpdepfile1=$dir$base.o.d - tmpdepfile2=$dir$base.d - tmpdepfile3=$dir$base.d - tmpdepfile4=$dir$base.d - "$@" -MD - fi - - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" - exit $stat - fi - - for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" - do - test -f "$tmpdepfile" && break - done - if test -f "$tmpdepfile"; then - sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" - # That's a tab and a space in the []. - sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" - else - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -#nosideeffect) - # This comment above is used by automake to tell side-effect - # dependency tracking mechanisms from slower ones. - -dashmstdout) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - test -z "$dashmflag" && dashmflag=-M - # Require at least two characters before searching for `:' - # in the target name. This is to cope with DOS-style filenames: - # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. - "$@" $dashmflag | - sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - tr ' ' ' -' < "$tmpdepfile" | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. 
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -dashXmstdout) - # This case only exists to satisfy depend.m4. It is never actually - # run, as this mode is specially recognized in the preamble. - exit 1 - ;; - -makedepend) - "$@" || exit $? - # Remove any Libtool call - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - # X makedepend - shift - cleared=no - for arg in "$@"; do - case $cleared in - no) - set ""; shift - cleared=yes ;; - esac - case "$arg" in - -D*|-I*) - set fnord "$@" "$arg"; shift ;; - # Strip any option that makedepend may not understand. Remove - # the object too, otherwise makedepend will parse it as a source file. - -*|$object) - ;; - *) - set fnord "$@" "$arg"; shift ;; - esac - done - obj_suffix="`echo $object | sed 's/^.*\././'`" - touch "$tmpdepfile" - ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - sed '1,2d' "$tmpdepfile" | tr ' ' ' -' | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" "$tmpdepfile".bak - ;; - -cpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - "$@" -E | - sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ - -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | - sed '$ s: \\$::' > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - cat < "$tmpdepfile" >> "$depfile" - sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -msvisualcpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o, - # because we must use -o when running libtool. - "$@" || exit $? - IFS=" " - for arg - do - case "$arg" in - "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") - set fnord "$@" - shift - shift - ;; - *) - set fnord "$@" "$arg" - shift - shift - ;; - esac - done - "$@" -E | - sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" - echo " " >> "$depfile" - . 
"$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -none) - exec "$@" - ;; - -*) - echo "Unknown depmode $depmode" 1>&2 - exit 1 - ;; -esac - -exit 0 - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/kokkos/basic/optional/ThreadPool/config/generate-makeoptions.pl b/kokkos/basic/optional/ThreadPool/config/generate-makeoptions.pl deleted file mode 100755 index a39223e..0000000 --- a/kokkos/basic/optional/ThreadPool/config/generate-makeoptions.pl +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/perl -w -# -# This perl script graps a bunch of make macro definitions -# generated for Teuchos that can be used in other makefiles. -# This is dumped to stdout and can be redirected to build -# a makefile. -# -# Note, this script must be maintained to be current for -# the Teuchos makefile. -# -use strict; - -if( !(defined(@ARGV) && scalar(@ARGV)==2) ) { - die "Error, this script takes two and only two arguments (makefile_name package_name).!\n"; -} - -my $makefile_name = shift; -my $package_name = shift; - -# -# List the macros you want to grep and include in the output -# -my @macros = - ( - "CC" - ,"CXX" - ,"F77" - ,"CXXLD" - ,"DEFS" - ,"CPPFLAGS" - ,"CFLAGS" - ,"CXXFLAGS" - ,"FFLAGS" - ,"LDFLAGS" - ,"FLIBS" - ,"BLAS_LIBS" - ,"LAPACK_LIBS" - ,"prefix" - ,"AR" - ,"ALTERNATE_AR" - ,"libteuchos_a_AR" - ,"RANLIB" - ); - -open FILE_IN, "<$makefile_name" || die "The file $makefile_name could not be opended for input\n"; -my @makefile_name_array = ; -close FILE_IN; - -# -# Find the above macros and append "${package_name}_" to the beginning. -# -my @new_macros; -my $add_next_line = 0; -foreach( @makefile_name_array ) { - my $line = $_; - if($add_next_line) { - push @new_macros, $line; - if( substr($line,-1,1) eq "\\" ) { - $add_next_line = 1; - } - else { - $add_next_line = 0; - } - next; - } - #print "Line = $line"; - foreach( @macros ) { - my $macro_search = "^${_} "; - #print "Macro search = \'$macro_search\'\n"; - if( $line=~/$macro_search/ ) { - #print "Adding Macro!\n"; - my $find_str = '\(CXX\)'; - my $replace_str = "(${package_name}_CXX)"; - $line=~s/$find_str/$replace_str/; - push @new_macros, "${package_name}_${line}"; - if( substr($line,-2,1) eq "\\" ) { - $add_next_line = 1; - } - else { - $add_next_line = 0; - } - } - } -} - -print join("",@new_macros); diff --git a/kokkos/basic/optional/ThreadPool/config/install-sh b/kokkos/basic/optional/ThreadPool/config/install-sh deleted file mode 100755 index 4fbbae7..0000000 --- a/kokkos/basic/optional/ThreadPool/config/install-sh +++ /dev/null @@ -1,507 +0,0 @@ -#!/bin/sh -# install - install a program, script, or datafile - -scriptversion=2006-10-14.15 - -# This originates from X11R5 (mit/util/scripts/install.sh), which was -# later released in X11R6 (xc/config/util/install.sh) with the -# following copyright and license. 
-# -# Copyright (C) 1994 X Consortium -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- -# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# Except as contained in this notice, the name of the X Consortium shall not -# be used in advertising or otherwise to promote the sale, use or other deal- -# ings in this Software without prior written authorization from the X Consor- -# tium. -# -# -# FSF changes to this file are in the public domain. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. - -nl=' -' -IFS=" "" $nl" - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" -if test -z "$doit"; then - doit_exec=exec -else - doit_exec=$doit -fi - -# Put in absolute file names if you don't have them in your path; -# or use environment vars. - -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -posix_glob= -posix_mkdir= - -# Desired mode of installed file. -mode=0755 - -chmodcmd=$chmodprog -chowncmd= -chgrpcmd= -stripcmd= -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src= -dst= -dir_arg= -dstarg= -no_target_directory= - -usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE - or: $0 [OPTION]... SRCFILES... DIRECTORY - or: $0 [OPTION]... -t DIRECTORY SRCFILES... - or: $0 [OPTION]... -d DIRECTORIES... - -In the 1st form, copy SRCFILE to DSTFILE. -In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. -In the 4th, create DIRECTORIES. - -Options: --c (ignored) --d create directories instead of installing files. --g GROUP $chgrpprog installed files to GROUP. --m MODE $chmodprog installed files to MODE. --o USER $chownprog installed files to USER. --s $stripprog installed files. --t DIRECTORY install into DIRECTORY. --T report an error if DSTFILE is a directory. ---help display this help and exit. ---version display version info and exit. 
- -Environment variables override the default commands: - CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG -" - -while test $# -ne 0; do - case $1 in - -c) shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - --help) echo "$usage"; exit $?;; - - -m) mode=$2 - shift - shift - case $mode in - *' '* | *' '* | *' -'* | *'*'* | *'?'* | *'['*) - echo "$0: invalid mode: $mode" >&2 - exit 1;; - esac - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -s) stripcmd=$stripprog - shift - continue;; - - -t) dstarg=$2 - shift - shift - continue;; - - -T) no_target_directory=true - shift - continue;; - - --version) echo "$0 $scriptversion"; exit $?;; - - --) shift - break;; - - -*) echo "$0: invalid option: $1" >&2 - exit 1;; - - *) break;; - esac -done - -if test $# -ne 0 && test -z "$dir_arg$dstarg"; then - # When -d is used, all remaining arguments are directories to create. - # When -t is used, the destination is already specified. - # Otherwise, the last argument is the destination. Remove it from $@. - for arg - do - if test -n "$dstarg"; then - # $@ is not empty: it contains at least $arg. - set fnord "$@" "$dstarg" - shift # fnord - fi - shift # arg - dstarg=$arg - done -fi - -if test $# -eq 0; then - if test -z "$dir_arg"; then - echo "$0: no input file specified." >&2 - exit 1 - fi - # It's OK to call `install-sh -d' without argument. - # This can happen when creating conditional directories. - exit 0 -fi - -if test -z "$dir_arg"; then - trap '(exit $?); exit' 1 2 13 15 - - # Set umask so as not to create temps with too-generous modes. - # However, 'strip' requires both read and write access to temps. - case $mode in - # Optimize common cases. - *644) cp_umask=133;; - *755) cp_umask=22;; - - *[0-7]) - if test -z "$stripcmd"; then - u_plus_rw= - else - u_plus_rw='% 200' - fi - cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; - *) - if test -z "$stripcmd"; then - u_plus_rw= - else - u_plus_rw=,u+rw - fi - cp_umask=$mode$u_plus_rw;; - esac -fi - -for src -do - # Protect names starting with `-'. - case $src in - -*) src=./$src ;; - esac - - if test -n "$dir_arg"; then - dst=$src - dstdir=$dst - test -d "$dstdir" - dstdir_status=$? - else - - # Waiting for this to be detected by the "$cpprog $src $dsttmp" command - # might cause directories to be created, which would be especially bad - # if $src (and thus $dsttmp) contains '*'. - if test ! -f "$src" && test ! -d "$src"; then - echo "$0: $src does not exist." >&2 - exit 1 - fi - - if test -z "$dstarg"; then - echo "$0: no destination specified." >&2 - exit 1 - fi - - dst=$dstarg - # Protect names starting with `-'. - case $dst in - -*) dst=./$dst ;; - esac - - # If destination is a directory, append the input filename; won't work - # if double slashes aren't ignored. - if test -d "$dst"; then - if test -n "$no_target_directory"; then - echo "$0: $dstarg: Is a directory" >&2 - exit 1 - fi - dstdir=$dst - dst=$dstdir/`basename "$src"` - dstdir_status=0 - else - # Prefer dirname, but fall back on a substitute if dirname fails. - dstdir=` - (dirname "$dst") 2>/dev/null || - expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$dst" : 'X\(//\)[^/]' \| \ - X"$dst" : 'X\(//\)$' \| \ - X"$dst" : 'X\(/\)' \| . 
2>/dev/null || - echo X"$dst" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q' - ` - - test -d "$dstdir" - dstdir_status=$? - fi - fi - - obsolete_mkdir_used=false - - if test $dstdir_status != 0; then - case $posix_mkdir in - '') - # Create intermediate dirs using mode 755 as modified by the umask. - # This is like FreeBSD 'install' as of 1997-10-28. - umask=`umask` - case $stripcmd.$umask in - # Optimize common cases. - *[2367][2367]) mkdir_umask=$umask;; - .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; - - *[0-7]) - mkdir_umask=`expr $umask + 22 \ - - $umask % 100 % 40 + $umask % 20 \ - - $umask % 10 % 4 + $umask % 2 - `;; - *) mkdir_umask=$umask,go-w;; - esac - - # With -d, create the new directory with the user-specified mode. - # Otherwise, rely on $mkdir_umask. - if test -n "$dir_arg"; then - mkdir_mode=-m$mode - else - mkdir_mode= - fi - - posix_mkdir=false - case $umask in - *[123567][0-7][0-7]) - # POSIX mkdir -p sets u+wx bits regardless of umask, which - # is incompatible with FreeBSD 'install' when (umask & 300) != 0. - ;; - *) - tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ - trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 - - if (umask $mkdir_umask && - exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 - then - if test -z "$dir_arg" || { - # Check for POSIX incompatibilities with -m. - # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or - # other-writeable bit of parent directory when it shouldn't. - # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. - ls_ld_tmpdir=`ls -ld "$tmpdir"` - case $ls_ld_tmpdir in - d????-?r-*) different_mode=700;; - d????-?--*) different_mode=755;; - *) false;; - esac && - $mkdirprog -m$different_mode -p -- "$tmpdir" && { - ls_ld_tmpdir_1=`ls -ld "$tmpdir"` - test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" - } - } - then posix_mkdir=: - fi - rmdir "$tmpdir/d" "$tmpdir" - else - # Remove any dirs left behind by ancient mkdir implementations. - rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null - fi - trap '' 0;; - esac;; - esac - - if - $posix_mkdir && ( - umask $mkdir_umask && - $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" - ) - then : - else - - # The umask is ridiculous, or mkdir does not conform to POSIX, - # or it failed possibly due to a race condition. Create the - # directory the slow way, step by step, checking for races as we go. - - case $dstdir in - /*) prefix=/ ;; - -*) prefix=./ ;; - *) prefix= ;; - esac - - case $posix_glob in - '') - if (set -f) 2>/dev/null; then - posix_glob=true - else - posix_glob=false - fi ;; - esac - - oIFS=$IFS - IFS=/ - $posix_glob && set -f - set fnord $dstdir - shift - $posix_glob && set +f - IFS=$oIFS - - prefixes= - - for d - do - test -z "$d" && continue - - prefix=$prefix$d - if test -d "$prefix"; then - prefixes= - else - if $posix_mkdir; then - (umask=$mkdir_umask && - $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break - # Don't fail if two instances are running concurrently. - test -d "$prefix" || exit 1 - else - case $prefix in - *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; - *) qprefix=$prefix;; - esac - prefixes="$prefixes '$qprefix'" - fi - fi - prefix=$prefix/ - done - - if test -n "$prefixes"; then - # Don't fail if two instances are running concurrently. 
- (umask $mkdir_umask && - eval "\$doit_exec \$mkdirprog $prefixes") || - test -d "$dstdir" || exit 1 - obsolete_mkdir_used=true - fi - fi - fi - - if test -n "$dir_arg"; then - { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && - { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && - { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || - test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 - else - - # Make a couple of temp file names in the proper directory. - dsttmp=$dstdir/_inst.$$_ - rmtmp=$dstdir/_rm.$$_ - - # Trap to clean up those temp files at exit. - trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 - - # Copy the file name to the temp name. - (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && - - # and set any options; do chmod last to preserve setuid bits. - # - # If any of these fail, we abort the whole thing. If we want to - # ignore errors from any of these, just make sure not to ignore - # errors from the above "$doit $cpprog $src $dsttmp" command. - # - { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ - && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ - && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ - && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && - - # Now rename the file to the real destination. - { $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null \ - || { - # The rename failed, perhaps because mv can't rename something else - # to itself, or perhaps because mv is so ancient that it does not - # support -f. - - # Now remove or move aside any old file at destination location. - # We try this two ways since rm can't unlink itself on some - # systems and the destination file might be busy for other - # reasons. In this case, the final cleanup might fail but the new - # file should still install successfully. - { - if test -f "$dst"; then - $doit $rmcmd -f "$dst" 2>/dev/null \ - || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null \ - && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }; }\ - || { - echo "$0: cannot unlink or rename $dst" >&2 - (exit 1); exit 1 - } - else - : - fi - } && - - # Now rename the file to the real destination. - $doit $mvcmd "$dsttmp" "$dst" - } - } || exit 1 - - trap '' 0 - fi -done - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/kokkos/basic/optional/ThreadPool/config/missing b/kokkos/basic/optional/ThreadPool/config/missing deleted file mode 100755 index 1c8ff70..0000000 --- a/kokkos/basic/optional/ThreadPool/config/missing +++ /dev/null @@ -1,367 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. - -scriptversion=2006-05-10.23 - -# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006 -# Free Software Foundation, Inc. -# Originally by Fran,cois Pinard , 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
- -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -run=: -sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p' -sed_minuso='s/.* -o \([^ ]*\).*/\1/p' - -# In the cases where this matters, `missing' is being run in the -# srcdir already. -if test -f configure.ac; then - configure_ac=configure.ac -else - configure_ac=configure.in -fi - -msg="missing on your system" - -case $1 in ---run) - # Try to run requested program, and just exit if it succeeds. - run= - shift - "$@" && exit 0 - # Exit code 63 means version mismatch. This often happens - # when the user try to use an ancient version of a tool on - # a file that requires a minimum version. In this case we - # we should proceed has if the program had been absent, or - # if --run hadn't been passed. - if test $? = 63; then - run=: - msg="probably too old" - fi - ;; - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. - -Options: - -h, --help display this help and exit - -v, --version output version information and exit - --run try to run the given command, and emulate it if it fails - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - autom4te touch the output file, or create a stub one - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - help2man touch the output file - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - tar try tar, gnutar, gtar, then tar without non-portable flags - yacc create \`y.tab.[ch]', if possible, from existing .[ch] - -Send bug reports to ." - exit $? - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing $scriptversion (GNU Automake)" - exit $? - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - -esac - -# Now exit if we have it, but it failed. Also exit now if we -# don't have it and --version was passed (most likely to detect -# the program). -case $1 in - lex|yacc) - # Not GNU programs, they don't have --version. - ;; - - tar) - if test -n "$run"; then - echo 1>&2 "ERROR: \`tar' requires --run" - exit 1 - elif test "x$2" = "x--version" || test "x$2" = "x--help"; then - exit 1 - fi - ;; - - *) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - elif test "x$2" = "x--version" || test "x$2" = "x--help"; then - # Could not run --version or --help. This is probably someone - # running `$TOOL --version' or `$TOOL --help' to check whether - # $TOOL exists and not knowing $TOOL uses missing. - exit 1 - fi - ;; -esac - -# If it does not exist, or fails to run (possibly an outdated version), -# try to emulate it. 
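## Illustrative sketch, not part of the original `missing' script: automake
## typically wires it into generated Makefiles roughly as below (the paths and
## the aclocal version suffix are hypothetical):
##
##   ACLOCAL = ${SHELL} /path/to/config/missing --run aclocal-1.10
##   AUTOCONF = ${SHELL} /path/to/config/missing --run autoconf
##
## so a failed or absent maintainer tool falls through to the emulation cases below.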
-case $1 in - aclocal*) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acinclude.m4' or \`${configure_ac}'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`${configure_ac}'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acconfig.h' or \`${configure_ac}'. You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." - files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case $f in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake*) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - autom4te) - echo 1>&2 "\ -WARNING: \`$1' is needed, but is $msg. - You might have modified some files without having the - proper tools for further handling them. - You can get \`$1' as part of \`Autoconf' from any GNU - archive site." - - file=`echo "$*" | sed -n "$sed_output"` - test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` - if test -f "$file"; then - touch $file - else - test -z "$file" || exec >$file - echo "#! /bin/sh" - echo "# Created by GNU Automake missing as a replacement of" - echo "# $ $@" - echo "exit 0" - chmod +x $file - exit 1 - fi - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' $msg. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." - rm -f y.tab.c y.tab.h - if test $# -ne 1; then - eval LASTARG="\${$#}" - case $LASTARG in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if test -f "$SRCFILE"; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if test -f "$SRCFILE"; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if test ! -f y.tab.h; then - echo >y.tab.h - fi - if test ! -f y.tab.c; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if test $# -ne 1; then - eval LASTARG="\${$#}" - case $LASTARG in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if test -f "$SRCFILE"; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if test ! -f lex.yy.c; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - help2man) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a dependency of a manual page. You may need the - \`Help2man' package in order for those modifications to take - effect. You can get \`Help2man' from any GNU archive site." 
- - file=`echo "$*" | sed -n "$sed_output"` - test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` - if test -f "$file"; then - touch $file - else - test -z "$file" || exec >$file - echo ".ab help2man is required to generate this page" - exit 1 - fi - ;; - - makeinfo) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - # The file to touch is that specified with -o ... - file=`echo "$*" | sed -n "$sed_output"` - test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` - if test -z "$file"; then - # ... or it is the one specified with @setfilename ... - infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n ' - /^@setfilename/{ - s/.* \([^ ]*\) *$/\1/ - p - q - }' $infile` - # ... or it is derived from the source name (dir/f.texi becomes f.info) - test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info - fi - # If the file does not exist, the user really needs makeinfo; - # let's fail without touching anything. - test -f $file || exit 1 - touch $file - ;; - - tar) - shift - - # We have already tried tar in the generic part. - # Look for gnutar/gtar before invocation to avoid ugly error - # messages. - if (gnutar --version > /dev/null 2>&1); then - gnutar "$@" && exit 0 - fi - if (gtar --version > /dev/null 2>&1); then - gtar "$@" && exit 0 - fi - firstarg="$1" - if shift; then - case $firstarg in - *o*) - firstarg=`echo "$firstarg" | sed s/o//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - case $firstarg in - *h*) - firstarg=`echo "$firstarg" | sed s/h//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - fi - - echo 1>&2 "\ -WARNING: I can't seem to be able to run \`tar' with the given arguments. - You may want to install GNU tar or Free paxutils, or check the - command line arguments." - exit 1 - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and is $msg. - You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequisites for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." - exit 1 - ;; -esac - -exit 0 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/kokkos/basic/optional/ThreadPool/config/replace-install-prefix.pl b/kokkos/basic/optional/ThreadPool/config/replace-install-prefix.pl deleted file mode 100755 index 7523b08..0000000 --- a/kokkos/basic/optional/ThreadPool/config/replace-install-prefix.pl +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; -# -# This script is called to do a set of text replacements for installing -# a Mafile.export.package file so that external clients can use it. -# -# Read in commandline arguments -# -my $exec_prefix = ""; # [required] Abs path to base installation directory (i.e. --prefix=??? 
option passed to configure) -my $my_export_makefile = ""; # [required] Name only of installed Makefile.export.package file -my $my_top_srcdir = ""; # [required] Abs path to this package's top source directory -my $my_incl_dirs = ""; # [required] Abs path to this package's include directories -my $my_lib_dirs = ""; # [optional] Abs path to this package's library directories (if any exist) -my $dep_package_builddirs = ""; # [optional] Abs paths to other directly dependent framework package build directories (if any exist) -GetOptions( - "exec-prefix=s" => \$exec_prefix, - "my-export-makefile=s" => \$my_export_makefile, - "my-abs-top-srcdir=s" => \$my_top_srcdir, - "my-abs-incl-dirs=s" => \$my_incl_dirs, - "my-abs-lib-dirs=s" => \$my_lib_dirs, - "dep-package-abs-builddirs=s" => \$dep_package_builddirs - ); -# -# Validate commandline arguments -# -scalar(@ARGV) == 0 || die; -$exec_prefix ne "" || die; -$my_export_makefile ne "" || die; -$my_top_srcdir ne "" || die; -$my_incl_dirs ne "" || die; -# -# Interpret commandline arguments -# -$exec_prefix = remove_rel_paths($exec_prefix); -my @my_incl_dirs = split(":",$my_incl_dirs); -my @my_lib_dirs = split(":",$my_lib_dirs); -my @dep_export_package_builddirs = split(":",$dep_package_builddirs); -# -# Do the replacements -# -my $my_abs_export_makefile = "${exec_prefix}/include/${my_export_makefile}"; - -my $cmnd_base = "${my_top_srcdir}/config/token-replace.pl "; -# -foreach(@dep_export_package_builddirs) { - if($_ ne "") { - run_cmnd($cmnd_base . "${_} ${exec_prefix}/include ${my_abs_export_makefile} ${my_abs_export_makefile}"); - } -} -# -foreach(@my_incl_dirs) { - if($_ ne "") { - run_cmnd($cmnd_base . "-I${_} -I${exec_prefix}/include ${my_abs_export_makefile} ${my_abs_export_makefile}"); - } -} -# -foreach(@my_lib_dirs) { - if($_ ne "") { - run_cmnd($cmnd_base . "-L${_} -L${exec_prefix}/lib ${my_abs_export_makefile} ${my_abs_export_makefile}"); - } -} -# -run_cmnd($cmnd_base . "${my_top_srcdir}/config ${exec_prefix}/include ${my_abs_export_makefile} ${my_abs_export_makefile}"); -# -# Subroutines -# -sub remove_rel_paths { - my $entry_in = shift; - if ($entry_in=~/-L\.\./) { - return $entry_in; - } - my @paths = split("/",$entry_in); - my @new_paths; - foreach( @paths ) { - if( !($_=~/\.\./) ) { - push @new_paths, $_; - } - else { - pop @new_paths - } - } - return join("/",@new_paths); -} -sub run_cmnd { - my $cmnd = shift; - #print "\n", $cmnd, "\n"; - system($cmnd)==0 || die; -} diff --git a/kokkos/basic/optional/ThreadPool/config/string-replace.pl b/kokkos/basic/optional/ThreadPool/config/string-replace.pl deleted file mode 100755 index adeb1f4..0000000 --- a/kokkos/basic/optional/ThreadPool/config/string-replace.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/perl -w -# -# This perl script replaces a string with another string. -# Here it is allowd for file_in and file_out to be the -# same file. 
-# -use strict; -# -my $g_use_msg = - "Use: string-replace.pl find_string replacement_string file_in file_out\n"; -if( scalar(@ARGV) < 4 ) { - print STDERR $g_use_msg; - exit(-1); -} -# -my $find_string = shift; -my $replacement_string = shift; -my $file_in_name = shift; -my $file_out_name = shift; -# -# -if($file_in_name=~/CVS/) { -# print "Do not replace in CVS\n"; - exit; -} -# -open FILE_IN, "<$file_in_name" || die "The file $file_in_name could not be opended for input\n"; -my @file_in_array = ; -close FILE_IN; -# -my @file_out_array; -my $did_replacement = 0; -foreach(@file_in_array) { - #print $_; - $did_replacement = 1 if $_=~s/$find_string/$replacement_string/g; - #print $_; - push @file_out_array, $_; -} -if($did_replacement || $file_out_name ne $file_in_name) { - open FILE_OUT, ">$file_out_name" || die "The file $file_out_name could not be opended for output\n"; - print FILE_OUT @file_out_array; - close FILE_OUT; -} diff --git a/kokkos/basic/optional/ThreadPool/config/strip_dup_incl_paths.pl b/kokkos/basic/optional/ThreadPool/config/strip_dup_incl_paths.pl deleted file mode 100755 index c628d31..0000000 --- a/kokkos/basic/optional/ThreadPool/config/strip_dup_incl_paths.pl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/perl -w -# This perl script removes duplicate include paths left to the right -use strict; -my @all_incl_paths = @ARGV; -my @cleaned_up_incl_paths; -foreach( @all_incl_paths ) { - $_ = remove_rel_paths($_); - if( !($_=~/-I/) ) { - push @cleaned_up_incl_paths, $_; - } - elsif( !entry_exists($_,\@cleaned_up_incl_paths) ) { - push @cleaned_up_incl_paths, $_; - } -} -print join( " ", @cleaned_up_incl_paths ); -# -# Subroutines -# -sub entry_exists { - my $entry = shift; # String - my $list = shift; # Reference to an array - foreach( @$list ) { - if( $entry eq $_ ) { return 1; } - } - return 0; -} -# -sub remove_rel_paths { - my $entry_in = shift; - if ($entry_in=~/-I\.\./) { - return $entry_in; - } - my @paths = split("/",$entry_in); - my @new_paths; - foreach( @paths ) { - if( !($_=~/\.\./) ) { - push @new_paths, $_; - } - else { - pop @new_paths - } - } - return join("/",@new_paths); -} diff --git a/kokkos/basic/optional/ThreadPool/config/strip_dup_libs.pl b/kokkos/basic/optional/ThreadPool/config/strip_dup_libs.pl deleted file mode 100755 index cdf4b42..0000000 --- a/kokkos/basic/optional/ThreadPool/config/strip_dup_libs.pl +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/perl -w -# This perl script removes duplicate libraries from the right to the left and -# removes duplicate -L library paths from the left to the right -use strict; - -my @all_libs = @ARGV; -# -# Move from left to right and remove duplicate -l libraries -# -my @cleaned_up_libs_first; -foreach( reverse @all_libs ) { - $_ = remove_rel_paths($_); - if( $_=~/-L/ ) { - unshift @cleaned_up_libs_first, $_; - } - else { - if( !entry_exists($_,\@cleaned_up_libs_first) ) { - unshift @cleaned_up_libs_first, $_; - } - } -} - -# -# Move from right to left and remove duplicate -L library paths -# -my @cleaned_up_libs; -foreach( @cleaned_up_libs_first ) { - $_ = remove_rel_paths($_); - if( !($_=~/-L/) ) { - push @cleaned_up_libs, $_; - } - elsif( !entry_exists($_,\@cleaned_up_libs) ) { - push @cleaned_up_libs, $_; - } -} -# -# Print the new list of libraries and paths -# -print join( " ", @cleaned_up_libs ); - -# -# Subroutines -# -sub entry_exists { - my $entry = shift; # String - my $list = shift; # Reference to an array - foreach( @$list ) { - if( $entry eq $_ ) { return 1; } - } - return 0; -} -# -sub remove_rel_paths { 
- my $entry_in = shift; - if ($entry_in=~/-L\.\./) { - return $entry_in; - } - my @paths = split("/",$entry_in); - my @new_paths; - foreach( @paths ) { - if( !($_=~/\.\./) ) { - push @new_paths, $_; - } - else { - pop @new_paths - } - } - return join("/",@new_paths); -} diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_check_mpi.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_check_mpi.m4 deleted file mode 100644 index 10d569a..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_check_mpi.m4 +++ /dev/null @@ -1,68 +0,0 @@ -dnl @synopsis TAC_ARG_CHECK_MPI -dnl -dnl Check to make sure any definitions set in TAC_ARG_CONFIG_MPI -dnl are valid, set the MPI flags. Test MPI compile using C++ compiler. -dnl -dnl @author Mike Heroux -dnl -AC_DEFUN([TAC_ARG_CHECK_MPI], -[ - -if test "X${HAVE_PKG_MPI}" = "Xyes"; then - - if test -n "${MPI_DIR}" && test -z "${MPI_INC}"; then - MPI_INC="${MPI_DIR}/include" - fi - - if test -n "${MPI_INC}"; then - CPPFLAGS="${CPPFLAGS} -I${MPI_INC}" - fi - - AC_LANG_CPLUSPLUS - AC_MSG_CHECKING(for mpi.h) - AC_TRY_CPP([#include "mpi.h"], - [AC_MSG_RESULT(yes)], - [ - AC_MSG_RESULT(no) - echo "-----" - echo "Cannot link simple MPI program." - echo "Try --with-mpi-compilers to specify MPI compilers." - echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir" - echo "to specify all the specific MPI compile options." - echo "-----" - AC_MSG_ERROR(MPI cannot link) - ]) - - if test -n "${MPI_DIR}" && test -z "${MPI_LIBDIR}"; then - MPI_LIBDIR="${MPI_DIR}/lib" - fi - - if test -n "${MPI_LIBDIR}"; then - LDFLAGS="${LDFLAGS} -L${MPI_LIBDIR}" - fi - - if test -z "${MPI_LIBS}" && test -n "${MPI_LIBDIR}"; then - MPI_LIBS="-lmpi" - fi - - if test -n "${MPI_LIBS}"; then - LIBS="${MPI_LIBS} ${LIBS}" - fi - -# AC_LANG_CPLUSPLUS -# AC_MSG_CHECKING(whether MPI will link using C++ compiler) -# AC_TRY_LINK([#include ], -# [int c; char** v; MPI_Init(&c,&v);], -# [AC_MSG_RESULT(yes)], -# [AC_MSG_RESULT(no) -# echo "-----" -# echo "Cannot link simple MPI program." -# echo "Try --with-mpi-cxx to specify MPI C++ compile script." -# echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir" -# echo "to specify all the specific MPI compile options." -# echo "-----" -# AC_MSG_ERROR(MPI cannot link)] -# ) - -fi -]) diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_config_mpi.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_config_mpi.m4 deleted file mode 100644 index 2d1dd98..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_config_mpi.m4 +++ /dev/null @@ -1,188 +0,0 @@ -dnl @synopsis TAC_ARG_CONFIG_MPI -dnl -dnl Test a variety of MPI options: -dnl --enable-mpi - Turns MPI compiling mode on -dnl --with-mpi - specify root directory of MPI -dnl --with-mpi-compilers - Turns on MPI compiling mode and sets the MPI C++ -dnl compiler = mpicxx, mpic++ or mpiCC, -dnl the MPI C compiler = mpicc and -dnl the MPI Fortran compiler = mpif77 -dnl --with-mpi-incdir - specify include directory for MPI -dnl --with-mpi-libs - specify MPI libraries -dnl --with-mpi-libdir - specify location of MPI libraries -dnl -dnl If any of these options are set, HAVE_MPI will be defined for both -dnl Autoconf and Automake, and HAVE_MPI will be defined in the -dnl generated config.h file -dnl -dnl -dnl @author Mike Heroux -dnl Modified 12/26/2007 by Jim Willenbring to skip the Fortran compiler -dnl check if Fortran is not enabled. 
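dnl Illustrative sketch, not part of the original macro: typical configure
dnl invocations that the options documented above are meant to support
dnl (installation paths are hypothetical):
dnl
dnl   ./configure --with-mpi-compilers=/opt/mpi/bin
dnl   ./configure --enable-mpi --with-mpi-incdir=/opt/mpi/include \
dnl               --with-mpi-libdir=/opt/mpi/lib --with-mpi-libs="-lmpi"
dnl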
-dnl -AC_DEFUN([TAC_ARG_CONFIG_MPI], -[ - -AC_ARG_ENABLE(mpi, -[AC_HELP_STRING([--enable-mpi],[MPI support])], -[HAVE_PKG_MPI=$enableval], -[HAVE_PKG_MPI=no] -) - -AC_ARG_WITH(mpi-compilers, -[AC_HELP_STRING([--with-mpi-compilers=PATH], -[use MPI compilers mpicc, mpif77, and mpicxx, mpic++ or mpiCC in the specified path or in the default path if no path is specified. Enables MPI])], -[ - if test X${withval} != Xno; then - HAVE_PKG_MPI=yes - if test X${withval} = Xyes; then - # Check for mpicxx, if it does not exist, check for mpic++, if it does - # not exist, use mpiCC instead. - AC_CHECK_PROG(MPI_TEMP_CXX, mpicxx, mpicxx, no) - if test X${MPI_TEMP_CXX} = Xno; then - AC_CHECK_PROG(MPI_CXX, mpic++, mpic++, mpiCC) - else - MPI_CXX=${MPI_TEMP_CXX} - fi - MPI_CC=mpicc - MPI_F77=mpif77 - else - if test -f ${withval}/mpicxx; then - MPI_CXX=${withval}/mpicxx - elif test -f ${withval}/mpic++; then - MPI_CXX=${withval}/mpic++ - else - MPI_CXX=${withval}/mpiCC - fi - MPI_CC=${withval}/mpicc - MPI_F77=${withval}/mpif77 - fi - fi -] -) - -AC_ARG_WITH(mpi, -[AC_HELP_STRING([--with-mpi=MPIROOT],[use MPI root directory (enables MPI)])], -[ - HAVE_PKG_MPI=yes - MPI_DIR=${withval} - AC_MSG_CHECKING(MPI directory) - AC_MSG_RESULT([${MPI_DIR}]) -] -) - -#AC_ARG_WITH(mpi-include, -#[AC_HELP_STRING([--with-mpi-include],[Obsolete. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'.])], -#[AC_MSG_ERROR([--with-mpi-include is an obsolte option. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'. For example '--with-mpi-incdir=/usr/lam_path/include'.])] -#) - -AC_ARG_WITH(mpi-libs, -[AC_HELP_STRING([--with-mpi-libs="LIBS"],[MPI libraries @<:@"-lmpi"@:>@])], -[ - MPI_LIBS=${withval} - AC_MSG_CHECKING(user-defined MPI libraries) - AC_MSG_RESULT([${MPI_LIBS}]) -] -) - -AC_ARG_WITH(mpi-incdir, -[AC_HELP_STRING([--with-mpi-incdir=DIR],[MPI include directory @<:@MPIROOT/include@:>@ Do not use -I])], -[ - MPI_INC=${withval} - AC_MSG_CHECKING(user-defined MPI includes) - AC_MSG_RESULT([${MPI_INC}]) -] -) - -AC_ARG_WITH(mpi-libdir, -[AC_HELP_STRING([--with-mpi-libdir=DIR],[MPI library directory @<:@MPIROOT/lib@:>@ Do not use -L])], -[ - MPI_LIBDIR=${withval} - AC_MSG_CHECKING(user-defined MPI library directory) - AC_MSG_RESULT([${MPI_LIBDIR}]) -] -) - -AC_MSG_CHECKING(whether we are using MPI) -AC_MSG_RESULT([${HAVE_PKG_MPI}]) - -if test "X${HAVE_PKG_MPI}" = "Xyes"; then - AC_DEFINE(HAVE_MPI,,[define if we want to use MPI]) -fi - -dnl Define Automake version of HAVE_MPI if appropriate - -AM_CONDITIONAL(HAVE_MPI, [test "X${HAVE_PKG_MPI}" = "Xyes"]) - - -dnl -dnl -------------------------------------------------------------------- -dnl Check for MPI compilers (must be done *before* AC_PROG_CXX, -dnl AC_PROG_CC and AC_PROG_F77) -dnl -dnl -------------------------------------------------------------------- - -if test -n "${MPI_CXX}"; then - if test -f ${MPI_CXX}; then - MPI_CXX_EXISTS=yes - else - AC_CHECK_PROG(MPI_CXX_EXISTS, ${MPI_CXX}, yes, no) - fi - - if test "X${MPI_CXX_EXISTS}" = "Xyes"; then - CXX=${MPI_CXX} - else - echo "-----" - echo "Cannot find MPI C++ compiler ${MPI_CXX}." 
- echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" - echo "or specify a C++ compiler using CXX=" - echo "Do not use --with-mpi-compilers if using CXX=" - echo "-----" - AC_MSG_ERROR([MPI C++ compiler (${MPI_CXX}) not found.]) - fi -fi - -if test -n "${MPI_CC}"; then - if test -f ${MPI_CC}; then - MPI_CC_EXISTS=yes - else - AC_CHECK_PROG(MPI_CC_EXISTS, ${MPI_CC}, yes, no) - fi - - if test "X${MPI_CC_EXISTS}" = "Xyes"; then - CC=${MPI_CC} - else - echo "-----" - echo "Cannot find MPI C compiler ${MPI_CC}." - echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" - echo "or specify a C compiler using CC=" - echo "Do not use --with-mpi-compilers if using CC=" - echo "-----" - AC_MSG_ERROR([MPI C compiler (${MPI_CC}) not found.]) - fi -fi - -if test "X$ac_cv_use_fortran" = "Xyes"; then - -if test -n "${MPI_F77}"; then - if test -f ${MPI_F77}; then - MPI_F77_EXISTS=yes - else - AC_CHECK_PROG(MPI_F77_EXISTS, ${MPI_F77}, yes, no) - fi - - if test "X${MPI_F77_EXISTS}" = "Xyes"; then - F77=${MPI_F77} - else - echo "-----" - echo "Cannot find MPI Fortran compiler ${MPI_F77}." - echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" - echo "or specify a Fortran 77 compiler using F77=" - echo "Do not use --with-mpi-compilers if using F77=" - echo "-----" - AC_MSG_ERROR([MPI Fortran 77 compiler (${MPI_F77}) not found.]) - fi -fi - -fi dnl ac_cv_use_fortran -]) diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_export-makefiles.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_export-makefiles.m4 deleted file mode 100644 index b7a8b38..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_export-makefiles.m4 +++ /dev/null @@ -1,76 +0,0 @@ -dnl Enables export makefile specific code -dnl -dnl The following AM_CONDITIONALS are set for makefiles to access: -dnl USING_EXPORT_MAKEFILES -dnl USING_PERL via TAC_ARG_WITH_PERL -dnl USING_GNUMAKE -dnl -dnl The following AC_DEFINES are set: -dnl HAVE_EXPORT_MAKEFILES -dnl -dnl the following variables are set: -dnl PERL_EXE for the perl executable via TAC_ARG_WITH_PERL -dnl -dnl This file was based on tac_arg_enable_feature.m4 by Mike Heroux -dnl @author Roger Pawlowski -dnl -AC_DEFUN([TAC_ARG_ENABLE_EXPORT_MAKEFILES], -[ -AC_ARG_ENABLE(export-makefiles, -AC_HELP_STRING([--enable-export-makefiles],[Creates export makefiles in the install (prefix) directory. This option requires perl to be set in your path or defined with --with-perl=. Note that the export makefiles are always created and used in the build directory, but will not be installable without this option to change the paths. (default is $1)]), -ac_cv_use_export_makefiles=$enableval, -ac_cv_use_export_makefiles=$1) - -AC_MSG_CHECKING(whether to build export makefiles) - -if test "X$ac_cv_use_export_makefiles" != "Xno"; then - - AC_MSG_RESULT(yes) - AC_DEFINE([HAVE_EXPORT_MAKEFILES],,[Define if you want to build export makefiles.]) - -else - - AC_MSG_RESULT(no) - -fi - -AM_CONDITIONAL(USING_EXPORT_MAKEFILES, test X${ac_cv_use_export_makefiles} = Xyes) - -# Check for perl to run scripts (Required dependency) -TAC_ARG_WITH_PERL - -if test "X$HAVE_PERL" != "Xyes" && - test "X$ac_cv_use_export_makefiles" != "Xno"; then - AC_MSG_RESULT(no) - AC_MSG_ERROR([Failed to find the perl executable. The flag --enable-export-makefiles requires perl to be either in your path or explicitly defined by the flag --with-perl=. 
If you do not require the export makefiles to be installed via 'make install', you can disable the export makefiles with --disable-export-makefiles.]) -fi - -# Check for using gnumake to clean up link lines via -# gnumake's "shell" command. Optional dependency. -AC_DEFUN([TAC_ARG_WITH_GNUMAKE], -[ -AC_ARG_WITH(gnumake, -AC_HELP_STRING([--with-gnumake],[Gnu's make has special functions we can use to eliminate redundant paths in the build and link lines. Enable this if you use gnu-make to build Trilinos. This requires that perl is in your path or that you have specified the perl executable with --with-perl=. Configure will check for the existence of the perl executable and quit with an error if it is not found. (default is no)]), -ac_cv_use_gnumake=$withval, ac_cv_use_gnumake=no) - -AC_MSG_CHECKING(whether gnumake specific code should be enabled) - -if test "X$ac_cv_use_gnumake" != "Xno"; then - AC_MSG_RESULT(yes) - AC_DEFINE([HAVE_GNUMAKE],,[Define if you are using gnumake - this will shorten your link lines.]) -else - AC_MSG_RESULT(no) -fi -AM_CONDITIONAL(USING_GNUMAKE, test "X$ac_cv_use_gnumake" = "Xyes") -]) - -TAC_ARG_WITH_GNUMAKE - -if test "X$HAVE_PERL" != "Xyes" && - test "X$ac_cv_use_gnumake" != "Xno"; then - AC_MSG_RESULT(no) - AC_MSG_ERROR([The flag --with-gnumake requires perl to be in your path. The perl executable can alternatively be explicitly defined by the flag --with-perl=.]) -fi - -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_feature.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_feature.m4 deleted file mode 100644 index 4e22753..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_feature.m4 +++ /dev/null @@ -1,40 +0,0 @@ -dnl @synopsis TAC_ARG_ENABLE_FEATURE(FEATURE_NAME, FEATURE_DESCRIPTION, HAVE_NAME, DEFAULT_VAL) -dnl -dnl Test for --enable-${FEATURE_NAME} and set to DEFAULT_VAL value if feature not specified. -dnl Also calls AC_DEFINE to define HAVE_${HAVE_NAME} if value is not equal to "no" -dnl -dnl Use this macro to help defining whether or not optional -dnl features* should compiled. For example: -dnl -dnl TAC_ARG_ENABLE_FEATURE(epetra, [Configure and build epetra], EPETRA, yes) -dnl -dnl will test for --enable-epetra when configure is run. If it is defined -dnl and not set to "no" or not defined (default is "yes") then HAVE_EPETRA will -dnl be defined, if --enable-epetra is defined to be "no", HAVE_EPETRA will not -dnl be defined. -dnl -dnl *NOTE: epetra, aztecoo, komplex, ifpack, and other software found in -dnl subdirectories of Trilinos/packages are "packages" in their own right. -dnl However, these packages are also "features" of the larger package -dnl "Trilinos". Therefore, when configuring from the Trilinos directory, -dnl it is appropriate to refer to these software packages as "features". 
-dnl -dnl This file was based on tac_arg_with_package.m4 by Mike Heroux -dnl @author James Willenbring -dnl -AC_DEFUN([TAC_ARG_ENABLE_FEATURE], -[ -AC_ARG_ENABLE([$1], -AC_HELP_STRING([--enable-$1],[$2 (default is [$4])]), -ac_cv_use_$1=$enableval, ac_cv_use_$1=$4) - -AC_MSG_CHECKING(whether to use [$1]) - -if test "X$ac_cv_use_$1" != "Xno"; then - AC_MSG_RESULT(yes) - AC_DEFINE([HAVE_$3],,[Define if want to build $1]) -else - AC_MSG_RESULT(no) -fi -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_feature_sub_check.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_feature_sub_check.m4 deleted file mode 100755 index b3876fd..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_enable_feature_sub_check.m4 +++ /dev/null @@ -1,54 +0,0 @@ -dnl @synopsis TAC_ARG_ENABLE_FEATURE_SUB_CHECK(FEATURE_NAME, SUB_FEATURE_NAME, FEATURE_DESCRIPTION, HAVE_NAME) -dnl -dnl This hack gets around the fact that TAC_ARG_ENABLE_FEATURE does not support underscores -dnl in its feature names. TAC_ARG_ENABLE_FEATURE_SUB_CHECK allows exactly one underscore. Not great, -dnl but arguably better than supporting no underscores. -dnl -dnl TAC_ARG_ENABLE_FEATURE(feature-sub, [Configure and build feature-sub], FEATURE_SUB, yes) -dnl fails because tac_arg_enable_feature tests for ac_cv_use_feature-sub which gets -dnl rejected because the `-' is not allowed in variables. (AC_ARG_ENABLE sets ac_cv_use_feature_sub -dnl to avoid this problem.) Use: -dnl -dnl TAC_ARG_ENABLE_FEATURE_SUB_CHECK(feature, sub, [Configure and build feature-sub], FEATURE_SUB) -dnl instead. -dnl -dnl This macro will test for --enable-${FEATURE_NAME}-${SUB_FEATURE_NAME} when configure is run. -dnl If it is defined and not set to "no" or not defined and --disable-${SUB_FEATURE_NAME} is not -dnl specified then HAVE_${HAVE_NAME} will be defined. -dnl -dnl *NOTE: This macro is designed for the use-case when there is an individual Trilinos package -dnl offering fine-grained control of a Trilinos option. This way, the individual package -dnl option is enabled, as long as the Trilinos option is not disabled. If the Trilinos option is -dnl disabled, then the user must enable each packages option individually. For instance: -dnl -dnl --disable-tests --enable-teuchos-tests -dnl -dnl *NOTE: epetra, aztecoo, komplex, ifpack, and other software found in -dnl subdirectories of Trilinos/packages are "packages" in their own right. -dnl However, these packages are also "features" of the larger package -dnl "Trilinos". Therefore, when configuring from the Trilinos directory, -dnl it is appropriate to refer to these software packages as "features". -dnl -dnl This file was based on tac_arg_enable_package.m4 by Jim Willenbring -dnl and tac_arg_enable_package_sub.m4 by Ken Stanley. 
-dnl -dnl @author Heidi Thornquist -dnl -AC_DEFUN([TAC_ARG_ENABLE_FEATURE_SUB_CHECK], -[ -AC_ARG_ENABLE([$2],, ac_cv_use_$2=$enableval, ac_cv_use_$2=yes) - -AC_ARG_ENABLE([$1-$2], -AC_HELP_STRING([--enable-$1-$2],[$3 (default is yes if --disable-$2 is not specified)]), -ac_cv_use_$1_$2=$enableval, ac_cv_use_$1_$2=${ac_cv_use_$2}) - -AC_MSG_CHECKING(whether to use [$1-$2]) - -if test "X$ac_cv_use_$1_$2" != "Xno"; then - AC_MSG_RESULT(yes) - AC_DEFINE([HAVE_$4],,[Define if want to build $1-$2]) -else - AC_MSG_RESULT(no) -fi -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_ar.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_with_ar.m4 deleted file mode 100644 index 9568f3e..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_ar.m4 +++ /dev/null @@ -1,39 +0,0 @@ -dnl @synopsis TAC_ARG_WITH_AR -dnl -dnl Test for --with-ar="ar_program ar_flags". -dnl Default is "ar cru" -dnl -dnl Generates an Automake conditional USE_ALTERNATE_AR that can be tested. -dnl Generates the user-specified archiver command in @ALTERNATE_AR@. -dnl -dnl @author Mike Heroux -dnl -AC_DEFUN([TAC_ARG_WITH_AR], -[ -AC_ARG_WITH(ar, -AC_HELP_STRING([--with-ar], [override archiver command (default is "ar cru")]), -[ -AC_MSG_CHECKING(user-defined archiver) -AC_MSG_RESULT([${withval}]) -USE_ALTERNATE_AR=yes -ALTERNATE_AR="${withval}" -] -) - -if test -n "${SPECIAL_AR}" && test "X${USE_ALTERNATE_AR}" != "Xyes"; -then - USE_ALTERNATE_AR=yes - ALTERNATE_AR="${SPECIAL_AR}" -fi - -AC_MSG_CHECKING(for special archiver command) -if test "X${USE_ALTERNATE_AR}" = "Xyes"; then - AC_MSG_RESULT([${ALTERNATE_AR}]) - AM_CONDITIONAL(USE_ALTERNATE_AR, true) -else - AC_MSG_RESULT([none]) - AM_CONDITIONAL(USE_ALTERNATE_AR, false) -fi -AC_SUBST(ALTERNATE_AR) -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_flags.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_with_flags.m4 deleted file mode 100644 index 256450a..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_flags.m4 +++ /dev/null @@ -1,31 +0,0 @@ -dnl @synopsis TAC_ARG_WITH_FLAGS(lcase_name, UCASE_NAME) -dnl -dnl Test for --with-lcase_name="compiler/loader flags". if defined, prepend -dnl flags to standard UCASE_NAME definition. -dnl -dnl Use this macro to facilitate additional special flags that should be -dnl passed on to the preprocessor/compilers/loader. -dnl -dnl Example use -dnl -dnl TAC_ARG_WITH_FLAGS(cxxflags, CXXFLAGS) -dnl -dnl tests for --with-cxxflags and pre-pends to CXXFLAGS -dnl -dnl -dnl @author Mike Heroux -dnl -AC_DEFUN([TAC_ARG_WITH_FLAGS], -[ -AC_MSG_CHECKING([whether additional [$2] flags should be added]) -AC_ARG_WITH($1, -AC_HELP_STRING([--with-$1], -[additional [$2] flags to be added: will prepend to [$2]]), -[ -$2="${withval} ${$2}" -AC_MSG_RESULT([$2 = ${$2}]) -], -AC_MSG_RESULT(no) -) -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_incdirs.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_with_incdirs.m4 deleted file mode 100644 index f3092e5..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_incdirs.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl @synopsis TAC_ARG_WITH_INCDIRS -dnl -dnl Test for --with-incdirs="-Iincdir1 -Iincdir2". if defined, prepend -dnl "-Iincdir1 -Iincdir2" to CPPFLAGS -dnl -dnl Use this macro to facilitate addition of directories to include file search path. 
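The sub-check variant above is what gives ThreadPool its --enable-threadpool-tests switch (visible in the --help output further down in this diff). A minimal sketch of the call and the interaction it is designed for; the exact arguments, including the HAVE_ name, are an assumption based on the option names and are not copied from ThreadPool's configure.ac:

    dnl in configure.ac
    TAC_ARG_ENABLE_FEATURE_SUB_CHECK(threadpool, tests,
      [Make ThreadPool tests buildable with 'make tests'], THREADPOOL_TESTS)
    dnl resulting behaviour:
    dnl   ./configure                                -> HAVE_THREADPOOL_TESTS defined
    dnl   ./configure --disable-tests                -> HAVE_THREADPOOL_TESTS undefined
    dnl   ./configure --disable-tests --enable-threadpool-tests
    dnl                                              -> HAVE_THREADPOOL_TESTS defined

In other words, the package-level option defaults to whatever the Trilinos-wide --enable-tests/--disable-tests setting is, and only an explicit --enable-threadpool-tests overrides a global --disable-tests.
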
-dnl -dnl -dnl @author Mike Heroux -dnl -AC_DEFUN([TAC_ARG_WITH_INCDIRS], -[ -AC_MSG_CHECKING([whether additional include search paths defined]) -AC_ARG_WITH(incdirs, -AC_HELP_STRING([--with-incdirs], -[additional directories containing include files: will prepend to search here for includes, use -Idir format]), -[ -CPPFLAGS="${withval} ${CPPFLAGS}" -AC_MSG_RESULT([${withval}]) -], -AC_MSG_RESULT(no) -) -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_libdirs.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_with_libdirs.m4 deleted file mode 100644 index b2f9438..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_libdirs.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl @synopsis TAC_ARG_WITH_LIBDIRS -dnl -dnl Test for --with-libdirs="-Llibdir1 -Llibdir2". if defined, -dnl prepend "-Llibdir1 -Llibdir2" to LDFLAGS -dnl -dnl Use this macro to facilitate addition of directories to library search path. -dnl -dnl -dnl @author Mike Heroux -dnl -AC_DEFUN([TAC_ARG_WITH_LIBDIRS], -[ -AC_MSG_CHECKING([whether additional library search paths defined]) -AC_ARG_WITH(libdirs, -AC_HELP_STRING([--with-libdirs], -[OBSOLETE use --with-ldflags instead. (ex. --with-ldflags="-L -L")]), -[ -LDFLAGS="${withval} ${LDFLAGS}" -AC_MSG_RESULT([${withval}]) -], -AC_MSG_RESULT(no) -) -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_libs.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_with_libs.m4 deleted file mode 100644 index 3a64880..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_libs.m4 +++ /dev/null @@ -1,30 +0,0 @@ -dnl @synopsis TAC_ARG_WITH_LIBS -dnl -dnl Test for --with-libs="name(s)". -dnl -dnl Prepends the specified name(s) to the list of libraries to link -dnl with. -dnl -dnl Example use -dnl -dnl TAC_ARG_WITH_LIBS -dnl -dnl tests for --with-libs and pre-pends to LIBS -dnl -dnl @author Jim Willenbring -dnl -AC_DEFUN([TAC_ARG_WITH_LIBS], -[ -AC_MSG_CHECKING([whether additional libraries are needed]) -AC_ARG_WITH(libs, -AC_HELP_STRING([--with-libs], -[List additional libraries here. For example, --with-libs=-lsuperlu -or --with-libs=/path/libsuperlu.a]), -[ -LIBS="${withval} ${LIBS}" -AC_MSG_RESULT([LIBS = ${LIBS}]) -], -AC_MSG_RESULT(no) -) -] -) diff --git a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_perl.m4 b/kokkos/basic/optional/ThreadPool/config/tac_arg_with_perl.m4 deleted file mode 100644 index 63e74ba..0000000 --- a/kokkos/basic/optional/ThreadPool/config/tac_arg_with_perl.m4 +++ /dev/null @@ -1,34 +0,0 @@ -dnl @synopsis TAC_ARG_WITH_PERL(DEFAULT_VAL) -dnl -dnl Test for --enable-gnumake and set to DEFAULT_VAL value if feature not specified. -dnl Calls AC_DEFINE to define HAVE_GNUMAKE if value is not equal to "no" -dnl Calls AM_CONDITIONAL to define USING_GNUMAKE to true/false. -dnl -dnl This file was based on tac_arg_with_ar.m4 by Mike Heroux -dnl @author Roger Pawlowski -dnl -AC_DEFUN([TAC_ARG_WITH_PERL], -[ - -AC_ARG_WITH(perl, -AC_HELP_STRING([--with-perl], [supply a perl executable. 
For example --with-perl=/usr/bin/perl.]), -[ -AC_MSG_CHECKING(for user supplied perl executable) -AC_MSG_RESULT([${withval}]) -USER_SPECIFIED_PERL=yes -PERL_EXE="${withval}" -], -[ -USER_SPECIFIED_PERL=no -]) - -if test "X${USER_SPECIFIED_PERL}" = "Xyes"; then - AC_CHECK_FILE(${PERL_EXE}, [HAVE_PERL=yes], [HAVE_PERL=no]) - AC_SUBST(PERL_EXE, ${PERL_EXE}) -else - AC_CHECK_PROG(HAVE_PERL, perl, yes, no) - AC_SUBST(PERL_EXE, perl) -fi -AM_CONDITIONAL(USING_PERL, test X${HAVE_PERL} = Xyes) -]) - diff --git a/kokkos/basic/optional/ThreadPool/config/token-replace.pl b/kokkos/basic/optional/ThreadPool/config/token-replace.pl deleted file mode 100755 index c3b413e..0000000 --- a/kokkos/basic/optional/ThreadPool/config/token-replace.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/perl -w -# -# This perl script replaces a string with another string -# on a token basis. Here it is allowed for file_in and -# file_out to be the same file. -# -use strict; -# -my $g_use_msg = - "Use: token-replace.pl find_token replacement_token file_in file_out\n"; -if( scalar(@ARGV) < 4 ) { - print STDERR $g_use_msg; - exit(-1); -} -# -my $find_token = shift; -my $replacement_token = shift; -my $file_in_name = shift; -my $file_out_name = shift; -# -#print "file_in_name = $file_in_name\n"; -if($file_in_name=~/CVS/) { -# print "Do not replace in CVS\n"; - exit; -} -open FILE_IN, "<$file_in_name" || die "The file $file_in_name could not be opended for input\n"; -my @file_in_array = ; -close FILE_IN; -# -my $match_str = '([^\w\d_]|^)' . $find_token . '([^\w\d_]|$)'; -#print $match_str . "\n"; -# -my @file_out_array; -my $did_replacement = 0; -foreach(@file_in_array) { - $did_replacement = 1 if $_=~s/$match_str/$1$replacement_token$2/g; - push @file_out_array, $_; -} -if($did_replacement || $file_out_name ne $file_in_name) { - open FILE_OUT, ">$file_out_name" || die "The file $file_out_name could not be opended for output\n"; - print FILE_OUT @file_out_array; - close FILE_OUT; -} diff --git a/kokkos/basic/optional/ThreadPool/configure b/kokkos/basic/optional/ThreadPool/configure deleted file mode 100755 index 6312db9..0000000 --- a/kokkos/basic/optional/ThreadPool/configure +++ /dev/null @@ -1,7804 +0,0 @@ -#! /bin/sh -# Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.61 for ThreadPool 1.1d. -# -# Report bugs to . -# -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, -# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. -## --------------------- ## -## M4sh Initialization. ## -## --------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - - - -# PATH needs CR -# Avoid depending upon Character Ranges. -as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - -# The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then - echo "#! 
/bin/sh" >conf$$.sh - echo "exit 0" >>conf$$.sh - chmod +x conf$$.sh - if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then - PATH_SEPARATOR=';' - else - PATH_SEPARATOR=: - fi - rm -f conf$$.sh -fi - -# Support unset when possible. -if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then - as_unset=unset -else - as_unset=false -fi - - -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -as_nl=' -' -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -case $0 in - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break -done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - { (exit 1); exit 1; } -fi - -# Work around bugs in pre-3.0 UWIN ksh. -for as_var in ENV MAIL MAILPATH -do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -for as_var in \ - LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ - LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ - LC_TELEPHONE LC_TIME -do - if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then - eval $as_var=C; export $as_var - else - ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var - fi -done - -# Required to use basename. -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - - -# Name of the executable. -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# CDPATH. -$as_unset CDPATH - - -if test "x$CONFIG_SHELL" = x; then - if (eval ":") 2>/dev/null; then - as_have_required=yes -else - as_have_required=no -fi - - if test $as_have_required = yes && (eval ": -(as_func_return () { - (exit \$1) -} -as_func_success () { - as_func_return 0 -} -as_func_failure () { - as_func_return 1 -} -as_func_ret_success () { - return 0 -} -as_func_ret_failure () { - return 1 -} - -exitcode=0 -if as_func_success; then - : -else - exitcode=1 - echo as_func_success failed. -fi - -if as_func_failure; then - exitcode=1 - echo as_func_failure succeeded. -fi - -if as_func_ret_success; then - : -else - exitcode=1 - echo as_func_ret_success failed. -fi - -if as_func_ret_failure; then - exitcode=1 - echo as_func_ret_failure succeeded. -fi - -if ( set x; as_func_ret_success y && test x = \"\$1\" ); then - : -else - exitcode=1 - echo positional parameters were not saved. 
-fi - -test \$exitcode = 0) || { (exit 1); exit 1; } - -( - as_lineno_1=\$LINENO - as_lineno_2=\$LINENO - test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && - test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } -") 2> /dev/null; then - : -else - as_candidate_shells= - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - case $as_dir in - /*) - for as_base in sh bash ksh sh5; do - as_candidate_shells="$as_candidate_shells $as_dir/$as_base" - done;; - esac -done -IFS=$as_save_IFS - - - for as_shell in $as_candidate_shells $SHELL; do - # Try only shells that exist, to save several forks. - if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { ("$as_shell") 2> /dev/null <<\_ASEOF -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - -: -_ASEOF -}; then - CONFIG_SHELL=$as_shell - as_have_required=yes - if { "$as_shell" 2> /dev/null <<\_ASEOF -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - -: -(as_func_return () { - (exit $1) -} -as_func_success () { - as_func_return 0 -} -as_func_failure () { - as_func_return 1 -} -as_func_ret_success () { - return 0 -} -as_func_ret_failure () { - return 1 -} - -exitcode=0 -if as_func_success; then - : -else - exitcode=1 - echo as_func_success failed. -fi - -if as_func_failure; then - exitcode=1 - echo as_func_failure succeeded. -fi - -if as_func_ret_success; then - : -else - exitcode=1 - echo as_func_ret_success failed. -fi - -if as_func_ret_failure; then - exitcode=1 - echo as_func_ret_failure succeeded. -fi - -if ( set x; as_func_ret_success y && test x = "$1" ); then - : -else - exitcode=1 - echo positional parameters were not saved. -fi - -test $exitcode = 0) || { (exit 1); exit 1; } - -( - as_lineno_1=$LINENO - as_lineno_2=$LINENO - test "x$as_lineno_1" != "x$as_lineno_2" && - test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } - -_ASEOF -}; then - break -fi - -fi - - done - - if test "x$CONFIG_SHELL" != x; then - for as_var in BASH_ENV ENV - do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var - done - export CONFIG_SHELL - exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} -fi - - - if test $as_have_required = no; then - echo This script requires a shell more modern than all the - echo shells that I found on your system. Please install a - echo modern shell, or manually run the script under such a - echo shell if you do have one. - { (exit 1); exit 1; } -fi - - -fi - -fi - - - -(eval "as_func_return () { - (exit \$1) -} -as_func_success () { - as_func_return 0 -} -as_func_failure () { - as_func_return 1 -} -as_func_ret_success () { - return 0 -} -as_func_ret_failure () { - return 1 -} - -exitcode=0 -if as_func_success; then - : -else - exitcode=1 - echo as_func_success failed. -fi - -if as_func_failure; then - exitcode=1 - echo as_func_failure succeeded. 
-fi - -if as_func_ret_success; then - : -else - exitcode=1 - echo as_func_ret_success failed. -fi - -if as_func_ret_failure; then - exitcode=1 - echo as_func_ret_failure succeeded. -fi - -if ( set x; as_func_ret_success y && test x = \"\$1\" ); then - : -else - exitcode=1 - echo positional parameters were not saved. -fi - -test \$exitcode = 0") || { - echo No shell found that supports shell functions. - echo Please tell autoconf@gnu.org about your system, - echo including any error possibly output before this - echo message -} - - - - as_lineno_1=$LINENO - as_lineno_2=$LINENO - test "x$as_lineno_1" != "x$as_lineno_2" && - test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { - - # Create $as_me.lineno as a copy of $as_myself, but with $LINENO - # uniformly replaced by the line number. The first 'sed' inserts a - # line-number line after each line using $LINENO; the second 'sed' - # does the real work. The second script uses 'N' to pair each - # line-number line with the line containing $LINENO, and appends - # trailing '-' during substitution so that $LINENO is not a special - # case at line end. - # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the - # scripts with optimization help from Paolo Bonzini. Blame Lee - # E. McMahon (1931-1989) for sed's syntax. :-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | - sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno - N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ - t loop - s/-\n.*// - ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 - { (exit 1); exit 1; }; } - - # Don't try to exec as it changes $[0], causing all sort of problems - # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" - # Exit status is that of the last command. - exit -} - - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in --n*) - case `echo 'x\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - *) ECHO_C='\c';; - esac;; -*) - ECHO_N='-n';; -esac - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir -fi -echo >conf$$.file -if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -p' -elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln -else - as_ln_s='cp -p' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - -if mkdir -p . 
2>/dev/null; then - as_mkdir_p=: -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x - -# Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" - -# Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" - - - -exec 7<&0 &1 - -# Name of the host. -# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, -# so uname gets run too. -ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` - -# -# Initializations. -# -ac_default_prefix=/usr/local -ac_clean_files= -ac_config_libobj_dir=. -LIBOBJS= -cross_compiling=no -subdirs= -MFLAGS= -MAKEFLAGS= -SHELL=${CONFIG_SHELL-/bin/sh} - -# Identity of this package. -PACKAGE_NAME='ThreadPool' -PACKAGE_TARNAME='threadpool' -PACKAGE_VERSION='1.1d' -PACKAGE_STRING='ThreadPool 1.1d' -PACKAGE_BUGREPORT='hcedwar@sandia.gov' - -ac_unique_file="src/TPI.c" -# Factoring default headers for most tests. -ac_includes_default="\ -#include -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#ifdef HAVE_SYS_STAT_H -# include -#endif -#ifdef STDC_HEADERS -# include -# include -#else -# ifdef HAVE_STDLIB_H -# include -# endif -#endif -#ifdef HAVE_STRING_H -# if !defined STDC_HEADERS && defined HAVE_MEMORY_H -# include -# endif -# include -#endif -#ifdef HAVE_STRINGS_H -# include -#endif -#ifdef HAVE_INTTYPES_H -# include -#endif -#ifdef HAVE_STDINT_H -# include -#endif -#ifdef HAVE_UNISTD_H -# include -#endif" - -ac_subst_vars='SHELL -PATH_SEPARATOR -PACKAGE_NAME -PACKAGE_TARNAME -PACKAGE_VERSION -PACKAGE_STRING -PACKAGE_BUGREPORT -exec_prefix -prefix -program_transform_name -bindir -sbindir -libexecdir -datarootdir -datadir -sysconfdir -sharedstatedir -localstatedir -includedir -oldincludedir -docdir -infodir -htmldir -dvidir -pdfdir -psdir -libdir -localedir -mandir -DEFS -ECHO_C -ECHO_N -ECHO_T -LIBS -build_alias -host_alias -target_alias -MAINTAINER_MODE_TRUE -MAINTAINER_MODE_FALSE -MAINT -build -build_cpu -build_vendor -build_os -host -host_cpu -host_vendor -host_os -target -target_cpu -target_vendor -target_os -INSTALL_PROGRAM -INSTALL_SCRIPT -INSTALL_DATA -am__isrc -CYGPATH_W -PACKAGE -VERSION -ACLOCAL -AUTOCONF -AUTOMAKE -AUTOHEADER -MAKEINFO -install_sh -STRIP -INSTALL_STRIP_PROGRAM -mkdir_p -AWK -SET_MAKE -am__leading_dot -AMTAR -am__tar -am__untar -MPI_TEMP_CXX -MPI_CXX -HAVE_MPI_TRUE -HAVE_MPI_FALSE -MPI_CXX_EXISTS -MPI_CC_EXISTS -MPI_F77_EXISTS -CC -CFLAGS -LDFLAGS -CPPFLAGS -ac_ct_CC -EXEEXT -OBJEXT -DEPDIR -am__include -am__quote -AMDEP_TRUE -AMDEP_FALSE -AMDEPBACKSLASH -CCDEPMODE -am__fastdepCC_TRUE -am__fastdepCC_FALSE -CXX -CXXFLAGS -ac_ct_CXX -CXXDEPMODE -am__fastdepCXX_TRUE -am__fastdepCXX_FALSE -RANLIB -USE_ALTERNATE_AR_TRUE -USE_ALTERNATE_AR_FALSE -ALTERNATE_AR -CXXCPP -USING_EXPORT_MAKEFILES_TRUE -USING_EXPORT_MAKEFILES_FALSE -PERL_EXE -HAVE_PERL -USING_PERL_TRUE -USING_PERL_FALSE -USING_GNUMAKE_TRUE -USING_GNUMAKE_FALSE -BUILD_TESTS_TRUE -BUILD_TESTS_FALSE -SUB_TEST_TRUE -SUB_TEST_FALSE -GREP -EGREP -PTHREAD_CC -PTHREAD_LIBS -PTHREAD_CFLAGS -ac_aux_dir -LIBOBJS -LTLIBOBJS' -ac_subst_files='' - 
ac_precious_vars='build_alias -host_alias -target_alias -CC -CFLAGS -LDFLAGS -LIBS -CPPFLAGS -CXX -CXXFLAGS -CCC -CXXCPP' - - -# Initialize some variables set by options. -ac_init_help= -ac_init_version=false -# The variables have the same names as the options, with -# dashes changed to underlines. -cache_file=/dev/null -exec_prefix=NONE -no_create= -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -verbose= -x_includes=NONE -x_libraries=NONE - -# Installation directory options. -# These are left unexpanded so users can "make install exec_prefix=/foo" -# and all the variables that are supposed to be based on exec_prefix -# by default will actually change. -# Use braces instead of parens because sh, perl, etc. also accept them. -# (The list follows the same order as the GNU Coding Standards.) -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datarootdir='${prefix}/share' -datadir='${datarootdir}' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -includedir='${prefix}/include' -oldincludedir='/usr/include' -docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' -infodir='${datarootdir}/info' -htmldir='${docdir}' -dvidir='${docdir}' -pdfdir='${docdir}' -psdir='${docdir}' -libdir='${exec_prefix}/lib' -localedir='${datarootdir}/locale' -mandir='${datarootdir}/man' - -ac_prev= -ac_dashdash= -for ac_option -do - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval $ac_prev=\$ac_option - ac_prev= - continue - fi - - case $ac_option in - *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; - *) ac_optarg=yes ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. - - case $ac_dashdash$ac_option in - --) - ac_dashdash=yes ;; - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir=$ac_optarg ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build_alias ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build_alias=$ac_optarg ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file=$ac_optarg ;; - - --config-cache | -C) - cache_file=config.cache ;; - - -datadir | --datadir | --datadi | --datad) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=*) - datadir=$ac_optarg ;; - - -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ - | --dataroo | --dataro | --datar) - ac_prev=datarootdir ;; - -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ - | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) - datarootdir=$ac_optarg ;; - - -disable-* | --disable-*) - ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` - # Reject names that are not valid shell variable names. 
- expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && - { echo "$as_me: error: invalid feature name: $ac_feature" >&2 - { (exit 1); exit 1; }; } - ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` - eval enable_$ac_feature=no ;; - - -docdir | --docdir | --docdi | --doc | --do) - ac_prev=docdir ;; - -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) - docdir=$ac_optarg ;; - - -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) - ac_prev=dvidir ;; - -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) - dvidir=$ac_optarg ;; - - -enable-* | --enable-*) - ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && - { echo "$as_me: error: invalid feature name: $ac_feature" >&2 - { (exit 1); exit 1; }; } - ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` - eval enable_$ac_feature=\$ac_optarg ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix=$ac_optarg ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he | -h) - ac_init_help=long ;; - -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) - ac_init_help=recursive ;; - -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) - ac_init_help=short ;; - - -host | --host | --hos | --ho) - ac_prev=host_alias ;; - -host=* | --host=* | --hos=* | --ho=*) - host_alias=$ac_optarg ;; - - -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) - ac_prev=htmldir ;; - -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ - | --ht=*) - htmldir=$ac_optarg ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir=$ac_optarg ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir=$ac_optarg ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir=$ac_optarg ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir=$ac_optarg ;; - - -localedir | --localedir | --localedi | --localed | --locale) - ac_prev=localedir ;; - -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) - localedir=$ac_optarg ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | --localst | --locals) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) - localstatedir=$ac_optarg ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - 
mandir=$ac_optarg ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c | -n) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir=$ac_optarg ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix=$ac_optarg ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix=$ac_optarg ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | --program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix=$ac_optarg ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name=$ac_optarg ;; - - -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) - ac_prev=pdfdir ;; - -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) - pdfdir=$ac_optarg ;; - - -psdir | --psdir | --psdi | --psd | --ps) - ac_prev=psdir ;; - -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) - psdir=$ac_optarg ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir=$ac_optarg ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir=$ac_optarg ;; - - 
-site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site=$ac_optarg ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - srcdir=$ac_optarg ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir=$ac_optarg ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target_alias ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target_alias=$ac_optarg ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers | -V) - ac_init_version=: ;; - - -with-* | --with-*) - ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && - { echo "$as_me: error: invalid package name: $ac_package" >&2 - { (exit 1); exit 1; }; } - ac_package=`echo $ac_package | sed 's/[-.]/_/g'` - eval with_$ac_package=\$ac_optarg ;; - - -without-* | --without-*) - ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && - { echo "$as_me: error: invalid package name: $ac_package" >&2 - { (exit 1); exit 1; }; } - ac_package=`echo $ac_package | sed 's/[-.]/_/g'` - eval with_$ac_package=no ;; - - --x) - # Obsolete; use --with-x. - with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes=$ac_optarg ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries=$ac_optarg ;; - - -*) { echo "$as_me: error: unrecognized option: $ac_option -Try \`$0 --help' for more information." >&2 - { (exit 1); exit 1; }; } - ;; - - *=*) - ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` - # Reject names that are not valid shell variable names. - expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && - { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 - { (exit 1); exit 1; }; } - eval $ac_envvar=\$ac_optarg - export $ac_envvar ;; - - *) - # FIXME: should be removed in autoconf 3.0. - echo "$as_me: WARNING: you should use --build, --host, --target" >&2 - expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - echo "$as_me: WARNING: invalid host type: $ac_option" >&2 - : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} - ;; - - esac -done - -if test -n "$ac_prev"; then - ac_option=--`echo $ac_prev | sed 's/_/-/g'` - { echo "$as_me: error: missing argument to $ac_option" >&2 - { (exit 1); exit 1; }; } -fi - -# Be sure to have absolute directory names. 
-for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ - datadir sysconfdir sharedstatedir localstatedir includedir \ - oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir -do - eval ac_val=\$$ac_var - case $ac_val in - [\\/$]* | ?:[\\/]* ) continue;; - NONE | '' ) case $ac_var in *prefix ) continue;; esac;; - esac - { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 - { (exit 1); exit 1; }; } -done - -# There might be people who depend on the old broken behavior: `$host' -# used to hold the argument of --host etc. -# FIXME: To remove some day. -build=$build_alias -host=$host_alias -target=$target_alias - -# FIXME: To remove some day. -if test "x$host_alias" != x; then - if test "x$build_alias" = x; then - cross_compiling=maybe - echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. - If a cross compiler is detected then cross compile mode will be used." >&2 - elif test "x$build_alias" != "x$host_alias"; then - cross_compiling=yes - fi -fi - -ac_tool_prefix= -test -n "$host_alias" && ac_tool_prefix=$host_alias- - -test "$silent" = yes && exec 6>/dev/null - - -ac_pwd=`pwd` && test -n "$ac_pwd" && -ac_ls_di=`ls -di .` && -ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || - { echo "$as_me: error: Working directory cannot be determined" >&2 - { (exit 1); exit 1; }; } -test "X$ac_ls_di" = "X$ac_pwd_ls_di" || - { echo "$as_me: error: pwd does not report name of working directory" >&2 - { (exit 1); exit 1; }; } - - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then the parent directory. - ac_confdir=`$as_dirname -- "$0" || -$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$0" : 'X\(//\)[^/]' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -echo X"$0" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - srcdir=$ac_confdir - if test ! -r "$srcdir/$ac_unique_file"; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r "$srcdir/$ac_unique_file"; then - test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." - { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 - { (exit 1); exit 1; }; } -fi -ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" -ac_abs_confdir=`( - cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2 - { (exit 1); exit 1; }; } - pwd)` -# When building in place, set srcdir=. -if test "$ac_abs_confdir" = "$ac_pwd"; then - srcdir=. -fi -# Remove unnecessary trailing slashes from srcdir. -# Double slashes in file names in object file debugging info -# mess up M-x gdb in Emacs. -case $srcdir in -*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; -esac -for ac_var in $ac_precious_vars; do - eval ac_env_${ac_var}_set=\${${ac_var}+set} - eval ac_env_${ac_var}_value=\$${ac_var} - eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} - eval ac_cv_env_${ac_var}_value=\$${ac_var} -done - -# -# Report the --help message. -# -if test "$ac_init_help" = "long"; then - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat <<_ACEOF -\`configure' configures ThreadPool 1.1d to adapt to many kinds of systems. - -Usage: $0 [OPTION]... 
[VAR=VALUE]... - -To assign environment variables (e.g., CC, CFLAGS...), specify them as -VAR=VALUE. See below for descriptions of some of the useful variables. - -Defaults for the options are specified in brackets. - -Configuration: - -h, --help display this help and exit - --help=short display options specific to this package - --help=recursive display the short help of all the included packages - -V, --version display version information and exit - -q, --quiet, --silent do not print \`checking...' messages - --cache-file=FILE cache test results in FILE [disabled] - -C, --config-cache alias for \`--cache-file=config.cache' - -n, --no-create do not create output files - --srcdir=DIR find the sources in DIR [configure dir or \`..'] - -Installation directories: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [PREFIX] - -By default, \`make install' will install all the files in -\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify -an installation prefix other than \`$ac_default_prefix' using \`--prefix', -for instance \`--prefix=\$HOME'. - -For better control, use the options below. - -Fine tuning of the installation directories: - --bindir=DIR user executables [EPREFIX/bin] - --sbindir=DIR system admin executables [EPREFIX/sbin] - --libexecdir=DIR program executables [EPREFIX/libexec] - --sysconfdir=DIR read-only single-machine data [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] - --localstatedir=DIR modifiable single-machine data [PREFIX/var] - --libdir=DIR object code libraries [EPREFIX/lib] - --includedir=DIR C header files [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc [/usr/include] - --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] - --datadir=DIR read-only architecture-independent data [DATAROOTDIR] - --infodir=DIR info documentation [DATAROOTDIR/info] - --localedir=DIR locale-dependent data [DATAROOTDIR/locale] - --mandir=DIR man documentation [DATAROOTDIR/man] - --docdir=DIR documentation root [DATAROOTDIR/doc/threadpool] - --htmldir=DIR html documentation [DOCDIR] - --dvidir=DIR dvi documentation [DOCDIR] - --pdfdir=DIR pdf documentation [DOCDIR] - --psdir=DIR ps documentation [DOCDIR] -_ACEOF - - cat <<\_ACEOF - -Program names: - --program-prefix=PREFIX prepend PREFIX to installed program names - --program-suffix=SUFFIX append SUFFIX to installed program names - --program-transform-name=PROGRAM run sed PROGRAM on installed program names - -System types: - --build=BUILD configure for building on BUILD [guessed] - --host=HOST cross-compile to build programs to run on HOST [BUILD] - --target=TARGET configure for building compilers for TARGET [HOST] -_ACEOF -fi - -if test -n "$ac_init_help"; then - case $ac_init_help in - short | recursive ) echo "Configuration of ThreadPool 1.1d:";; - esac - cat <<\_ACEOF - -Optional Features: - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --enable-maintainer-mode enable make rules and dependencies not useful - (and sometimes confusing) to the casual installer - --enable-mpi MPI support - --disable-dependency-tracking speeds up one-time build - --enable-dependency-tracking do not reject slow dependency extractors - --enable-export-makefiles - Creates export makefiles in the install (prefix) - directory. 
This option requires perl to be set in - your path or defined with --with-perl=. Note that the export makefiles are - always created and used in the build directory, but - will not be installable without this option to - change the paths. (default is yes) - --enable-tests Make tests for all Trilinos packages buildable with - 'make tests' (default is yes) - - --enable-threadpool-tests - Make ThreadPool tests buildable with 'make tests' - (default is yes if --disable-tests is not specified) - --enable-libcheck Check for some third-party libraries. (Cannot be - disabled unless tests and examples are also - disabled.) (default is yes) - -Optional Packages: - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --with-install=INSTALL_PROGRAM - Use the installation program INSTALL_PROGRAM rather - the default that is provided. For example - --with-install="/path/install -p" - --with-mpi-compilers=PATH - use MPI compilers mpicc, mpif77, and mpicxx, mpic++ - or mpiCC in the specified path or in the default - path if no path is specified. Enables MPI - --with-mpi=MPIROOT use MPI root directory (enables MPI) - --with-mpi-libs="LIBS" MPI libraries ["-lmpi"] - --with-mpi-incdir=DIR MPI include directory [MPIROOT/include] Do not use - -I - --with-mpi-libdir=DIR MPI library directory [MPIROOT/lib] Do not use -L - --with-ccflags additional CCFLAGS flags to be added: will prepend - to CCFLAGS - --with-cxxflags additional CXXFLAGS flags to be added: will - prepend to CXXFLAGS - --with-cflags additional CFLAGS flags to be added: will prepend - to CFLAGS - --with-libs List additional libraries here. For example, - --with-libs=-lsuperlu or - --with-libs=/path/libsuperlu.a - --with-ldflags additional LDFLAGS flags to be added: will prepend - to LDFLAGS - --with-ar override archiver command (default is "ar cru") - --with-perl supply a perl executable. For example - --with-perl=/usr/bin/perl. - --with-gnumake Gnu's make has special functions we can use to - eliminate redundant paths in the build and link - lines. Enable this if you use gnu-make to build - Trilinos. This requires that perl is in your path or - that you have specified the perl executable with - --with-perl=. Configure will check - for the existence of the perl executable and quit - with an error if it is not found. (default is no) - --with-libdirs OBSOLETE use --with-ldflags instead. (ex. - --with-ldflags="-L -L") - --with-incdirs additional directories containing include files: - will prepend to search here for includes, use -Idir - format - -Some influential environment variables: - CC C compiler command - CFLAGS C compiler flags - LDFLAGS linker flags, e.g. -L if you have libraries in a - nonstandard directory - LIBS libraries to pass to the linker, e.g. -l - CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if - you have headers in a nonstandard directory - CXX C++ compiler command - CXXFLAGS C++ compiler flags - CXXCPP C++ preprocessor - -Use these variables to override the choices made by `configure' or to help -it to find libraries and programs with nonstandard names/locations. - -Report bugs to . -_ACEOF -ac_status=$? -fi - -if test "$ac_init_help" = "recursive"; then - # If there are subdirs, report their specific --help. - for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue - test -d "$ac_dir" || continue - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. 
ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. - if test -f "$ac_srcdir/configure.gnu"; then - echo && - $SHELL "$ac_srcdir/configure.gnu" --help=recursive - elif test -f "$ac_srcdir/configure"; then - echo && - $SHELL "$ac_srcdir/configure" --help=recursive - else - echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 - fi || ac_status=$? - cd "$ac_pwd" || { ac_status=$?; break; } - done -fi - -test -n "$ac_init_help" && exit $ac_status -if $ac_init_version; then - cat <<\_ACEOF -ThreadPool configure 1.1d -generated by GNU Autoconf 2.61 - -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, -2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. -This configure script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it. -_ACEOF - exit -fi -cat >config.log <<_ACEOF -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. - -It was created by ThreadPool $as_me 1.1d, which was -generated by GNU Autoconf 2.61. Invocation command line was - - $ $0 $@ - -_ACEOF -exec 5>>config.log -{ -cat <<_ASUNAME -## --------- ## -## Platform. ## -## --------- ## - -hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` - -/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` -/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` -/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` -/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` - -_ASUNAME - -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - echo "PATH: $as_dir" -done -IFS=$as_save_IFS - -} >&5 - -cat >&5 <<_ACEOF - - -## ----------- ## -## Core tests. ## -## ----------- ## - -_ACEOF - - -# Keep a trace of the command line. -# Strip out --no-create and --no-recursion so they do not pile up. 
-# Strip out --silent because we don't want to record it for future runs. -# Also quote any args containing shell meta-characters. -# Make two passes to allow for proper duplicate-argument suppression. -ac_configure_args= -ac_configure_args0= -ac_configure_args1= -ac_must_keep_next=false -for ac_pass in 1 2 -do - for ac_arg - do - case $ac_arg in - -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - continue ;; - *\'*) - ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - case $ac_pass in - 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; - 2) - ac_configure_args1="$ac_configure_args1 '$ac_arg'" - if test $ac_must_keep_next = true; then - ac_must_keep_next=false # Got value, back to normal. - else - case $ac_arg in - *=* | --config-cache | -C | -disable-* | --disable-* \ - | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ - | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ - | -with-* | --with-* | -without-* | --without-* | --x) - case "$ac_configure_args0 " in - "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; - esac - ;; - -* ) ac_must_keep_next=true ;; - esac - fi - ac_configure_args="$ac_configure_args '$ac_arg'" - ;; - esac - done -done -$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } -$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } - -# When interrupted or exit'd, cleanup temporary files, and complete -# config.log. We remove comments because anyway the quotes in there -# would cause problems or look ugly. -# WARNING: Use '\'' to represent an apostrophe within the trap. -# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. -trap 'exit_status=$? - # Save into config.log some information that might help in debugging. - { - echo - - cat <<\_ASBOX -## ---------------- ## -## Cache variables. ## -## ---------------- ## -_ASBOX - echo - # The following way of writing the cache mishandles newlines in values, -( - for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 -echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - *) $as_unset $ac_var ;; - esac ;; - esac - done - (set) 2>&1 | - case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - sed -n \ - "s/'\''/'\''\\\\'\'''\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" - ;; #( - *) - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) - echo - - cat <<\_ASBOX -## ----------------- ## -## Output variables. ## -## ----------------- ## -_ASBOX - echo - for ac_var in $ac_subst_vars - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - echo "$ac_var='\''$ac_val'\''" - done | sort - echo - - if test -n "$ac_subst_files"; then - cat <<\_ASBOX -## ------------------- ## -## File substitutions. 
## -## ------------------- ## -_ASBOX - echo - for ac_var in $ac_subst_files - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - echo "$ac_var='\''$ac_val'\''" - done | sort - echo - fi - - if test -s confdefs.h; then - cat <<\_ASBOX -## ----------- ## -## confdefs.h. ## -## ----------- ## -_ASBOX - echo - cat confdefs.h - echo - fi - test "$ac_signal" != 0 && - echo "$as_me: caught signal $ac_signal" - echo "$as_me: exit $exit_status" - } >&5 - rm -f core *.core core.conftest.* && - rm -f -r conftest* confdefs* conf$$* $ac_clean_files && - exit $exit_status -' 0 -for ac_signal in 1 2 13 15; do - trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal -done -ac_signal=0 - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -f -r conftest* confdefs.h - -# Predefined preprocessor variables. - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_NAME "$PACKAGE_NAME" -_ACEOF - - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_TARNAME "$PACKAGE_TARNAME" -_ACEOF - - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_VERSION "$PACKAGE_VERSION" -_ACEOF - - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_STRING "$PACKAGE_STRING" -_ACEOF - - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" -_ACEOF - - -# Let the site file select an alternate cache file if it wants to. -# Prefer explicitly selected file to automatically selected ones. -if test -n "$CONFIG_SITE"; then - set x "$CONFIG_SITE" -elif test "x$prefix" != xNONE; then - set x "$prefix/share/config.site" "$prefix/etc/config.site" -else - set x "$ac_default_prefix/share/config.site" \ - "$ac_default_prefix/etc/config.site" -fi -shift -for ac_site_file -do - if test -r "$ac_site_file"; then - { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 -echo "$as_me: loading site script $ac_site_file" >&6;} - sed 's/^/| /' "$ac_site_file" >&5 - . "$ac_site_file" - fi -done - -if test -r "$cache_file"; then - # Some versions of bash will fail to source /dev/null (special - # files actually), so we avoid doing that. - if test -f "$cache_file"; then - { echo "$as_me:$LINENO: loading cache $cache_file" >&5 -echo "$as_me: loading cache $cache_file" >&6;} - case $cache_file in - [\\/]* | ?:[\\/]* ) . "$cache_file";; - *) . "./$cache_file";; - esac - fi -else - { echo "$as_me:$LINENO: creating cache $cache_file" >&5 -echo "$as_me: creating cache $cache_file" >&6;} - >$cache_file -fi - -# Check that the precious variables saved in the cache have kept the same -# value. 
-ac_cache_corrupted=false -for ac_var in $ac_precious_vars; do - eval ac_old_set=\$ac_cv_env_${ac_var}_set - eval ac_new_set=\$ac_env_${ac_var}_set - eval ac_old_val=\$ac_cv_env_${ac_var}_value - eval ac_new_val=\$ac_env_${ac_var}_value - case $ac_old_set,$ac_new_set in - set,) - { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,set) - { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 -echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,);; - *) - if test "x$ac_old_val" != "x$ac_new_val"; then - { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 -echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} - { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 -echo "$as_me: former value: $ac_old_val" >&2;} - { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 -echo "$as_me: current value: $ac_new_val" >&2;} - ac_cache_corrupted=: - fi;; - esac - # Pass precious variables to config.status. - if test "$ac_new_set" = set; then - case $ac_new_val in - *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; - *) ac_arg=$ac_var=$ac_new_val ;; - esac - case " $ac_configure_args " in - *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. - *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; - esac - fi -done -if $ac_cache_corrupted; then - { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 -echo "$as_me: error: changes in the environment can compromise the build" >&2;} - { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 -echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} - { (exit 1); exit 1; }; } -fi - - - - - - - - - - - - - - - - - - - - - - - - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - - -# Hello World! -echo "----------------------------------------" -echo "Running ThreadPool Configure Script" -echo "----------------------------------------" - -# This is to protect against accidentally specifying the wrong -# directory with --srcdir. Any file in that directory will do, -# preferably one that is unlikely to be removed or renamed. - - - -# Specify directory for auxillary build tools (e.g., install-sh, -# config.sub, config.guess) and M4 files. - -ac_aux_dir= -for ac_dir in config "$srcdir"/config; do - if test -f "$ac_dir/install-sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f "$ac_dir/install.sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - elif test -f "$ac_dir/shtool"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/shtool install -c" - break - fi -done -if test -z "$ac_aux_dir"; then - { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in config \"$srcdir\"/config" >&5 -echo "$as_me: error: cannot find install-sh or install.sh in config \"$srcdir\"/config" >&2;} - { (exit 1); exit 1; }; } -fi - -# These three variables are undocumented and unsupported, -# and are intended to be withdrawn in a future Autoconf release. 
-# They can cause serious problems if a builder's source tree is in a directory -# whose full name contains unusual characters. -ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. -ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. -ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. - - -# #auto np# - Change file names in next line -# Configure should create src/ThreadPool_config.h from src/ThreadPool_config.h.in - -ac_config_headers="$ac_config_headers src/ThreadPool_config.h:src/ThreadPool_config.h.in" - - -# Allow users to specify their own "install" command. If none is specified, -# the default is install-sh found in the config subdirectory. - - -# Check whether --with-install was given. -if test "${with_install+set}" = set; then - withval=$with_install; - INSTALL=$withval - INSTALL_PROGRAM=$withval - INSTALL_SCRIPT=$withval - INSTALL_DATA="$withval -m 644" - -fi - - -# AM_MAINTAINER_MODE turns off maintainer-only makefile targets by -# default, and changes configure to understand a -# --enable-maintainer-mode option. --enable-maintainer-mode turns the -# maintainer-only targets back on. The maintainer-only makefile -# targets permit end users to clean automatically-generated files such -# as configure, which means they have to have autoconf and automake -# installed to repair the damage. AM_MAINTAINER_MODE makes it a bit -# harder for users to shoot themselves in the foot. - -{ echo "$as_me:$LINENO: checking whether to enable maintainer-specific portions of Makefiles" >&5 -echo $ECHO_N "checking whether to enable maintainer-specific portions of Makefiles... $ECHO_C" >&6; } - # Check whether --enable-maintainer-mode was given. -if test "${enable_maintainer_mode+set}" = set; then - enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval -else - USE_MAINTAINER_MODE=no -fi - - { echo "$as_me:$LINENO: result: $USE_MAINTAINER_MODE" >&5 -echo "${ECHO_T}$USE_MAINTAINER_MODE" >&6; } - if test $USE_MAINTAINER_MODE = yes; then - MAINTAINER_MODE_TRUE= - MAINTAINER_MODE_FALSE='#' -else - MAINTAINER_MODE_TRUE='#' - MAINTAINER_MODE_FALSE= -fi - - MAINT=$MAINTAINER_MODE_TRUE - - - -# Define $build, $host, $target, etc - -# Make sure we can run config.sub. -$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || - { { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5 -echo "$as_me: error: cannot run $SHELL $ac_aux_dir/config.sub" >&2;} - { (exit 1); exit 1; }; } - -{ echo "$as_me:$LINENO: checking build system type" >&5 -echo $ECHO_N "checking build system type... 
$ECHO_C" >&6; } -if test "${ac_cv_build+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_build_alias=$build_alias -test "x$ac_build_alias" = x && - ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` -test "x$ac_build_alias" = x && - { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5 -echo "$as_me: error: cannot guess build type; you must specify one" >&2;} - { (exit 1); exit 1; }; } -ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || - { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&5 -echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&2;} - { (exit 1); exit 1; }; } - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_build" >&5 -echo "${ECHO_T}$ac_cv_build" >&6; } -case $ac_cv_build in -*-*-*) ;; -*) { { echo "$as_me:$LINENO: error: invalid value of canonical build" >&5 -echo "$as_me: error: invalid value of canonical build" >&2;} - { (exit 1); exit 1; }; };; -esac -build=$ac_cv_build -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_build -shift -build_cpu=$1 -build_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -build_os=$* -IFS=$ac_save_IFS -case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac - - -{ echo "$as_me:$LINENO: checking host system type" >&5 -echo $ECHO_N "checking host system type... $ECHO_C" >&6; } -if test "${ac_cv_host+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test "x$host_alias" = x; then - ac_cv_host=$ac_cv_build -else - ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || - { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5 -echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;} - { (exit 1); exit 1; }; } -fi - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_host" >&5 -echo "${ECHO_T}$ac_cv_host" >&6; } -case $ac_cv_host in -*-*-*) ;; -*) { { echo "$as_me:$LINENO: error: invalid value of canonical host" >&5 -echo "$as_me: error: invalid value of canonical host" >&2;} - { (exit 1); exit 1; }; };; -esac -host=$ac_cv_host -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_host -shift -host_cpu=$1 -host_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -host_os=$* -IFS=$ac_save_IFS -case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac - - -{ echo "$as_me:$LINENO: checking target system type" >&5 -echo $ECHO_N "checking target system type... 
$ECHO_C" >&6; } -if test "${ac_cv_target+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test "x$target_alias" = x; then - ac_cv_target=$ac_cv_host -else - ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || - { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&5 -echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&2;} - { (exit 1); exit 1; }; } -fi - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_target" >&5 -echo "${ECHO_T}$ac_cv_target" >&6; } -case $ac_cv_target in -*-*-*) ;; -*) { { echo "$as_me:$LINENO: error: invalid value of canonical target" >&5 -echo "$as_me: error: invalid value of canonical target" >&2;} - { (exit 1); exit 1; }; };; -esac -target=$ac_cv_target -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_target -shift -target_cpu=$1 -target_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -target_os=$* -IFS=$ac_save_IFS -case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac - - -# The aliases save the names the user supplied, while $host etc. -# will get canonicalized. -test -n "$target_alias" && - test "$program_prefix$program_suffix$program_transform_name" = \ - NONENONEs,x,x, && - program_prefix=${target_alias}- - -# Use automake - -# - Required version of automake. -am__api_version='1.10' - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AmigaOS /C/install, which installs bootblocks on floppy discs -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# OS/2's system install, which has a completely different semantic -# ./install, which can be erroneously created by make from ./install.sh. -{ echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5 -echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6; } -if test -z "$INSTALL"; then -if test "${ac_cv_path_install+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - # Account for people who put trailing slashes in PATH elements. -case $as_dir/ in - ./ | .// | /cC/* | \ - /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ - ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \ - /usr/ucb/* ) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then - if test $ac_prog = install && - grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - elif test $ac_prog = install && - grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then - # program-specific install script used by HP pwplus--don't use. 
- : - else - ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" - break 3 - fi - fi - done - done - ;; -esac -done -IFS=$as_save_IFS - - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL=$ac_cv_path_install - else - # As a last resort, use the slow shell script. Don't cache a - # value for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the value is a relative name. - INSTALL=$ac_install_sh - fi -fi -{ echo "$as_me:$LINENO: result: $INSTALL" >&5 -echo "${ECHO_T}$INSTALL" >&6; } - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. -test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - -{ echo "$as_me:$LINENO: checking whether build environment is sane" >&5 -echo $ECHO_N "checking whether build environment is sane... $ECHO_C" >&6; } -# Just in case -sleep 1 -echo timestamp > conftest.file -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` - if test "$*" = "X"; then - # -L didn't work. - set X `ls -t $srcdir/configure conftest.file` - fi - rm -f conftest.file - if test "$*" != "X $srcdir/configure conftest.file" \ - && test "$*" != "X conftest.file $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - { { echo "$as_me:$LINENO: error: ls -t appears to fail. Make sure there is not a broken -alias in your environment" >&5 -echo "$as_me: error: ls -t appears to fail. Make sure there is not a broken -alias in your environment" >&2;} - { (exit 1); exit 1; }; } - fi - - test "$2" = conftest.file - ) -then - # Ok. - : -else - { { echo "$as_me:$LINENO: error: newly created file is older than distributed files! -Check your system clock" >&5 -echo "$as_me: error: newly created file is older than distributed files! -Check your system clock" >&2;} - { (exit 1); exit 1; }; } -fi -{ echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } -test "$program_prefix" != NONE && - program_transform_name="s&^&$program_prefix&;$program_transform_name" -# Use a double $ so make ignores it. -test "$program_suffix" != NONE && - program_transform_name="s&\$&$program_suffix&;$program_transform_name" -# Double any \ or $. echo might interpret backslashes. -# By default was `s,x,x', remove it if useless. 
-cat <<\_ACEOF >conftest.sed -s/[\\$]/&&/g;s/;s,x,x,$// -_ACEOF -program_transform_name=`echo $program_transform_name | sed -f conftest.sed` -rm -f conftest.sed - -# expand $ac_aux_dir to an absolute path -am_aux_dir=`cd $ac_aux_dir && pwd` - -test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" -# Use eval to expand $SHELL -if eval "$MISSING --run true"; then - am_missing_run="$MISSING --run " -else - am_missing_run= - { echo "$as_me:$LINENO: WARNING: \`missing' script is too old or missing" >&5 -echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;} -fi - -{ echo "$as_me:$LINENO: checking for a thread-safe mkdir -p" >&5 -echo $ECHO_N "checking for a thread-safe mkdir -p... $ECHO_C" >&6; } -if test -z "$MKDIR_P"; then - if test "${ac_cv_path_mkdir+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in mkdir gmkdir; do - for ac_exec_ext in '' $ac_executable_extensions; do - { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue - case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( - 'mkdir (GNU coreutils) '* | \ - 'mkdir (coreutils) '* | \ - 'mkdir (fileutils) '4.1*) - ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext - break 3;; - esac - done - done -done -IFS=$as_save_IFS - -fi - - if test "${ac_cv_path_mkdir+set}" = set; then - MKDIR_P="$ac_cv_path_mkdir -p" - else - # As a last resort, use the slow shell script. Don't cache a - # value for MKDIR_P within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the value is a relative name. - test -d ./--version && rmdir ./--version - MKDIR_P="$ac_install_sh -d" - fi -fi -{ echo "$as_me:$LINENO: result: $MKDIR_P" >&5 -echo "${ECHO_T}$MKDIR_P" >&6; } - -mkdir_p="$MKDIR_P" -case $mkdir_p in - [\\/$]* | ?:[\\/]*) ;; - */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;; -esac - -for ac_prog in gawk mawk nawk awk -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_AWK+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$AWK"; then - ac_cv_prog_AWK="$AWK" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_AWK="$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -AWK=$ac_cv_prog_AWK -if test -n "$AWK"; then - { echo "$as_me:$LINENO: result: $AWK" >&5 -echo "${ECHO_T}$AWK" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - test -n "$AWK" && break -done - -{ echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5 -echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... 
$ECHO_C" >&6; } -set x ${MAKE-make}; ac_make=`echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` -if { as_var=ac_cv_prog_make_${ac_make}_set; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.make <<\_ACEOF -SHELL = /bin/sh -all: - @echo '@@@%%%=$(MAKE)=@@@%%%' -_ACEOF -# GNU make sometimes prints "make[1]: Entering...", which would confuse us. -case `${MAKE-make} -f conftest.make 2>/dev/null` in - *@@@%%%=?*=@@@%%%*) - eval ac_cv_prog_make_${ac_make}_set=yes;; - *) - eval ac_cv_prog_make_${ac_make}_set=no;; -esac -rm -f conftest.make -fi -if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - SET_MAKE= -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - SET_MAKE="MAKE=${MAKE-make}" -fi - -rm -rf .tst 2>/dev/null -mkdir .tst 2>/dev/null -if test -d .tst; then - am__leading_dot=. -else - am__leading_dot=_ -fi -rmdir .tst 2>/dev/null - -if test "`cd $srcdir && pwd`" != "`pwd`"; then - # Use -I$(srcdir) only when $(srcdir) != ., so that make's output - # is not polluted with repeated "-I." - am__isrc=' -I$(srcdir)' - # test to see if srcdir already configured - if test -f $srcdir/config.status; then - { { echo "$as_me:$LINENO: error: source directory already configured; run \"make distclean\" there first" >&5 -echo "$as_me: error: source directory already configured; run \"make distclean\" there first" >&2;} - { (exit 1); exit 1; }; } - fi -fi - -# test whether we have cygpath -if test -z "$CYGPATH_W"; then - if (cygpath --version) >/dev/null 2>/dev/null; then - CYGPATH_W='cygpath -w' - else - CYGPATH_W=echo - fi -fi - - -# Define the identity of the package. - PACKAGE='threadpool' - VERSION='1.1d' - - -# Some tools Automake needs. - -ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} - - -AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} - - -AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} - - -AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} - - -MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} - -install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"} - -# Installed binaries are usually stripped using `strip' when the user -# run `make install-strip'. However `strip' might not be the right -# tool to use in cross-compilation environments, therefore Automake -# will honor the `STRIP' environment variable to overrule this program. -if test "$cross_compiling" != no; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. -set dummy ${ac_tool_prefix}strip; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_STRIP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$STRIP"; then - ac_cv_prog_STRIP="$STRIP" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_STRIP="${ac_tool_prefix}strip" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -STRIP=$ac_cv_prog_STRIP -if test -n "$STRIP"; then - { echo "$as_me:$LINENO: result: $STRIP" >&5 -echo "${ECHO_T}$STRIP" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_STRIP"; then - ac_ct_STRIP=$STRIP - # Extract the first word of "strip", so it can be a program name with args. -set dummy strip; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$ac_ct_STRIP"; then - ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_STRIP="strip" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP -if test -n "$ac_ct_STRIP"; then - { echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5 -echo "${ECHO_T}$ac_ct_STRIP" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - if test "x$ac_ct_STRIP" = x; then - STRIP=":" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&2;} -ac_tool_warned=yes ;; -esac - STRIP=$ac_ct_STRIP - fi -else - STRIP="$ac_cv_prog_STRIP" -fi - -fi -INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" - -# We need awk for the "check" target. The system "awk" is bad on -# some platforms. -# Always define AMTAR for backward compatibility. - -AMTAR=${AMTAR-"${am_missing_run}tar"} - - -{ echo "$as_me:$LINENO: checking how to create a ustar tar archive" >&5 -echo $ECHO_N "checking how to create a ustar tar archive... $ECHO_C" >&6; } -# Loop over all known methods to create a tar archive until one works. -_am_tools='gnutar plaintar pax cpio none' -_am_tools=${am_cv_prog_tar_ustar-$_am_tools} -# Do not fold the above two line into one, because Tru64 sh and -# Solaris sh will not grok spaces in the rhs of `-'. -for _am_tool in $_am_tools -do - case $_am_tool in - gnutar) - for _am_tar in tar gnutar gtar; - do - { echo "$as_me:$LINENO: $_am_tar --version" >&5 - ($_am_tar --version) >&5 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && break - done - am__tar="$_am_tar --format=ustar -chf - "'"$$tardir"' - am__tar_="$_am_tar --format=ustar -chf - "'"$tardir"' - am__untar="$_am_tar -xf -" - ;; - plaintar) - # Must skip GNU tar: if it does not support --format= it doesn't create - # ustar tarball either. 
- (tar --version) >/dev/null 2>&1 && continue - am__tar='tar chf - "$$tardir"' - am__tar_='tar chf - "$tardir"' - am__untar='tar xf -' - ;; - pax) - am__tar='pax -L -x ustar -w "$$tardir"' - am__tar_='pax -L -x ustar -w "$tardir"' - am__untar='pax -r' - ;; - cpio) - am__tar='find "$$tardir" -print | cpio -o -H ustar -L' - am__tar_='find "$tardir" -print | cpio -o -H ustar -L' - am__untar='cpio -i -H ustar -d' - ;; - none) - am__tar=false - am__tar_=false - am__untar=false - ;; - esac - - # If the value was cached, stop now. We just wanted to have am__tar - # and am__untar set. - test -n "${am_cv_prog_tar_ustar}" && break - - # tar/untar a dummy directory, and stop if the command works - rm -rf conftest.dir - mkdir conftest.dir - echo GrepMe > conftest.dir/file - { echo "$as_me:$LINENO: tardir=conftest.dir && eval $am__tar_ >conftest.tar" >&5 - (tardir=conftest.dir && eval $am__tar_ >conftest.tar) >&5 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } - rm -rf conftest.dir - if test -s conftest.tar; then - { echo "$as_me:$LINENO: $am__untar &5 - ($am__untar &5 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } - grep GrepMe conftest.dir/file >/dev/null 2>&1 && break - fi -done -rm -rf conftest.dir - -if test "${am_cv_prog_tar_ustar+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - am_cv_prog_tar_ustar=$_am_tool -fi - -{ echo "$as_me:$LINENO: result: $am_cv_prog_tar_ustar" >&5 -echo "${ECHO_T}$am_cv_prog_tar_ustar" >&6; } - - - - - - -# Specify required version of autoconf. - - - -# ------------------------------------------------------------------------ -# Check to see if MPI enabled and if any special configuration done -# ------------------------------------------------------------------------ - - - -# Check whether --enable-mpi was given. -if test "${enable_mpi+set}" = set; then - enableval=$enable_mpi; HAVE_PKG_MPI=$enableval -else - HAVE_PKG_MPI=no - -fi - - - -# Check whether --with-mpi-compilers was given. -if test "${with_mpi_compilers+set}" = set; then - withval=$with_mpi_compilers; - if test X${withval} != Xno; then - HAVE_PKG_MPI=yes - if test X${withval} = Xyes; then - # Check for mpicxx, if it does not exist, check for mpic++, if it does - # not exist, use mpiCC instead. - # Extract the first word of "mpicxx", so it can be a program name with args. -set dummy mpicxx; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_MPI_TEMP_CXX+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$MPI_TEMP_CXX"; then - ac_cv_prog_MPI_TEMP_CXX="$MPI_TEMP_CXX" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_MPI_TEMP_CXX="mpicxx" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_MPI_TEMP_CXX" && ac_cv_prog_MPI_TEMP_CXX="no" -fi -fi -MPI_TEMP_CXX=$ac_cv_prog_MPI_TEMP_CXX -if test -n "$MPI_TEMP_CXX"; then - { echo "$as_me:$LINENO: result: $MPI_TEMP_CXX" >&5 -echo "${ECHO_T}$MPI_TEMP_CXX" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - if test X${MPI_TEMP_CXX} = Xno; then - # Extract the first word of "mpic++", so it can be a program name with args. -set dummy mpic++; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_MPI_CXX+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$MPI_CXX"; then - ac_cv_prog_MPI_CXX="$MPI_CXX" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_MPI_CXX="mpic++" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_MPI_CXX" && ac_cv_prog_MPI_CXX="mpiCC" -fi -fi -MPI_CXX=$ac_cv_prog_MPI_CXX -if test -n "$MPI_CXX"; then - { echo "$as_me:$LINENO: result: $MPI_CXX" >&5 -echo "${ECHO_T}$MPI_CXX" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - else - MPI_CXX=${MPI_TEMP_CXX} - fi - MPI_CC=mpicc - MPI_F77=mpif77 - else - if test -f ${withval}/mpicxx; then - MPI_CXX=${withval}/mpicxx - elif test -f ${withval}/mpic++; then - MPI_CXX=${withval}/mpic++ - else - MPI_CXX=${withval}/mpiCC - fi - MPI_CC=${withval}/mpicc - MPI_F77=${withval}/mpif77 - fi - fi - - -fi - - - -# Check whether --with-mpi was given. -if test "${with_mpi+set}" = set; then - withval=$with_mpi; - HAVE_PKG_MPI=yes - MPI_DIR=${withval} - { echo "$as_me:$LINENO: checking MPI directory" >&5 -echo $ECHO_N "checking MPI directory... $ECHO_C" >&6; } - { echo "$as_me:$LINENO: result: ${MPI_DIR}" >&5 -echo "${ECHO_T}${MPI_DIR}" >&6; } - - -fi - - -#AC_ARG_WITH(mpi-include, -#[AC_HELP_STRING([--with-mpi-include],[Obsolete. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'.])], -#[AC_MSG_ERROR([--with-mpi-include is an obsolte option. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'. For example '--with-mpi-incdir=/usr/lam_path/include'.])] -#) - - -# Check whether --with-mpi-libs was given. -if test "${with_mpi_libs+set}" = set; then - withval=$with_mpi_libs; - MPI_LIBS=${withval} - { echo "$as_me:$LINENO: checking user-defined MPI libraries" >&5 -echo $ECHO_N "checking user-defined MPI libraries... $ECHO_C" >&6; } - { echo "$as_me:$LINENO: result: ${MPI_LIBS}" >&5 -echo "${ECHO_T}${MPI_LIBS}" >&6; } - - -fi - - - -# Check whether --with-mpi-incdir was given. -if test "${with_mpi_incdir+set}" = set; then - withval=$with_mpi_incdir; - MPI_INC=${withval} - { echo "$as_me:$LINENO: checking user-defined MPI includes" >&5 -echo $ECHO_N "checking user-defined MPI includes... 
$ECHO_C" >&6; } - { echo "$as_me:$LINENO: result: ${MPI_INC}" >&5 -echo "${ECHO_T}${MPI_INC}" >&6; } - - -fi - - - -# Check whether --with-mpi-libdir was given. -if test "${with_mpi_libdir+set}" = set; then - withval=$with_mpi_libdir; - MPI_LIBDIR=${withval} - { echo "$as_me:$LINENO: checking user-defined MPI library directory" >&5 -echo $ECHO_N "checking user-defined MPI library directory... $ECHO_C" >&6; } - { echo "$as_me:$LINENO: result: ${MPI_LIBDIR}" >&5 -echo "${ECHO_T}${MPI_LIBDIR}" >&6; } - - -fi - - -{ echo "$as_me:$LINENO: checking whether we are using MPI" >&5 -echo $ECHO_N "checking whether we are using MPI... $ECHO_C" >&6; } -{ echo "$as_me:$LINENO: result: ${HAVE_PKG_MPI}" >&5 -echo "${ECHO_T}${HAVE_PKG_MPI}" >&6; } - -if test "X${HAVE_PKG_MPI}" = "Xyes"; then - -cat >>confdefs.h <<\_ACEOF -#define HAVE_MPI -_ACEOF - -fi - - - if test "X${HAVE_PKG_MPI}" = "Xyes"; then - HAVE_MPI_TRUE= - HAVE_MPI_FALSE='#' -else - HAVE_MPI_TRUE='#' - HAVE_MPI_FALSE= -fi - - - - -if test -n "${MPI_CXX}"; then - if test -f ${MPI_CXX}; then - MPI_CXX_EXISTS=yes - else - # Extract the first word of "${MPI_CXX}", so it can be a program name with args. -set dummy ${MPI_CXX}; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_MPI_CXX_EXISTS+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$MPI_CXX_EXISTS"; then - ac_cv_prog_MPI_CXX_EXISTS="$MPI_CXX_EXISTS" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_MPI_CXX_EXISTS="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_MPI_CXX_EXISTS" && ac_cv_prog_MPI_CXX_EXISTS="no" -fi -fi -MPI_CXX_EXISTS=$ac_cv_prog_MPI_CXX_EXISTS -if test -n "$MPI_CXX_EXISTS"; then - { echo "$as_me:$LINENO: result: $MPI_CXX_EXISTS" >&5 -echo "${ECHO_T}$MPI_CXX_EXISTS" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - fi - - if test "X${MPI_CXX_EXISTS}" = "Xyes"; then - CXX=${MPI_CXX} - else - echo "-----" - echo "Cannot find MPI C++ compiler ${MPI_CXX}." - echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" - echo "or specify a C++ compiler using CXX=" - echo "Do not use --with-mpi-compilers if using CXX=" - echo "-----" - { { echo "$as_me:$LINENO: error: MPI C++ compiler (${MPI_CXX}) not found." >&5 -echo "$as_me: error: MPI C++ compiler (${MPI_CXX}) not found." >&2;} - { (exit 1); exit 1; }; } - fi -fi - -if test -n "${MPI_CC}"; then - if test -f ${MPI_CC}; then - MPI_CC_EXISTS=yes - else - # Extract the first word of "${MPI_CC}", so it can be a program name with args. -set dummy ${MPI_CC}; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_MPI_CC_EXISTS+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$MPI_CC_EXISTS"; then - ac_cv_prog_MPI_CC_EXISTS="$MPI_CC_EXISTS" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_MPI_CC_EXISTS="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_MPI_CC_EXISTS" && ac_cv_prog_MPI_CC_EXISTS="no" -fi -fi -MPI_CC_EXISTS=$ac_cv_prog_MPI_CC_EXISTS -if test -n "$MPI_CC_EXISTS"; then - { echo "$as_me:$LINENO: result: $MPI_CC_EXISTS" >&5 -echo "${ECHO_T}$MPI_CC_EXISTS" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - fi - - if test "X${MPI_CC_EXISTS}" = "Xyes"; then - CC=${MPI_CC} - else - echo "-----" - echo "Cannot find MPI C compiler ${MPI_CC}." - echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" - echo "or specify a C compiler using CC=" - echo "Do not use --with-mpi-compilers if using CC=" - echo "-----" - { { echo "$as_me:$LINENO: error: MPI C compiler (${MPI_CC}) not found." >&5 -echo "$as_me: error: MPI C compiler (${MPI_CC}) not found." >&2;} - { (exit 1); exit 1; }; } - fi -fi - -if test "X$ac_cv_use_fortran" = "Xyes"; then - -if test -n "${MPI_F77}"; then - if test -f ${MPI_F77}; then - MPI_F77_EXISTS=yes - else - # Extract the first word of "${MPI_F77}", so it can be a program name with args. -set dummy ${MPI_F77}; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_MPI_F77_EXISTS+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$MPI_F77_EXISTS"; then - ac_cv_prog_MPI_F77_EXISTS="$MPI_F77_EXISTS" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_MPI_F77_EXISTS="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_MPI_F77_EXISTS" && ac_cv_prog_MPI_F77_EXISTS="no" -fi -fi -MPI_F77_EXISTS=$ac_cv_prog_MPI_F77_EXISTS -if test -n "$MPI_F77_EXISTS"; then - { echo "$as_me:$LINENO: result: $MPI_F77_EXISTS" >&5 -echo "${ECHO_T}$MPI_F77_EXISTS" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - fi - - if test "X${MPI_F77_EXISTS}" = "Xyes"; then - F77=${MPI_F77} - else - echo "-----" - echo "Cannot find MPI Fortran compiler ${MPI_F77}." - echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" - echo "or specify a Fortran 77 compiler using F77=" - echo "Do not use --with-mpi-compilers if using F77=" - echo "-----" - { { echo "$as_me:$LINENO: error: MPI Fortran 77 compiler (${MPI_F77}) not found." >&5 -echo "$as_me: error: MPI Fortran 77 compiler (${MPI_F77}) not found." >&2;} - { (exit 1); exit 1; }; } - fi -fi - -fi - -# #np# - can eliminate compiler checks below if your package does not use the -# language corresponding to the check. Please note that if you use -# F77_FUNC to determine Fortran name mangling, you should not remove -# the Fortran compiler check or the check for Fortran flags. Doing -# so will prevent the detection of the proper name mangling in some -# cases. 
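For reference, the MPI handling above is driven entirely by the configure options it parses (--enable-mpi, --with-mpi-compilers, --with-mpi, --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir) together with the CXX/CC/F77 variables named in its error messages. A minimal sketch of how those options would typically be combined when invoking the generated script is shown below; the installation paths and library name are hypothetical placeholders, not values taken from this diff.

    # Hypothetical invocations of the generated configure script (paths are examples only).
    ./configure --enable-mpi --with-mpi-compilers=yes           # search PATH for mpicxx (else mpic++, else mpiCC), plus mpicc and mpif77
    ./configure --enable-mpi --with-mpi-compilers=/opt/mpi/bin  # take the compiler wrappers from one directory
    ./configure --enable-mpi CXX=mpicxx CC=mpicc                # name the compilers directly; do not also pass --with-mpi-compilers
    ./configure --with-mpi=/opt/mpi \
                --with-mpi-incdir=/opt/mpi/include \
                --with-mpi-libdir=/opt/mpi/lib \
                --with-mpi-libs=-lmpi                           # point at an MPI installation explicitly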
-# ------------------------------------------------------------------------ -# Checks for programs -# ------------------------------------------------------------------------ - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - for ac_prog in cc gcc - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_CC+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { echo "$as_me:$LINENO: result: $CC" >&5 -echo "${ECHO_T}$CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - test -n "$CC" && break - done -fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cc gcc -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_ac_ct_CC+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CC="$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 -echo "${ECHO_T}$ac_ct_CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - test -n "$ac_ct_CC" && break -done - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -fi - - -test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH -See \`config.log' for more details." 
>&5 -echo "$as_me: error: no acceptable C compiler found in \$PATH -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; } - -# Provide some information about the compiler. -echo "$as_me:$LINENO: checking for C compiler version" >&5 -ac_compiler=`set X $ac_compile; echo $2` -{ (ac_try="$ac_compiler --version >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler --version >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -v >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -v >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -V >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -V >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } - -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files a.out a.exe b.out" -# Try to create an executable without -o first, disregard a.out. -# It will help us diagnose broken compilers, and finding out an intuition -# of exeext. -{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 -echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; } -ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` -# -# List of possible output files, starting from the most likely. -# The algorithm is not robust to junk in `.', hence go to wildcards (a.*) -# only as a last resort. b.out is created by i960 compilers. -ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out' -# -# The IRIX 6 linker writes into existing files which may not be -# executable, retaining their permissions. Remove them first so a -# subsequent execution test works. -ac_rmfiles= -for ac_file in $ac_files -do - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; - * ) ac_rmfiles="$ac_rmfiles $ac_file";; - esac -done -rm -f $ac_rmfiles - -if { (ac_try="$ac_link_default" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link_default") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; then - # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. -# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' -# in a Makefile. We should not override ac_cv_exeext if it was cached, -# so that the user can short-circuit this test for compilers unknown to -# Autoconf. -for ac_file in $ac_files '' -do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) - ;; - [ab].out ) - # We found the default executable, but exeext='' is most - # certainly right. 
- break;; - *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; - then :; else - ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - fi - # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an `-o' - # argument, so we may need to know it at that point already. - # Even if this section looks crufty: it has the advantage of - # actually working. - break;; - * ) - break;; - esac -done -test "$ac_cv_exeext" = no && ac_cv_exeext= - -else - ac_file='' -fi - -{ echo "$as_me:$LINENO: result: $ac_file" >&5 -echo "${ECHO_T}$ac_file" >&6; } -if test -z "$ac_file"; then - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { echo "$as_me:$LINENO: error: C compiler cannot create executables -See \`config.log' for more details." >&5 -echo "$as_me: error: C compiler cannot create executables -See \`config.log' for more details." >&2;} - { (exit 77); exit 77; }; } -fi - -ac_exeext=$ac_cv_exeext - -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ echo "$as_me:$LINENO: checking whether the C compiler works" >&5 -echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6; } -# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 -# If not cross compiling, check that we can run a simple program. -if test "$cross_compiling" != yes; then - if { ac_try='./$ac_file' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - cross_compiling=no - else - if test "$cross_compiling" = maybe; then - cross_compiling=yes - else - { { echo "$as_me:$LINENO: error: cannot run C compiled programs. -If you meant to cross compile, use \`--host'. -See \`config.log' for more details." >&5 -echo "$as_me: error: cannot run C compiled programs. -If you meant to cross compile, use \`--host'. -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; } - fi - fi -fi -{ echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -rm -f a.out a.exe conftest$ac_cv_exeext b.out -ac_clean_files=$ac_clean_files_save -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 -echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; } -{ echo "$as_me:$LINENO: result: $cross_compiling" >&5 -echo "${ECHO_T}$cross_compiling" >&6; } - -{ echo "$as_me:$LINENO: checking for suffix of executables" >&5 -echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6; } -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; then - # If both `conftest.exe' and `conftest' are `present' (well, observable) -# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will -# work properly (i.e., refer to `conftest.exe'), while it won't with -# `rm'. 
-for ac_file in conftest.exe conftest conftest.*; do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; - *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - break;; - * ) break;; - esac -done -else - { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link -See \`config.log' for more details." >&5 -echo "$as_me: error: cannot compute suffix of executables: cannot compile and link -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; } -fi - -rm -f conftest$ac_cv_exeext -{ echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 -echo "${ECHO_T}$ac_cv_exeext" >&6; } - -rm -f conftest.$ac_ext -EXEEXT=$ac_cv_exeext -ac_exeext=$EXEEXT -{ echo "$as_me:$LINENO: checking for suffix of object files" >&5 -echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6; } -if test "${ac_cv_objext+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.o conftest.obj -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; then - for ac_file in conftest.o conftest.obj conftest.*; do - test -f "$ac_file" || continue; - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;; - *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` - break;; - esac -done -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile -See \`config.log' for more details." >&5 -echo "$as_me: error: cannot compute suffix of object files: cannot compile -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; } -fi - -rm -f conftest.$ac_cv_objext conftest.$ac_ext -fi -{ echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 -echo "${ECHO_T}$ac_cv_objext" >&6; } -OBJEXT=$ac_cv_objext -ac_objext=$OBJEXT -{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 -echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } -if test "${ac_cv_c_compiler_gnu+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_compiler_gnu=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_compiler_gnu=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 -echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } -GCC=`test $ac_compiler_gnu = yes && echo yes` -ac_test_CFLAGS=${CFLAGS+set} -ac_save_CFLAGS=$CFLAGS -{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 -echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; } -if test "${ac_cv_prog_cc_g+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_g=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - CFLAGS="" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - : -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_g=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag -fi -{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 -echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi -{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 -echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } -if test "${ac_cv_prog_cc_c89+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_cv_prog_cc_c89=no -ac_save_CC=$CC -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -#include -#include -#include -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} -_ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_c89=$ac_arg -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC - -fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { echo "$as_me:$LINENO: result: none needed" >&5 -echo "${ECHO_T}none needed" >&6; } ;; - xno) - { echo "$as_me:$LINENO: result: unsupported" >&5 -echo "${ECHO_T}unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 -echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; -esac - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -DEPDIR="${am__leading_dot}deps" - -ac_config_commands="$ac_config_commands depfiles" - - -am_make=${MAKE-make} -cat > confinc << 'END' -am__doit: - @echo done -.PHONY: am__doit -END -# If we don't find an include directive, just comment out the code. -{ echo "$as_me:$LINENO: checking for style of include used by $am_make" >&5 -echo $ECHO_N "checking for style of include used by $am_make... $ECHO_C" >&6; } -am__include="#" -am__quote= -_am_result=none -# First try GNU make style include. -echo "include confinc" > confmf -# We grep out `Entering directory' and `Leaving directory' -# messages which can occur if `w' ends up in MAKEFLAGS. -# In particular we don't look at `^make:' because GNU make might -# be invoked under some other name (usually "gmake"), in which -# case it prints its new name instead of `make'. -if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then - am__include=include - am__quote= - _am_result=GNU -fi -# Now try BSD make style include. -if test "$am__include" = "#"; then - echo '.include "confinc"' > confmf - if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then - am__include=.include - am__quote="\"" - _am_result=BSD - fi -fi - - -{ echo "$as_me:$LINENO: result: $_am_result" >&5 -echo "${ECHO_T}$_am_result" >&6; } -rm -f confinc confmf - -# Check whether --enable-dependency-tracking was given. -if test "${enable_dependency_tracking+set}" = set; then - enableval=$enable_dependency_tracking; -fi - -if test "x$enable_dependency_tracking" != xno; then - am_depcomp="$ac_aux_dir/depcomp" - AMDEPBACKSLASH='\' -fi - if test "x$enable_dependency_tracking" != xno; then - AMDEP_TRUE= - AMDEP_FALSE='#' -else - AMDEP_TRUE='#' - AMDEP_FALSE= -fi - - - -depcc="$CC" am_compiler_list= - -{ echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 -echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6; } -if test "${am_cv_CC_dependencies_compiler_type+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then - # We make a subdir and do the tests there. Otherwise we can end up - # making bogus files that we don't know about and never remove. For - # instance it was reported that on HP-UX the gcc test will end up - # making a dummy file named `D' -- because `-MD' means `put the output - # in D'. - mkdir conftest.dir - # Copy depcomp to subdir because otherwise we won't find it if we're - # using a relative directory. 
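The include-style probe recorded just above (writing `confinc`/`confmf` and seeing which `include` syntax this make understands) can be read more easily as a stand-alone sketch. This is only an illustration under the assumption of a POSIX sh and a `make` in PATH; the file names `confinc` and `confmf` mirror the generated script above.

```sh
#!/bin/sh
# Sketch of the include-style probe: write a tiny makefile fragment,
# then see whether this make understands GNU "include" or BSD ".include".
am_make=${MAKE-make}
printf 'am__doit:\n\t@echo done\n.PHONY: am__doit\n' > confinc
style=none
echo 'include confinc' > confmf
if test "`$am_make -s -f confmf 2>/dev/null | grep -v 'ing directory'`" = done; then
  style=GNU
else
  echo '.include "confinc"' > confmf
  if test "`$am_make -s -f confmf 2>/dev/null`" = done; then
    style=BSD
  fi
fi
echo "style of include used by $am_make: $style"
rm -f confinc confmf
```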
- cp "$am_depcomp" conftest.dir - cd conftest.dir - # We will build objects and dependencies in a subdirectory because - # it helps to detect inapplicable dependency modes. For instance - # both Tru64's cc and ICC support -MD to output dependencies as a - # side effect of compilation, but ICC will put the dependencies in - # the current directory while Tru64 will put them in the object - # directory. - mkdir sub - - am_cv_CC_dependencies_compiler_type=none - if test "$am_compiler_list" = ""; then - am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` - fi - for depmode in $am_compiler_list; do - # Setup a source with many dependencies, because some compilers - # like to wrap large dependency lists on column 80 (with \), and - # we should not choose a depcomp mode which is confused by this. - # - # We need to recreate these files for each test, as the compiler may - # overwrite some of them when testing with obscure command lines. - # This happens at least with the AIX C compiler. - : > sub/conftest.c - for i in 1 2 3 4 5 6; do - echo '#include "conftst'$i'.h"' >> sub/conftest.c - # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with - # Solaris 8's {/usr,}/bin/sh. - touch sub/conftst$i.h - done - echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf - - case $depmode in - nosideeffect) - # after this tag, mechanisms are not by side-effect, so they'll - # only be used when explicitly requested - if test "x$enable_dependency_tracking" = xyes; then - continue - else - break - fi - ;; - none) break ;; - esac - # We check with `-c' and `-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle `-M -o', and we need to detect this. - if depmode=$depmode \ - source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ - depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ - $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ - >/dev/null 2>conftest.err && - grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && - ${MAKE-make} -s -f confmf > /dev/null 2>&1; then - # icc doesn't choke on unknown options, it will just issue warnings - # or remarks (even with -Werror). So we grep stderr for any message - # that says an option was ignored or not supported. - # When given -MP, icc 7.0 and 7.1 complain thusly: - # icc: Command line warning: ignoring option '-M'; no argument required - # The diagnosis changed in icc 8.0: - # icc: Command line remark: option '-MP' not supported - if (grep 'ignoring option' conftest.err || - grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else - am_cv_CC_dependencies_compiler_type=$depmode - break - fi - fi - done - - cd .. 
- rm -rf conftest.dir -else - am_cv_CC_dependencies_compiler_type=none -fi - -fi -{ echo "$as_me:$LINENO: result: $am_cv_CC_dependencies_compiler_type" >&5 -echo "${ECHO_T}$am_cv_CC_dependencies_compiler_type" >&6; } -CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type - - if - test "x$enable_dependency_tracking" != xno \ - && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then - am__fastdepCC_TRUE= - am__fastdepCC_FALSE='#' -else - am__fastdepCC_TRUE='#' - am__fastdepCC_FALSE= -fi - - -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu -if test -z "$CXX"; then - if test -n "$CCC"; then - CXX=$CCC - else - if test -n "$ac_tool_prefix"; then - for ac_prog in CC g++ c++ cxx - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_CXX+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$CXX"; then - ac_cv_prog_CXX="$CXX" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -CXX=$ac_cv_prog_CXX -if test -n "$CXX"; then - { echo "$as_me:$LINENO: result: $CXX" >&5 -echo "${ECHO_T}$CXX" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - test -n "$CXX" && break - done -fi -if test -z "$CXX"; then - ac_ct_CXX=$CXX - for ac_prog in CC g++ c++ cxx -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$ac_ct_CXX"; then - ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CXX="$ac_prog" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -ac_ct_CXX=$ac_cv_prog_ac_ct_CXX -if test -n "$ac_ct_CXX"; then - { echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5 -echo "${ECHO_T}$ac_ct_CXX" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - test -n "$ac_ct_CXX" && break -done - - if test "x$ac_ct_CXX" = x; then - CXX="g++" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." 
>&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&2;} -ac_tool_warned=yes ;; -esac - CXX=$ac_ct_CXX - fi -fi - - fi -fi -# Provide some information about the compiler. -echo "$as_me:$LINENO: checking for C++ compiler version" >&5 -ac_compiler=`set X $ac_compile; echo $2` -{ (ac_try="$ac_compiler --version >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler --version >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -v >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -v >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -V >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compiler -V >&5") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } - -{ echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5 -echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6; } -if test "${ac_cv_cxx_compiler_gnu+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_compiler_gnu=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_compiler_gnu=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_cxx_compiler_gnu=$ac_compiler_gnu - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5 -echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6; } -GXX=`test $ac_compiler_gnu = yes && echo yes` -ac_test_CXXFLAGS=${CXXFLAGS+set} -ac_save_CXXFLAGS=$CXXFLAGS -{ echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5 -echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6; } -if test "${ac_cv_prog_cxx_g+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_save_cxx_werror_flag=$ac_cxx_werror_flag - ac_cxx_werror_flag=yes - ac_cv_prog_cxx_g=no - CXXFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cxx_g=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - CXXFLAGS="" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - : -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cxx_werror_flag=$ac_save_cxx_werror_flag - CXXFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cxx_g=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_cxx_werror_flag=$ac_save_cxx_werror_flag -fi -{ echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5 -echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6; } -if test "$ac_test_CXXFLAGS" = set; then - CXXFLAGS=$ac_save_CXXFLAGS -elif test $ac_cv_prog_cxx_g = yes; then - if test "$GXX" = yes; then - CXXFLAGS="-g -O2" - else - CXXFLAGS="-g" - fi -else - if test "$GXX" = yes; then - CXXFLAGS="-O2" - else - CXXFLAGS= - fi -fi -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -depcc="$CXX" am_compiler_list= - -{ echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 -echo $ECHO_N "checking dependency style of $depcc... 
$ECHO_C" >&6; } -if test "${am_cv_CXX_dependencies_compiler_type+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then - # We make a subdir and do the tests there. Otherwise we can end up - # making bogus files that we don't know about and never remove. For - # instance it was reported that on HP-UX the gcc test will end up - # making a dummy file named `D' -- because `-MD' means `put the output - # in D'. - mkdir conftest.dir - # Copy depcomp to subdir because otherwise we won't find it if we're - # using a relative directory. - cp "$am_depcomp" conftest.dir - cd conftest.dir - # We will build objects and dependencies in a subdirectory because - # it helps to detect inapplicable dependency modes. For instance - # both Tru64's cc and ICC support -MD to output dependencies as a - # side effect of compilation, but ICC will put the dependencies in - # the current directory while Tru64 will put them in the object - # directory. - mkdir sub - - am_cv_CXX_dependencies_compiler_type=none - if test "$am_compiler_list" = ""; then - am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` - fi - for depmode in $am_compiler_list; do - # Setup a source with many dependencies, because some compilers - # like to wrap large dependency lists on column 80 (with \), and - # we should not choose a depcomp mode which is confused by this. - # - # We need to recreate these files for each test, as the compiler may - # overwrite some of them when testing with obscure command lines. - # This happens at least with the AIX C compiler. - : > sub/conftest.c - for i in 1 2 3 4 5 6; do - echo '#include "conftst'$i'.h"' >> sub/conftest.c - # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with - # Solaris 8's {/usr,}/bin/sh. - touch sub/conftst$i.h - done - echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf - - case $depmode in - nosideeffect) - # after this tag, mechanisms are not by side-effect, so they'll - # only be used when explicitly requested - if test "x$enable_dependency_tracking" = xyes; then - continue - else - break - fi - ;; - none) break ;; - esac - # We check with `-c' and `-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle `-M -o', and we need to detect this. - if depmode=$depmode \ - source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ - depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ - $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ - >/dev/null 2>conftest.err && - grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && - ${MAKE-make} -s -f confmf > /dev/null 2>&1; then - # icc doesn't choke on unknown options, it will just issue warnings - # or remarks (even with -Werror). So we grep stderr for any message - # that says an option was ignored or not supported. - # When given -MP, icc 7.0 and 7.1 complain thusly: - # icc: Command line warning: ignoring option '-M'; no argument required - # The diagnosis changed in icc 8.0: - # icc: Command line remark: option '-MP' not supported - if (grep 'ignoring option' conftest.err || - grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else - am_cv_CXX_dependencies_compiler_type=$depmode - break - fi - fi - done - - cd .. 
- rm -rf conftest.dir -else - am_cv_CXX_dependencies_compiler_type=none -fi - -fi -{ echo "$as_me:$LINENO: result: $am_cv_CXX_dependencies_compiler_type" >&5 -echo "${ECHO_T}$am_cv_CXX_dependencies_compiler_type" >&6; } -CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type - - if - test "x$enable_dependency_tracking" != xno \ - && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then - am__fastdepCXX_TRUE= - am__fastdepCXX_FALSE='#' -else - am__fastdepCXX_TRUE='#' - am__fastdepCXX_FALSE= -fi - - -#AC_PROG_F77(f77 g77 gfortran f90 xlf90 f95) -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. -set dummy ${ac_tool_prefix}ranlib; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_RANLIB+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -RANLIB=$ac_cv_prog_RANLIB -if test -n "$RANLIB"; then - { echo "$as_me:$LINENO: result: $RANLIB" >&5 -echo "${ECHO_T}$RANLIB" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_RANLIB"; then - ac_ct_RANLIB=$RANLIB - # Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$ac_ct_RANLIB"; then - ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_RANLIB="ranlib" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB -if test -n "$ac_ct_RANLIB"; then - { echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5 -echo "${ECHO_T}$ac_ct_RANLIB" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - if test "x$ac_ct_RANLIB" = x; then - RANLIB=":" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." >&5 -echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools -whose name does not start with the host triplet. If you think this -configuration is useful to you, please write to autoconf@gnu.org." 
>&2;} -ac_tool_warned=yes ;; -esac - RANLIB=$ac_ct_RANLIB - fi -else - RANLIB="$ac_cv_prog_RANLIB" -fi - - -# Check if --with-flags present, prepend any specs to FLAGS - - -{ echo "$as_me:$LINENO: checking whether additional CCFLAGS flags should be added" >&5 -echo $ECHO_N "checking whether additional CCFLAGS flags should be added... $ECHO_C" >&6; } - -# Check whether --with-ccflags was given. -if test "${with_ccflags+set}" = set; then - withval=$with_ccflags; -CCFLAGS="${withval} ${CCFLAGS}" -{ echo "$as_me:$LINENO: result: CCFLAGS = ${CCFLAGS}" >&5 -echo "${ECHO_T}CCFLAGS = ${CCFLAGS}" >&6; } - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - - -{ echo "$as_me:$LINENO: checking whether additional CXXFLAGS flags should be added" >&5 -echo $ECHO_N "checking whether additional CXXFLAGS flags should be added... $ECHO_C" >&6; } - -# Check whether --with-cxxflags was given. -if test "${with_cxxflags+set}" = set; then - withval=$with_cxxflags; -CXXFLAGS="${withval} ${CXXFLAGS}" -{ echo "$as_me:$LINENO: result: CXXFLAGS = ${CXXFLAGS}" >&5 -echo "${ECHO_T}CXXFLAGS = ${CXXFLAGS}" >&6; } - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - - -{ echo "$as_me:$LINENO: checking whether additional CFLAGS flags should be added" >&5 -echo $ECHO_N "checking whether additional CFLAGS flags should be added... $ECHO_C" >&6; } - -# Check whether --with-cflags was given. -if test "${with_cflags+set}" = set; then - withval=$with_cflags; -CFLAGS="${withval} ${CFLAGS}" -{ echo "$as_me:$LINENO: result: CFLAGS = ${CFLAGS}" >&5 -echo "${ECHO_T}CFLAGS = ${CFLAGS}" >&6; } - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - -#TAC_ARG_WITH_FLAGS(fflags, FFLAGS) - -{ echo "$as_me:$LINENO: checking whether additional libraries are needed" >&5 -echo $ECHO_N "checking whether additional libraries are needed... $ECHO_C" >&6; } - -# Check whether --with-libs was given. -if test "${with_libs+set}" = set; then - withval=$with_libs; -LIBS="${withval} ${LIBS}" -{ echo "$as_me:$LINENO: result: LIBS = ${LIBS}" >&5 -echo "${ECHO_T}LIBS = ${LIBS}" >&6; } - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - - - -{ echo "$as_me:$LINENO: checking whether additional LDFLAGS flags should be added" >&5 -echo $ECHO_N "checking whether additional LDFLAGS flags should be added... $ECHO_C" >&6; } - -# Check whether --with-ldflags was given. -if test "${with_ldflags+set}" = set; then - withval=$with_ldflags; -LDFLAGS="${withval} ${LDFLAGS}" -{ echo "$as_me:$LINENO: result: LDFLAGS = ${LDFLAGS}" >&5 -echo "${ECHO_T}LDFLAGS = ${LDFLAGS}" >&6; } - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - - -# ------------------------------------------------------------------------ -# Alternate archiver -# ------------------------------------------------------------------------ - - - -# Check whether --with-ar was given. -if test "${with_ar+set}" = set; then - withval=$with_ar; -{ echo "$as_me:$LINENO: checking user-defined archiver" >&5 -echo $ECHO_N "checking user-defined archiver... 
$ECHO_C" >&6; } -{ echo "$as_me:$LINENO: result: ${withval}" >&5 -echo "${ECHO_T}${withval}" >&6; } -USE_ALTERNATE_AR=yes -ALTERNATE_AR="${withval}" - - -fi - - -if test -n "${SPECIAL_AR}" && test "X${USE_ALTERNATE_AR}" != "Xyes"; -then - USE_ALTERNATE_AR=yes - ALTERNATE_AR="${SPECIAL_AR}" -fi - -{ echo "$as_me:$LINENO: checking for special archiver command" >&5 -echo $ECHO_N "checking for special archiver command... $ECHO_C" >&6; } -if test "X${USE_ALTERNATE_AR}" = "Xyes"; then - { echo "$as_me:$LINENO: result: ${ALTERNATE_AR}" >&5 -echo "${ECHO_T}${ALTERNATE_AR}" >&6; } - if true; then - USE_ALTERNATE_AR_TRUE= - USE_ALTERNATE_AR_FALSE='#' -else - USE_ALTERNATE_AR_TRUE='#' - USE_ALTERNATE_AR_FALSE= -fi - -else - { echo "$as_me:$LINENO: result: none" >&5 -echo "${ECHO_T}none" >&6; } - if false; then - USE_ALTERNATE_AR_TRUE= - USE_ALTERNATE_AR_FALSE='#' -else - USE_ALTERNATE_AR_TRUE='#' - USE_ALTERNATE_AR_FALSE= -fi - -fi - - - -# ------------------------------------------------------------------------ -# MPI link check -# ------------------------------------------------------------------------ - -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu -{ echo "$as_me:$LINENO: checking how to run the C++ preprocessor" >&5 -echo $ECHO_N "checking how to run the C++ preprocessor... $ECHO_C" >&6; } -if test -z "$CXXCPP"; then - if test "${ac_cv_prog_CXXCPP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - # Double quotes because CXXCPP needs to be expanded - for CXXCPP in "$CXX -E" "/lib/cpp" - do - ac_preproc_ok=false -for ac_cxx_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || - test ! -s conftest.err - }; then - : -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - # Broken: fails on valid input. -continue -fi - -rm -f conftest.err conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ -#include -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || - test ! -s conftest.err - }; then - # Broken: success on invalid input. -continue -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - # Passes both tests. -ac_preproc_ok=: -break -fi - -rm -f conftest.err conftest.$ac_ext - -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. -rm -f conftest.err conftest.$ac_ext -if $ac_preproc_ok; then - break -fi - - done - ac_cv_prog_CXXCPP=$CXXCPP - -fi - CXXCPP=$ac_cv_prog_CXXCPP -else - ac_cv_prog_CXXCPP=$CXXCPP -fi -{ echo "$as_me:$LINENO: result: $CXXCPP" >&5 -echo "${ECHO_T}$CXXCPP" >&6; } -ac_preproc_ok=false -for ac_cxx_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || - test ! -s conftest.err - }; then - : -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - # Broken: fails on valid input. -continue -fi - -rm -f conftest.err conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || - test ! -s conftest.err - }; then - # Broken: success on invalid input. -continue -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - # Passes both tests. -ac_preproc_ok=: -break -fi - -rm -f conftest.err conftest.$ac_ext - -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
-rm -f conftest.err conftest.$ac_ext -if $ac_preproc_ok; then - : -else - { { echo "$as_me:$LINENO: error: C++ preprocessor \"$CXXCPP\" fails sanity check -See \`config.log' for more details." >&5 -echo "$as_me: error: C++ preprocessor \"$CXXCPP\" fails sanity check -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; } -fi - -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - - - -if test "X${HAVE_PKG_MPI}" = "Xyes"; then - - if test -n "${MPI_DIR}" && test -z "${MPI_INC}"; then - MPI_INC="${MPI_DIR}/include" - fi - - if test -n "${MPI_INC}"; then - CPPFLAGS="${CPPFLAGS} -I${MPI_INC}" - fi - - ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - { echo "$as_me:$LINENO: checking for mpi.h" >&5 -echo $ECHO_N "checking for mpi.h... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include "mpi.h" -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || - test ! -s conftest.err - }; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - echo "-----" - echo "Cannot link simple MPI program." - echo "Try --with-mpi-compilers to specify MPI compilers." - echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir" - echo "to specify all the specific MPI compile options." - echo "-----" - { { echo "$as_me:$LINENO: error: MPI cannot link" >&5 -echo "$as_me: error: MPI cannot link" >&2;} - { (exit 1); exit 1; }; } - -fi - -rm -f conftest.err conftest.$ac_ext - - if test -n "${MPI_DIR}" && test -z "${MPI_LIBDIR}"; then - MPI_LIBDIR="${MPI_DIR}/lib" - fi - - if test -n "${MPI_LIBDIR}"; then - LDFLAGS="${LDFLAGS} -L${MPI_LIBDIR}" - fi - - if test -z "${MPI_LIBS}" && test -n "${MPI_LIBDIR}"; then - MPI_LIBS="-lmpi" - fi - - if test -n "${MPI_LIBS}"; then - LIBS="${MPI_LIBS} ${LIBS}" - fi - -# AC_LANG_CPLUSPLUS -# AC_MSG_CHECKING(whether MPI will link using C++ compiler) -# AC_TRY_LINK([#include ], -# [int c; char** v; MPI_Init(&c,&v);], -# [AC_MSG_RESULT(yes)], -# [AC_MSG_RESULT(no) -# echo "-----" -# echo "Cannot link simple MPI program." -# echo "Try --with-mpi-cxx to specify MPI C++ compile script." -# echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir" -# echo "to specify all the specific MPI compile options." 
-# echo "-----" -# AC_MSG_ERROR(MPI cannot link)] -# ) - -fi - - -# ------------------------------------------------------------------------ -# Checks for Makefile.export related systems -# ------------------------------------------------------------------------ - -# Check whether --enable-export-makefiles was given. -if test "${enable_export_makefiles+set}" = set; then - enableval=$enable_export_makefiles; ac_cv_use_export_makefiles=$enableval -else - ac_cv_use_export_makefiles=yes -fi - - -{ echo "$as_me:$LINENO: checking whether to build export makefiles" >&5 -echo $ECHO_N "checking whether to build export makefiles... $ECHO_C" >&6; } - -if test "X$ac_cv_use_export_makefiles" != "Xno"; then - - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_EXPORT_MAKEFILES -_ACEOF - - -else - - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - if test X${ac_cv_use_export_makefiles} = Xyes; then - USING_EXPORT_MAKEFILES_TRUE= - USING_EXPORT_MAKEFILES_FALSE='#' -else - USING_EXPORT_MAKEFILES_TRUE='#' - USING_EXPORT_MAKEFILES_FALSE= -fi - - -# Check for perl to run scripts (Required dependency) - - - -# Check whether --with-perl was given. -if test "${with_perl+set}" = set; then - withval=$with_perl; -{ echo "$as_me:$LINENO: checking for user supplied perl executable" >&5 -echo $ECHO_N "checking for user supplied perl executable... $ECHO_C" >&6; } -{ echo "$as_me:$LINENO: result: ${withval}" >&5 -echo "${ECHO_T}${withval}" >&6; } -USER_SPECIFIED_PERL=yes -PERL_EXE="${withval}" - -else - -USER_SPECIFIED_PERL=no - -fi - - -if test "X${USER_SPECIFIED_PERL}" = "Xyes"; then - as_ac_File=`echo "ac_cv_file_${PERL_EXE}" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for ${PERL_EXE}" >&5 -echo $ECHO_N "checking for ${PERL_EXE}... $ECHO_C" >&6; } -if { as_var=$as_ac_File; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - test "$cross_compiling" = yes && - { { echo "$as_me:$LINENO: error: cannot check for file existence when cross compiling" >&5 -echo "$as_me: error: cannot check for file existence when cross compiling" >&2;} - { (exit 1); exit 1; }; } -if test -r "${PERL_EXE}"; then - eval "$as_ac_File=yes" -else - eval "$as_ac_File=no" -fi -fi -ac_res=`eval echo '${'$as_ac_File'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_File'}'` = yes; then - HAVE_PERL=yes -else - HAVE_PERL=no -fi - - PERL_EXE=${PERL_EXE} - -else - # Extract the first word of "perl", so it can be a program name with args. -set dummy perl; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_HAVE_PERL+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$HAVE_PERL"; then - ac_cv_prog_HAVE_PERL="$HAVE_PERL" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_HAVE_PERL="yes" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_HAVE_PERL" && ac_cv_prog_HAVE_PERL="no" -fi -fi -HAVE_PERL=$ac_cv_prog_HAVE_PERL -if test -n "$HAVE_PERL"; then - { echo "$as_me:$LINENO: result: $HAVE_PERL" >&5 -echo "${ECHO_T}$HAVE_PERL" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - PERL_EXE=perl - -fi - if test X${HAVE_PERL} = Xyes; then - USING_PERL_TRUE= - USING_PERL_FALSE='#' -else - USING_PERL_TRUE='#' - USING_PERL_FALSE= -fi - - - -if test "X$HAVE_PERL" != "Xyes" && - test "X$ac_cv_use_export_makefiles" != "Xno"; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - { { echo "$as_me:$LINENO: error: Failed to find the perl executable. The flag --enable-export-makefiles requires perl to be either in your path or explicitly defined by the flag --with-perl=. If you do not require the export makefiles to be installed via 'make install', you can disable the export makefiles with --disable-export-makefiles." >&5 -echo "$as_me: error: Failed to find the perl executable. The flag --enable-export-makefiles requires perl to be either in your path or explicitly defined by the flag --with-perl=. If you do not require the export makefiles to be installed via 'make install', you can disable the export makefiles with --disable-export-makefiles." >&2;} - { (exit 1); exit 1; }; } -fi - -# Check for using gnumake to clean up link lines via -# gnumake's "shell" command. Optional dependency. - - - - -# Check whether --with-gnumake was given. -if test "${with_gnumake+set}" = set; then - withval=$with_gnumake; ac_cv_use_gnumake=$withval -else - ac_cv_use_gnumake=no -fi - - -{ echo "$as_me:$LINENO: checking whether gnumake specific code should be enabled" >&5 -echo $ECHO_N "checking whether gnumake specific code should be enabled... $ECHO_C" >&6; } - -if test "X$ac_cv_use_gnumake" != "Xno"; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_GNUMAKE -_ACEOF - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - if test "X$ac_cv_use_gnumake" = "Xyes"; then - USING_GNUMAKE_TRUE= - USING_GNUMAKE_FALSE='#' -else - USING_GNUMAKE_TRUE='#' - USING_GNUMAKE_FALSE= -fi - - - -if test "X$HAVE_PERL" != "Xyes" && - test "X$ac_cv_use_gnumake" != "Xno"; then - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - { { echo "$as_me:$LINENO: error: The flag --with-gnumake requires perl to be in your path. The perl executable can alternatively be explicitly defined by the flag --with-perl=." >&5 -echo "$as_me: error: The flag --with-gnumake requires perl to be in your path. The perl executable can alternatively be explicitly defined by the flag --with-perl=." >&2;} - { (exit 1); exit 1; }; } -fi - - - -# ------------------------------------------------------------------------ -# Checks if tests and examples should be built -# ------------------------------------------------------------------------ - -# #np# - These options can disable the tests and examples of a package. -# #np# - Packages that do not have tests or examples should #-out the -# #np# - option(s) that does (do) not apply. - - -# Check whether --enable-tests was given. 
-if test "${enable_tests+set}" = set; then - enableval=$enable_tests; ac_cv_use_tests=$enableval -else - ac_cv_use_tests=yes -fi - - -{ echo "$as_me:$LINENO: checking whether to use tests" >&5 -echo $ECHO_N "checking whether to use tests... $ECHO_C" >&6; } - -if test "X$ac_cv_use_tests" != "Xno"; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_TESTS -_ACEOF - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - -# Check whether --enable-tests was given. -if test "${enable_tests+set}" = set; then - enableval=$enable_tests; ac_cv_use_tests=$enableval -else - ac_cv_use_tests=yes -fi - - -# Check whether --enable-threadpool-tests was given. -if test "${enable_threadpool_tests+set}" = set; then - enableval=$enable_threadpool_tests; ac_cv_use_threadpool_tests=$enableval -else - ac_cv_use_threadpool_tests=${ac_cv_use_tests} -fi - - -{ echo "$as_me:$LINENO: checking whether to use threadpool-tests" >&5 -echo $ECHO_N "checking whether to use threadpool-tests... $ECHO_C" >&6; } - -if test "X$ac_cv_use_threadpool_tests" != "Xno"; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_NEW_PACKAGE_TESTS -_ACEOF - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - if test "X$ac_cv_use_threadpool_tests" != "Xno"; then - BUILD_TESTS_TRUE= - BUILD_TESTS_FALSE='#' -else - BUILD_TESTS_TRUE='#' - BUILD_TESTS_FALSE= -fi - - -#TAC_ARG_ENABLE_FEATURE(examples, [Make examples for all Trilinos packages buildable with 'make examples'], EXAMPLES, yes) -#TAC_ARG_ENABLE_FEATURE_SUB_CHECK( new_package, examples, [Make New_Package examples buildable with 'make examples'], NEW_PACKAGE_EXAMPLES) -#AM_CONDITIONAL(BUILD_EXAMPLES, test "X$ac_cv_use_new_package_examples" != "Xno") - -#We now build tests and examples through separate make targets, rather than -#during "make". We still need to conditionally include the test and example -#in SUBDIRS, even though SUB_TEST and SUB_EXAMPLE will never be -#defined, so that the tests and examples are included in the distribution -#tarball. - if test "X$ac_cv_use_sub_test" = "Xyes"; then - SUB_TEST_TRUE= - SUB_TEST_FALSE='#' -else - SUB_TEST_TRUE='#' - SUB_TEST_FALSE= -fi - -#AM_CONDITIONAL(SUB_EXAMPLE, test "X$ac_cv_use_sub_example" = "Xyes") - - -# Check whether --enable-libcheck was given. -if test "${enable_libcheck+set}" = set; then - enableval=$enable_libcheck; ac_cv_use_libcheck=$enableval -else - ac_cv_use_libcheck=yes -fi - - -{ echo "$as_me:$LINENO: checking whether to use libcheck" >&5 -echo $ECHO_N "checking whether to use libcheck... $ECHO_C" >&6; } - -if test "X$ac_cv_use_libcheck" != "Xno"; then - { echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6; } - -cat >>confdefs.h <<\_ACEOF -#define HAVE_LIBCHECK -_ACEOF - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - -# ------------------------------------------------------------------------ -# Specify other directories -# ------------------------------------------------------------------------ - -# enable use of --with-libdirs="-Llibdir1 -Llibdir2 ..." to prepend to LDFLAGS - -{ echo "$as_me:$LINENO: checking whether additional library search paths defined" >&5 -echo $ECHO_N "checking whether additional library search paths defined... $ECHO_C" >&6; } - -# Check whether --with-libdirs was given. 
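The `--enable-tests`, `--enable-threadpool-tests`, and `--enable-libcheck` blocks above all follow the same pattern: an option sets a cache variable, which then drives a `confdefs.h` define and an automake-style conditional. A minimal stand-alone sketch of that pattern, assuming a POSIX sh (variable names mirror the generated script but the script itself is hypothetical):

```sh
#!/bin/sh
# Sketch of the --enable-X pattern: option -> cache variable -> define + conditional.
ac_cv_use_tests=yes
for arg in "$@"; do
  case $arg in
    --enable-tests)  ac_cv_use_tests=yes ;;
    --disable-tests) ac_cv_use_tests=no ;;
  esac
done
if test "X$ac_cv_use_tests" != "Xno"; then
  echo '#define HAVE_TESTS' >> confdefs.h
  BUILD_TESTS_TRUE= ; BUILD_TESTS_FALSE='#'
else
  BUILD_TESTS_TRUE='#' ; BUILD_TESTS_FALSE=
fi
echo "checking whether to use tests... $ac_cv_use_tests"
```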
-if test "${with_libdirs+set}" = set; then - withval=$with_libdirs; -LDFLAGS="${withval} ${LDFLAGS}" -{ echo "$as_me:$LINENO: result: ${withval}" >&5 -echo "${ECHO_T}${withval}" >&6; } - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - -# enable use of --with-incdirs="-Lincdir1 -Lincdir2 ..." to prepend to CPPFLAGS - -{ echo "$as_me:$LINENO: checking whether additional include search paths defined" >&5 -echo $ECHO_N "checking whether additional include search paths defined... $ECHO_C" >&6; } - -# Check whether --with-incdirs was given. -if test "${with_incdirs+set}" = set; then - withval=$with_incdirs; -CPPFLAGS="${withval} ${CPPFLAGS}" -{ echo "$as_me:$LINENO: result: ${withval}" >&5 -echo "${ECHO_T}${withval}" >&6; } - -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } - -fi - - - -# #np# - Yet another opportunity to remove code if you aren't -# using Fortran -# Define F77_FUNC that will be used to link with Fortran subroutines. - trash WORKGXX -#AC_F77_WRAPPERS - -# ------------------------------------------------------------------------ -# Checks for libraries -# ------------------------------------------------------------------------ - -# If tests, examples and libcheck are disabled, we don't have to check -# for these libraries. - -# #np# - -# If a package does not have tests or examples, the corresponding check(s) -# should be pulled out of the "if" statement below. -#if test "X$ac_cv_use_new_package_examples" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then -if test "X$ac_cv_use_threadpool_tests" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then - -{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 -echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; } -if test "${ac_cv_path_GREP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - # Extract the first word of "grep ggrep" to use in msg output -if test -z "$GREP"; then -set dummy grep ggrep; ac_prog_name=$2 -if test "${ac_cv_path_GREP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_path_GREP_found=false -# Loop through the user's path and test for each of PROGNAME-LIST -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in grep ggrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" - { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue - # Check for GNU ac_path_GREP and select it if it is found. 
- # Check for GNU $ac_path_GREP -case `"$ac_path_GREP" --version 2>&1` in -*GNU*) - ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; -*) - ac_count=0 - echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - echo 'GREP' >> "conftest.nl" - "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - ac_count=`expr $ac_count + 1` - if test $ac_count -gt ${ac_path_GREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_GREP="$ac_path_GREP" - ac_path_GREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - - $ac_path_GREP_found && break 3 - done -done - -done -IFS=$as_save_IFS - - -fi - -GREP="$ac_cv_path_GREP" -if test -z "$GREP"; then - { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 -echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} - { (exit 1); exit 1; }; } -fi - -else - ac_cv_path_GREP=$GREP -fi - - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 -echo "${ECHO_T}$ac_cv_path_GREP" >&6; } - GREP="$ac_cv_path_GREP" - - -{ echo "$as_me:$LINENO: checking for egrep" >&5 -echo $ECHO_N "checking for egrep... $ECHO_C" >&6; } -if test "${ac_cv_path_EGREP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 - then ac_cv_path_EGREP="$GREP -E" - else - # Extract the first word of "egrep" to use in msg output -if test -z "$EGREP"; then -set dummy egrep; ac_prog_name=$2 -if test "${ac_cv_path_EGREP+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_path_EGREP_found=false -# Loop through the user's path and test for each of PROGNAME-LIST -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in egrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" - { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue - # Check for GNU ac_path_EGREP and select it if it is found. 
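The egrep detection above starts from a simple fallback rule before benchmarking candidates: use `$GREP -E` if it works, otherwise look for a separate `egrep`. A minimal sketch of that first step, assuming a POSIX sh and a grep in PATH (the candidate-benchmarking loop above is not reproduced here):

```sh
#!/bin/sh
# Sketch of the egrep fallback: prefer "grep -E" when the chosen grep
# supports it, otherwise fall back to a separate egrep binary.
GREP=${GREP-grep}
if echo a | $GREP -E '(a|b)' >/dev/null 2>&1; then
  EGREP="$GREP -E"
else
  EGREP=egrep
fi
echo "checking for egrep... $EGREP"
```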
- # Check for GNU $ac_path_EGREP -case `"$ac_path_EGREP" --version 2>&1` in -*GNU*) - ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; -*) - ac_count=0 - echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - echo 'EGREP' >> "conftest.nl" - "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - ac_count=`expr $ac_count + 1` - if test $ac_count -gt ${ac_path_EGREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_EGREP="$ac_path_EGREP" - ac_path_EGREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - - $ac_path_EGREP_found && break 3 - done -done - -done -IFS=$as_save_IFS - - -fi - -EGREP="$ac_cv_path_EGREP" -if test -z "$EGREP"; then - { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 -echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} - { (exit 1); exit 1; }; } -fi - -else - ac_cv_path_EGREP=$EGREP -fi - - - fi -fi -{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 -echo "${ECHO_T}$ac_cv_path_EGREP" >&6; } - EGREP="$ac_cv_path_EGREP" - - -{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5 -echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; } -if test "${ac_cv_header_stdc+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -#include -#include -#include - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_header_stdc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cv_header_stdc=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "memchr" >/dev/null 2>&1; then - : -else - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ -#include - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "free" >/dev/null 2>&1; then - : -else - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. - if test "$cross_compiling" = yes; then - : -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -#include -#if ((' ' & 0x0FF) == 0x020) -# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) -#else -# define ISLOWER(c) \ - (('a' <= (c) && (c) <= 'i') \ - || ('j' <= (c) && (c) <= 'r') \ - || ('s' <= (c) && (c) <= 'z')) -# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) -#endif - -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int -main () -{ - int i; - for (i = 0; i < 256; i++) - if (XOR (islower (i), ISLOWER (i)) - || toupper (i) != TOUPPER (i)) - return 2; - return 0; -} -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - : -else - echo "$as_me: program exited with status $ac_status" >&5 -echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ac_cv_header_stdc=no -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - -fi -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 -echo "${ECHO_T}$ac_cv_header_stdc" >&6; } -if test $ac_cv_header_stdc = yes; then - -cat >>confdefs.h <<\_ACEOF -#define STDC_HEADERS 1 -_ACEOF - -fi - -# On IRIX 5.3, sys/types and inttypes.h are conflicting. - - - - - - - - - -for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ - inttypes.h stdint.h unistd.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -{ echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default - -#include <$ac_header> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - eval "$as_ac_Header=yes" -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - eval "$as_ac_Header=no" -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -if test `eval echo '${'$as_ac_Header'}'` = yes; then - cat >>confdefs.h <<_ACEOF -#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF - -fi - -done - - - - -acx_pthread_ok=no - -# First, check if the POSIX threads header, pthread.h, is available. -# If it isn't, don't bother looking for the threads libraries. -if test "${ac_cv_header_pthread_h+set}" = set; then - { echo "$as_me:$LINENO: checking for pthread.h" >&5 -echo $ECHO_N "checking for pthread.h... $ECHO_C" >&6; } -if test "${ac_cv_header_pthread_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_pthread_h" >&5 -echo "${ECHO_T}$ac_cv_header_pthread_h" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking pthread.h usability" >&5 -echo $ECHO_N "checking pthread.h usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default -#include -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_compiler=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking pthread.h presence" >&5 -echo $ECHO_N "checking pthread.h presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null && { - test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || - test ! -s conftest.err - }; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? 
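Each header in the loop above gets a mangled cache variable (ac_cv_header_...) and, when the compile succeeds, a HAVE_* macro appended to confdefs.h. A rough approximation of that name mangling, using plain sed and tr in place of the $as_tr_sh / $as_tr_cpp programs configure builds earlier:

# Derive the cache-variable and preprocessor names AC_CHECK_HEADERS uses.
for hdr in sys/types.h stdint.h unistd.h; do
  cache_var=ac_cv_header_`echo "$hdr" | sed 's%[^_[:alnum:]]%_%g'`
  cpp_sym=`echo "HAVE_$hdr" | tr 'a-z' 'A-Z' | sed 's%[^_[:alnum:]]%_%g'`
  echo "checking for $hdr ... cached as \$$cache_var, defines $cpp_sym"
done
# e.g. sys/types.h -> ac_cv_header_sys_types_h -> #define HAVE_SYS_TYPES_H 1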
-case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: pthread.h: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: pthread.h: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: pthread.h: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: pthread.h: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: pthread.h: present but cannot be compiled" >&5 -echo "$as_me: WARNING: pthread.h: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: pthread.h: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: pthread.h: check for missing prerequisite headers?" >&2;} - { echo "$as_me:$LINENO: WARNING: pthread.h: see the Autoconf documentation" >&5 -echo "$as_me: WARNING: pthread.h: see the Autoconf documentation" >&2;} - { echo "$as_me:$LINENO: WARNING: pthread.h: section \"Present But Cannot Be Compiled\"" >&5 -echo "$as_me: WARNING: pthread.h: section \"Present But Cannot Be Compiled\"" >&2;} - { echo "$as_me:$LINENO: WARNING: pthread.h: proceeding with the preprocessor's result" >&5 -echo "$as_me: WARNING: pthread.h: proceeding with the preprocessor's result" >&2;} - { echo "$as_me:$LINENO: WARNING: pthread.h: in the future, the compiler will take precedence" >&5 -echo "$as_me: WARNING: pthread.h: in the future, the compiler will take precedence" >&2;} - ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to hcedwar@sandia.gov ## -## --------------------------------- ## -_ASBOX - ) | sed "s/^/$as_me: WARNING: /" >&2 - ;; -esac -{ echo "$as_me:$LINENO: checking for pthread.h" >&5 -echo $ECHO_N "checking for pthread.h... $ECHO_C" >&6; } -if test "${ac_cv_header_pthread_h+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_cv_header_pthread_h=$ac_header_preproc -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_pthread_h" >&5 -echo "${ECHO_T}$ac_cv_header_pthread_h" >&6; } - -fi -if test $ac_cv_header_pthread_h = yes; then - : -else - acx_pthread_ok=noheader -fi - - - -# We must check for the threads library under a number of different -# names; the ordering is very important because some systems -# (e.g. DEC) have both -lpthread and -lpthreads, where one of the -# libraries is broken (non-POSIX). - -# First of all, check if the user has set any of the PTHREAD_LIBS, -# etcetera environment variables, and if threads linking works using -# them: -if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - { echo "$as_me:$LINENO: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 -echo $ECHO_N "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char pthread_join (); -int -main () -{ -return pthread_join (); - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - acx_pthread_ok=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - { echo "$as_me:$LINENO: result: $acx_pthread_ok" >&5 -echo "${ECHO_T}$acx_pthread_ok" >&6; } - if test x"$acx_pthread_ok" = xno; then - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" - fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" -fi - -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all. - -acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt" - -# The ordering *is* (sometimes) important. Some notes on the -# individual items follow: - -# pthreads: AIX (must check this before -lpthread) -# none: in case threads are in libc; should be tried before -Kthread and -# other compiler flags to prevent continual compiler warnings -# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) -# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) -# -pthreads: Solaris/gcc -# -mthreads: Mingw32/gcc, Lynx/gcc -# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads too; -# also defines -D_REENTRANT) -# pthread: Linux, etcetera -# --thread-safe: KAI C++ - -case "${host_cpu}-${host_os}" in - *solaris*) - - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthread or - # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) So, - # we'll just look for -pthreads and -lpthread first: - - acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" - ;; -esac - -if test x"$acx_pthread_ok" = xno; then -for flag in $acx_pthread_flags; do - - case $flag in - none) - { echo "$as_me:$LINENO: checking whether pthreads work without any flags" >&5 -echo $ECHO_N "checking whether pthreads work without any flags... $ECHO_C" >&6; } - ;; - - -*) - { echo "$as_me:$LINENO: checking whether pthreads work with $flag" >&5 -echo $ECHO_N "checking whether pthreads work with $flag... $ECHO_C" >&6; } - PTHREAD_CFLAGS="$flag" - ;; - - *) - { echo "$as_me:$LINENO: checking for the pthreads library -l$flag" >&5 -echo $ECHO_N "checking for the pthreads library -l$flag... 
$ECHO_C" >&6; } - PTHREAD_LIBS="-l$flag" - ;; - esac - - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Check for various functions. We must include pthread.h, - # since some functions may be macros. (On the Sequent, we - # need a special flag -Kthread to make this header compile.) - # We check for pthread_join because it is in -lpthread on IRIX - # while pthread_create is in libc. We check for pthread_attr_init - # due to DEC craziness with -lpthreads. We check for - # pthread_cleanup_push because it is one of the few pthread - # functions on Solaris that doesn't have a non-functional libc stub. - # We try pthread_create on general principles. - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ -pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - acx_pthread_ok=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - { echo "$as_me:$LINENO: result: $acx_pthread_ok" >&5 -echo "${ECHO_T}$acx_pthread_ok" >&6; } - if test "x$acx_pthread_ok" = xyes; then - break; - fi - - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" -done -fi - -# Various other checks: -if test "x$acx_pthread_ok" = xyes; then - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Detect AIX lossage: threads are created detached by default - # and the JOINABLE attribute has a nonstandard name (UNDETACHED). - { echo "$as_me:$LINENO: checking for joinable pthread attribute" >&5 -echo $ECHO_N "checking for joinable pthread attribute... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ -int attr=PTHREAD_CREATE_JOINABLE; - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! 
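Stripped of the caching and logging, the flag loop above compiles and links one small program per candidate and keeps the first candidate that links; the probe deliberately touches several entry points because pthread_join lives in -lpthread on IRIX, pthread_attr_init trips up DEC's -lpthreads, and pthread_cleanup_push is one of the few calls Solaris libc does not stub. A condensed sketch, with the candidate list shortened and cc assumed as the compiler:

# Find a working pthread flag by test-linking a small probe program.
cat > probe.c <<'EOF'
#include <pthread.h>
static void *run(void *arg) { return arg; }
int main(void)
{
    pthread_t th;
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_create(&th, &attr, run, 0);
    pthread_join(th, 0);
    return 0;
}
EOF
found=
for flag in -pthread -pthreads -lpthread -lpthreads -mt; do
  if cc probe.c -o probe $flag 2>/dev/null; then
    echo "pthreads link with: $flag"
    found=$flag
    break
  fi
done
test -n "$found" || echo "no working pthread flag found"
rm -f probe probe.c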
-s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - ok=PTHREAD_CREATE_JOINABLE -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ok=unknown -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - if test x"$ok" = xunknown; then - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ -int attr=PTHREAD_CREATE_UNDETACHED; - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - ok=PTHREAD_CREATE_UNDETACHED -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ok=unknown -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - fi - if test x"$ok" != xPTHREAD_CREATE_JOINABLE; then - -cat >>confdefs.h <<\_ACEOF -#define PTHREAD_CREATE_JOINABLE $ok -_ACEOF - - fi - { echo "$as_me:$LINENO: result: ${ok}" >&5 -echo "${ECHO_T}${ok}" >&6; } - if test x"$ok" = xunknown; then - { echo "$as_me:$LINENO: WARNING: we do not know how to create joinable pthreads" >&5 -echo "$as_me: WARNING: we do not know how to create joinable pthreads" >&2;} - fi - - { echo "$as_me:$LINENO: checking if more special flags are required for pthreads" >&5 -echo $ECHO_N "checking if more special flags are required for pthreads... $ECHO_C" >&6; } - flag=no - case "${host_cpu}-${host_os}" in - *-aix* | *-freebsd*) flag="-D_THREAD_SAFE";; - *solaris* | alpha*-osf*) flag="-D_REENTRANT";; - esac - { echo "$as_me:$LINENO: result: ${flag}" >&5 -echo "${ECHO_T}${flag}" >&6; } - if test "x$flag" != xno; then - PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" - fi - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - # More AIX lossage: must compile with cc_r - # Extract the first word of "cc_r", so it can be a program name with args. -set dummy cc_r; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_prog_PTHREAD_CC+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - if test -n "$PTHREAD_CC"; then - ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_PTHREAD_CC="cc_r" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - test -z "$ac_cv_prog_PTHREAD_CC" && ac_cv_prog_PTHREAD_CC="${CC}" -fi -fi -PTHREAD_CC=$ac_cv_prog_PTHREAD_CC -if test -n "$PTHREAD_CC"; then - { echo "$as_me:$LINENO: result: $PTHREAD_CC" >&5 -echo "${ECHO_T}$PTHREAD_CC" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - -else - PTHREAD_CC="$CC" -fi - - - - - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test x"$acx_pthread_ok" = xyes; then - -cat >>confdefs.h <<\_ACEOF -#define HAVE_PTHREAD 1 -_ACEOF - - : -else - acx_pthread_ok=no - -fi - - -LIBS="$PTHREAD_LIBS $LIBS" -CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -CC="$PTHREAD_CC" - -fi -# end of the list of libraries that don't need to be checked for if -# tests and examples are disabled. - -# ------------------------------------------------------------------------ -# Checks for linker characteristics -# ------------------------------------------------------------------------ - -# Determine libraries needed for linking with Fortran -#AC_F77_LIBRARY_LDFLAGS - - -# ------------------------------------------------------------------------ -# Perform substitutions in output files -# ------------------------------------------------------------------------ - - - -# ------------------------------------------------------------------------ -# Output files -# ------------------------------------------------------------------------ -## -# You will need to change AC_CONFIG_FILES below and Makefile.am -# to add a new directory. -ac_config_files="$ac_config_files Makefile Makefile.export.threadpool src/Makefile test/Makefile" - - -cat >confcache <<\_ACEOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs, see configure's option --config-cache. -# It is not useful on other systems. If it contains results you don't -# want to keep, you may remove or edit it. -# -# config.status only pays attention to the cache file if you give it -# the --recheck option to rerun configure. -# -# `ac_cv_env_foo' variables (set or unset) will be overridden when -# loading this file, other *unset* `ac_cv_foo' will be assigned the -# following values. - -_ACEOF - -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, we kill variables containing newlines. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. -( - for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 -echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - *) $as_unset $ac_var ;; - esac ;; - esac - done - - (set) 2>&1 | - case $as_nl`(ac_space=' '; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - # `set' does not quote correctly, so add quotes (double-quote - # substitution turns \\\\ into \\, and sed turns \\ into \). 
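After a successful probe the macro substitutes PTHREAD_CC, PTHREAD_CFLAGS and PTHREAD_LIBS and folds them into CC, CFLAGS and LIBS, so later compiles and links in the package are thread-aware. A sketch of how a build rule consumes those results; the file names and values are placeholders:

# Use the probed settings the way the generated Makefiles do.
PTHREAD_CC=cc
PTHREAD_CFLAGS=-pthread
PTHREAD_LIBS=-lpthread
$PTHREAD_CC $PTHREAD_CFLAGS -c worker.c -o worker.o
$PTHREAD_CC $PTHREAD_CFLAGS -o worker_test worker.o $PTHREAD_LIBS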
- sed -n \ - "s/'/'\\\\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" - ;; #( - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) | - sed ' - /^ac_cv_env_/b end - t clear - :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ - t end - s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ - :end' >>confcache -if diff "$cache_file" confcache >/dev/null 2>&1; then :; else - if test -w "$cache_file"; then - test "x$cache_file" != "x/dev/null" && - { echo "$as_me:$LINENO: updating cache $cache_file" >&5 -echo "$as_me: updating cache $cache_file" >&6;} - cat confcache >$cache_file - else - { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 -echo "$as_me: not updating unwritable cache $cache_file" >&6;} - fi -fi -rm -f confcache - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -DEFS=-DHAVE_CONFIG_H - -ac_libobjs= -ac_ltlibobjs= -for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue - # 1. Remove the extension, and $U if already installed. - ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`echo "$ac_i" | sed "$ac_script"` - # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR - # will be set to the directory where LIBOBJS objects are built. - ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" - ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' -done -LIBOBJS=$ac_libobjs - -LTLIBOBJS=$ac_ltlibobjs - - -if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"MAINTAINER_MODE\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"MAINTAINER_MODE\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${HAVE_MPI_TRUE}" && test -z "${HAVE_MPI_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"HAVE_MPI\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"HAVE_MPI\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"AMDEP\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"AMDEP\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCC\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"am__fastdepCC\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCXX\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined. -Usually this means the macro was only invoked conditionally." 
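Each cached result is written to the cache file as VAR=${VAR=value} (the sed above adds the ${...=...} wrapper), so a later run started with --config-cache, or a config.status --recheck, can pre-load earlier answers without clobbering values already set in the environment. A three-line illustration of that shell idiom:

ac_cv_header_pthread_h=yes        # value already present, e.g. from the environment
: ${ac_cv_header_pthread_h=no}    # the form used in the cache file: assign only if unset
echo "$ac_cv_header_pthread_h"    # prints "yes"; the cached default does not override it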
>&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${USE_ALTERNATE_AR_TRUE}" && test -z "${USE_ALTERNATE_AR_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"USE_ALTERNATE_AR\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"USE_ALTERNATE_AR\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${USE_ALTERNATE_AR_TRUE}" && test -z "${USE_ALTERNATE_AR_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"USE_ALTERNATE_AR\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"USE_ALTERNATE_AR\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${USING_EXPORT_MAKEFILES_TRUE}" && test -z "${USING_EXPORT_MAKEFILES_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"USING_EXPORT_MAKEFILES\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"USING_EXPORT_MAKEFILES\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${USING_PERL_TRUE}" && test -z "${USING_PERL_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"USING_PERL\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"USING_PERL\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${USING_GNUMAKE_TRUE}" && test -z "${USING_GNUMAKE_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"USING_GNUMAKE\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"USING_GNUMAKE\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${BUILD_TESTS_TRUE}" && test -z "${BUILD_TESTS_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"BUILD_TESTS\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"BUILD_TESTS\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi -if test -z "${SUB_TEST_TRUE}" && test -z "${SUB_TEST_FALSE}"; then - { { echo "$as_me:$LINENO: error: conditional \"SUB_TEST\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -echo "$as_me: error: conditional \"SUB_TEST\" was never defined. -Usually this means the macro was only invoked conditionally." >&2;} - { (exit 1); exit 1; }; } -fi - -: ${CONFIG_STATUS=./config.status} -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 -echo "$as_me: creating $CONFIG_STATUS" >&6;} -cat >$CONFIG_STATUS <<_ACEOF -#! $SHELL -# Generated by $as_me. -# Run this file to recreate the current configuration. -# Compiler output produced by configure, useful for debugging -# configure, is in config.log if it exists. - -debug=false -ac_cs_recheck=false -ac_cs_silent=false -SHELL=\${CONFIG_SHELL-$SHELL} -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF -## --------------------- ## -## M4sh Initialization. 
## -## --------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in - *posix*) set -o posix ;; -esac - -fi - - - - -# PATH needs CR -# Avoid depending upon Character Ranges. -as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - -# The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then - echo "#! /bin/sh" >conf$$.sh - echo "exit 0" >>conf$$.sh - chmod +x conf$$.sh - if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then - PATH_SEPARATOR=';' - else - PATH_SEPARATOR=: - fi - rm -f conf$$.sh -fi - -# Support unset when possible. -if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then - as_unset=unset -else - as_unset=false -fi - - -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -as_nl=' -' -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -case $0 in - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break -done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - { (exit 1); exit 1; } -fi - -# Work around bugs in pre-3.0 UWIN ksh. -for as_var in ENV MAIL MAILPATH -do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -for as_var in \ - LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ - LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ - LC_TELEPHONE LC_TIME -do - if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then - eval $as_var=C; export $as_var - else - ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var - fi -done - -# Required to use basename. -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - - -# Name of the executable. -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# CDPATH. 
-$as_unset CDPATH - - - - as_lineno_1=$LINENO - as_lineno_2=$LINENO - test "x$as_lineno_1" != "x$as_lineno_2" && - test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { - - # Create $as_me.lineno as a copy of $as_myself, but with $LINENO - # uniformly replaced by the line number. The first 'sed' inserts a - # line-number line after each line using $LINENO; the second 'sed' - # does the real work. The second script uses 'N' to pair each - # line-number line with the line containing $LINENO, and appends - # trailing '-' during substitution so that $LINENO is not a special - # case at line end. - # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the - # scripts with optimization help from Paolo Bonzini. Blame Lee - # E. McMahon (1931-1989) for sed's syntax. :-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | - sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno - N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ - t loop - s/-\n.*// - ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 - { (exit 1); exit 1; }; } - - # Don't try to exec as it changes $[0], causing all sort of problems - # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" - # Exit status is that of the last command. - exit -} - - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in --n*) - case `echo 'x\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - *) ECHO_C='\c';; - esac;; -*) - ECHO_N='-n';; -esac - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir -fi -echo >conf$$.file -if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -p' -elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln -else - as_ln_s='cp -p' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - -if mkdir -p . 2>/dev/null; then - as_mkdir_p=: -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x - -# Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" - -# Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" - - -exec 6>&1 - -# Save the log message, to keep $[0] and so on meaningful, and to -# report actual input values of CONFIG_FILES etc. instead of their -# values after options handling. 
-ac_log=" -This file was extended by ThreadPool $as_me 1.1d, which was -generated by GNU Autoconf 2.61. Invocation command line was - - CONFIG_FILES = $CONFIG_FILES - CONFIG_HEADERS = $CONFIG_HEADERS - CONFIG_LINKS = $CONFIG_LINKS - CONFIG_COMMANDS = $CONFIG_COMMANDS - $ $0 $@ - -on `(hostname || uname -n) 2>/dev/null | sed 1q` -" - -_ACEOF - -cat >>$CONFIG_STATUS <<_ACEOF -# Files that config.status was made for. -config_files="$ac_config_files" -config_headers="$ac_config_headers" -config_commands="$ac_config_commands" - -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF -ac_cs_usage="\ -\`$as_me' instantiates files from templates according to the -current configuration. - -Usage: $0 [OPTIONS] [FILE]... - - -h, --help print this help, then exit - -V, --version print version number and configuration settings, then exit - -q, --quiet do not print progress messages - -d, --debug don't remove temporary files - --recheck update $as_me by reconfiguring in the same conditions - --file=FILE[:TEMPLATE] - instantiate the configuration file FILE - --header=FILE[:TEMPLATE] - instantiate the configuration header FILE - -Configuration files: -$config_files - -Configuration headers: -$config_headers - -Configuration commands: -$config_commands - -Report bugs to ." - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF -ac_cs_version="\\ -ThreadPool config.status 1.1d -configured by $0, generated by GNU Autoconf 2.61, - with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" - -Copyright (C) 2006 Free Software Foundation, Inc. -This config.status script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it." - -ac_pwd='$ac_pwd' -srcdir='$srcdir' -INSTALL='$INSTALL' -MKDIR_P='$MKDIR_P' -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF -# If no file are specified by the user, then we need to provide default -# value. By we need to know if files were specified by the user. -ac_need_defaults=: -while test $# != 0 -do - case $1 in - --*=*) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` - ac_shift=: - ;; - *) - ac_option=$1 - ac_optarg=$2 - ac_shift=shift - ;; - esac - - case $ac_option in - # Handling of the options. - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - ac_cs_recheck=: ;; - --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - echo "$ac_cs_version"; exit ;; - --debug | --debu | --deb | --de | --d | -d ) - debug=: ;; - --file | --fil | --fi | --f ) - $ac_shift - CONFIG_FILES="$CONFIG_FILES $ac_optarg" - ac_need_defaults=false;; - --header | --heade | --head | --hea ) - $ac_shift - CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" - ac_need_defaults=false;; - --he | --h) - # Conflict between --help and --header - { echo "$as_me: error: ambiguous option: $1 -Try \`$0 --help' for more information." >&2 - { (exit 1); exit 1; }; };; - --help | --hel | -h ) - echo "$ac_cs_usage"; exit ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil | --si | --s) - ac_cs_silent=: ;; - - # This is an error. - -*) { echo "$as_me: error: unrecognized option: $1 -Try \`$0 --help' for more information." 
>&2 - { (exit 1); exit 1; }; } ;; - - *) ac_config_targets="$ac_config_targets $1" - ac_need_defaults=false ;; - - esac - shift -done - -ac_configure_extra_args= - -if $ac_cs_silent; then - exec 6>/dev/null - ac_configure_extra_args="$ac_configure_extra_args --silent" -fi - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF -if \$ac_cs_recheck; then - echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 - CONFIG_SHELL=$SHELL - export CONFIG_SHELL - exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion -fi - -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF -exec 5>>config.log -{ - echo - sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX -## Running $as_me. ## -_ASBOX - echo "$ac_log" -} >&5 - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF -# -# INIT-COMMANDS -# -AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" - -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF - -# Handling of arguments. -for ac_config_target in $ac_config_targets -do - case $ac_config_target in - "src/ThreadPool_config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/ThreadPool_config.h:src/ThreadPool_config.h.in" ;; - "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; - "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; - "Makefile.export.threadpool") CONFIG_FILES="$CONFIG_FILES Makefile.export.threadpool" ;; - "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; - "test/Makefile") CONFIG_FILES="$CONFIG_FILES test/Makefile" ;; - - *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 -echo "$as_me: error: invalid argument: $ac_config_target" >&2;} - { (exit 1); exit 1; }; };; - esac -done - - -# If the user did not use the arguments to specify the items to instantiate, -# then the envvar interface is used. Set only those that are not. -# We use the long form for the default assignment because of an extremely -# bizarre bug on SunOS 4.1.3. -if $ac_need_defaults; then - test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files - test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers - test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands -fi - -# Have a temporary directory for convenience. Make it in the build tree -# simply because there is no reason against having it here, and in addition, -# creating and moving files from /tmp can sometimes cause problems. -# Hook for its removal unless debugging. -# Note that there is a small window in which the directory will not be cleaned: -# after its creation but before its name has been assigned to `$tmp'. -$debug || -{ - tmp= - trap 'exit_status=$? - { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status -' 0 - trap '{ (exit 1); exit 1; }' 1 2 13 15 -} -# Create a (secure) tmp directory for tmp files. - -{ - tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && - test -n "$tmp" && test -d "$tmp" -} || -{ - tmp=./conf$$-$RANDOM - (umask 077 && mkdir "$tmp") -} || -{ - echo "$me: cannot create a temporary directory in ." >&2 - { (exit 1); exit 1; } -} - -# -# Set up the sed scripts for CONFIG_FILES section. -# - -# No need to generate the scripts if there are no CONFIG_FILES. 
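In practice the generated config.status is driven either with no arguments, which instantiates everything in the target list above, or with explicit targets and the options from the usage text. A few typical invocations against this package's targets:

./config.status                              # regenerate all Makefiles and headers
./config.status src/Makefile                 # regenerate a single file
./config.status src/ThreadPool_config.h      # regenerate just the config header
./config.status --recheck                    # rerun configure with the original arguments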
-# This happens for instance when ./config.status config.h -if test -n "$CONFIG_FILES"; then - -_ACEOF - - - -ac_delim='%!_!# ' -for ac_last_try in false false false false false :; do - cat >conf$$subs.sed <<_ACEOF -SHELL!$SHELL$ac_delim -PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim -PACKAGE_NAME!$PACKAGE_NAME$ac_delim -PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim -PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim -PACKAGE_STRING!$PACKAGE_STRING$ac_delim -PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim -exec_prefix!$exec_prefix$ac_delim -prefix!$prefix$ac_delim -program_transform_name!$program_transform_name$ac_delim -bindir!$bindir$ac_delim -sbindir!$sbindir$ac_delim -libexecdir!$libexecdir$ac_delim -datarootdir!$datarootdir$ac_delim -datadir!$datadir$ac_delim -sysconfdir!$sysconfdir$ac_delim -sharedstatedir!$sharedstatedir$ac_delim -localstatedir!$localstatedir$ac_delim -includedir!$includedir$ac_delim -oldincludedir!$oldincludedir$ac_delim -docdir!$docdir$ac_delim -infodir!$infodir$ac_delim -htmldir!$htmldir$ac_delim -dvidir!$dvidir$ac_delim -pdfdir!$pdfdir$ac_delim -psdir!$psdir$ac_delim -libdir!$libdir$ac_delim -localedir!$localedir$ac_delim -mandir!$mandir$ac_delim -DEFS!$DEFS$ac_delim -ECHO_C!$ECHO_C$ac_delim -ECHO_N!$ECHO_N$ac_delim -ECHO_T!$ECHO_T$ac_delim -LIBS!$LIBS$ac_delim -build_alias!$build_alias$ac_delim -host_alias!$host_alias$ac_delim -target_alias!$target_alias$ac_delim -MAINTAINER_MODE_TRUE!$MAINTAINER_MODE_TRUE$ac_delim -MAINTAINER_MODE_FALSE!$MAINTAINER_MODE_FALSE$ac_delim -MAINT!$MAINT$ac_delim -build!$build$ac_delim -build_cpu!$build_cpu$ac_delim -build_vendor!$build_vendor$ac_delim -build_os!$build_os$ac_delim -host!$host$ac_delim -host_cpu!$host_cpu$ac_delim -host_vendor!$host_vendor$ac_delim -host_os!$host_os$ac_delim -target!$target$ac_delim -target_cpu!$target_cpu$ac_delim -target_vendor!$target_vendor$ac_delim -target_os!$target_os$ac_delim -INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim -INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim -INSTALL_DATA!$INSTALL_DATA$ac_delim -am__isrc!$am__isrc$ac_delim -CYGPATH_W!$CYGPATH_W$ac_delim -PACKAGE!$PACKAGE$ac_delim -VERSION!$VERSION$ac_delim -ACLOCAL!$ACLOCAL$ac_delim -AUTOCONF!$AUTOCONF$ac_delim -AUTOMAKE!$AUTOMAKE$ac_delim -AUTOHEADER!$AUTOHEADER$ac_delim -MAKEINFO!$MAKEINFO$ac_delim -install_sh!$install_sh$ac_delim -STRIP!$STRIP$ac_delim -INSTALL_STRIP_PROGRAM!$INSTALL_STRIP_PROGRAM$ac_delim -mkdir_p!$mkdir_p$ac_delim -AWK!$AWK$ac_delim -SET_MAKE!$SET_MAKE$ac_delim -am__leading_dot!$am__leading_dot$ac_delim -AMTAR!$AMTAR$ac_delim -am__tar!$am__tar$ac_delim -am__untar!$am__untar$ac_delim -MPI_TEMP_CXX!$MPI_TEMP_CXX$ac_delim -MPI_CXX!$MPI_CXX$ac_delim -HAVE_MPI_TRUE!$HAVE_MPI_TRUE$ac_delim -HAVE_MPI_FALSE!$HAVE_MPI_FALSE$ac_delim -MPI_CXX_EXISTS!$MPI_CXX_EXISTS$ac_delim -MPI_CC_EXISTS!$MPI_CC_EXISTS$ac_delim -MPI_F77_EXISTS!$MPI_F77_EXISTS$ac_delim -CC!$CC$ac_delim -CFLAGS!$CFLAGS$ac_delim -LDFLAGS!$LDFLAGS$ac_delim -CPPFLAGS!$CPPFLAGS$ac_delim -ac_ct_CC!$ac_ct_CC$ac_delim -EXEEXT!$EXEEXT$ac_delim -OBJEXT!$OBJEXT$ac_delim -DEPDIR!$DEPDIR$ac_delim -am__include!$am__include$ac_delim -am__quote!$am__quote$ac_delim -AMDEP_TRUE!$AMDEP_TRUE$ac_delim -AMDEP_FALSE!$AMDEP_FALSE$ac_delim -AMDEPBACKSLASH!$AMDEPBACKSLASH$ac_delim -CCDEPMODE!$CCDEPMODE$ac_delim -am__fastdepCC_TRUE!$am__fastdepCC_TRUE$ac_delim -am__fastdepCC_FALSE!$am__fastdepCC_FALSE$ac_delim -_ACEOF - - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then - break - elif $ac_last_try; then - { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 -echo 
"$as_me: error: could not make $CONFIG_STATUS" >&2;} - { (exit 1); exit 1; }; } - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! " - fi -done - -ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` -if test -n "$ac_eof"; then - ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` - ac_eof=`expr $ac_eof + 1` -fi - -cat >>$CONFIG_STATUS <<_ACEOF -cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b -_ACEOF -sed ' -s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g -s/^/s,@/; s/!/@,|#_!!_#|/ -:n -t n -s/'"$ac_delim"'$/,g/; t -s/$/\\/; p -N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n -' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF -CEOF$ac_eof -_ACEOF - - -ac_delim='%!_!# ' -for ac_last_try in false false false false false :; do - cat >conf$$subs.sed <<_ACEOF -CXX!$CXX$ac_delim -CXXFLAGS!$CXXFLAGS$ac_delim -ac_ct_CXX!$ac_ct_CXX$ac_delim -CXXDEPMODE!$CXXDEPMODE$ac_delim -am__fastdepCXX_TRUE!$am__fastdepCXX_TRUE$ac_delim -am__fastdepCXX_FALSE!$am__fastdepCXX_FALSE$ac_delim -RANLIB!$RANLIB$ac_delim -USE_ALTERNATE_AR_TRUE!$USE_ALTERNATE_AR_TRUE$ac_delim -USE_ALTERNATE_AR_FALSE!$USE_ALTERNATE_AR_FALSE$ac_delim -ALTERNATE_AR!$ALTERNATE_AR$ac_delim -CXXCPP!$CXXCPP$ac_delim -USING_EXPORT_MAKEFILES_TRUE!$USING_EXPORT_MAKEFILES_TRUE$ac_delim -USING_EXPORT_MAKEFILES_FALSE!$USING_EXPORT_MAKEFILES_FALSE$ac_delim -PERL_EXE!$PERL_EXE$ac_delim -HAVE_PERL!$HAVE_PERL$ac_delim -USING_PERL_TRUE!$USING_PERL_TRUE$ac_delim -USING_PERL_FALSE!$USING_PERL_FALSE$ac_delim -USING_GNUMAKE_TRUE!$USING_GNUMAKE_TRUE$ac_delim -USING_GNUMAKE_FALSE!$USING_GNUMAKE_FALSE$ac_delim -BUILD_TESTS_TRUE!$BUILD_TESTS_TRUE$ac_delim -BUILD_TESTS_FALSE!$BUILD_TESTS_FALSE$ac_delim -SUB_TEST_TRUE!$SUB_TEST_TRUE$ac_delim -SUB_TEST_FALSE!$SUB_TEST_FALSE$ac_delim -GREP!$GREP$ac_delim -EGREP!$EGREP$ac_delim -PTHREAD_CC!$PTHREAD_CC$ac_delim -PTHREAD_LIBS!$PTHREAD_LIBS$ac_delim -PTHREAD_CFLAGS!$PTHREAD_CFLAGS$ac_delim -ac_aux_dir!$ac_aux_dir$ac_delim -LIBOBJS!$LIBOBJS$ac_delim -LTLIBOBJS!$LTLIBOBJS$ac_delim -_ACEOF - - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 31; then - break - elif $ac_last_try; then - { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 -echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} - { (exit 1); exit 1; }; } - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! " - fi -done - -ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` -if test -n "$ac_eof"; then - ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` - ac_eof=`expr $ac_eof + 1` -fi - -cat >>$CONFIG_STATUS <<_ACEOF -cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end -_ACEOF -sed ' -s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g -s/^/s,@/; s/!/@,|#_!!_#|/ -:n -t n -s/'"$ac_delim"'$/,g/; t -s/$/\\/; p -N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n -' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF -:end -s/|#_!!_#|//g -CEOF$ac_eof -_ACEOF - - -# VPATH may cause trouble with some makes, so we remove $(srcdir), -# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and -# trailing colons and then remove the whole line if VPATH becomes empty -# (actually we leave an empty line to preserve line numbers). 
-if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=/{ -s/:*\$(srcdir):*/:/ -s/:*\${srcdir}:*/:/ -s/:*@srcdir@:*/:/ -s/^\([^=]*=[ ]*\):*/\1/ -s/:*$// -s/^[^=]*=[ ]*$// -}' -fi - -cat >>$CONFIG_STATUS <<\_ACEOF -fi # test -n "$CONFIG_FILES" - - -for ac_tag in :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS -do - case $ac_tag in - :[FHLC]) ac_mode=$ac_tag; continue;; - esac - case $ac_mode$ac_tag in - :[FHL]*:*);; - :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5 -echo "$as_me: error: Invalid tag $ac_tag." >&2;} - { (exit 1); exit 1; }; };; - :[FH]-) ac_tag=-:-;; - :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; - esac - ac_save_IFS=$IFS - IFS=: - set x $ac_tag - IFS=$ac_save_IFS - shift - ac_file=$1 - shift - - case $ac_mode in - :L) ac_source=$1;; - :[FH]) - ac_file_inputs= - for ac_f - do - case $ac_f in - -) ac_f="$tmp/stdin";; - *) # Look for the file first in the build tree, then in the source tree - # (if the path is not absolute). The absolute path cannot be DOS-style, - # because $ac_f cannot contain `:'. - test -f "$ac_f" || - case $ac_f in - [\\/$]*) false;; - *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; - esac || - { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 -echo "$as_me: error: cannot find input file: $ac_f" >&2;} - { (exit 1); exit 1; }; };; - esac - ac_file_inputs="$ac_file_inputs $ac_f" - done - - # Let's still pretend it is `configure' which instantiates (i.e., don't - # use $as_me), people would be surprised to read: - # /* config.h. Generated by config.status. */ - configure_input="Generated from "`IFS=: - echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure." - if test x"$ac_file" != x-; then - configure_input="$ac_file. $configure_input" - { echo "$as_me:$LINENO: creating $ac_file" >&5 -echo "$as_me: creating $ac_file" >&6;} - fi - - case $ac_tag in - *:-:* | *:-) cat >"$tmp/stdin";; - esac - ;; - esac - - ac_dir=`$as_dirname -- "$ac_file" || -$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$ac_file" : 'X\(//\)[^/]' \| \ - X"$ac_file" : 'X\(//\)$' \| \ - X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || -echo X"$ac_file" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - { as_dir="$ac_dir" - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 -echo "$as_me: error: cannot create directory $as_dir" >&2;} - { (exit 1); exit 1; }; }; } - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` - # A ".." for each directory in $ac_dir_suffix. 
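The substitution tables assembled above are compiled into sed scripts of the form s,@NAME@,value,g (kept in subs-1.sed and subs-2.sed so no single sed script grows too large), and the next step pipes every template through them. A toy version of that pass, with made-up names and values:

# Miniature version of the @VAR@ -> value pass that config.status performs.
prefix=/usr/local
PTHREAD_LIBS=-lpthread
cat > subs.sed <<EOF
s,@prefix@,$prefix,g
s,@PTHREAD_LIBS@,$PTHREAD_LIBS,g
EOF
cat > Makefile.in <<'EOF'
prefix = @prefix@
LIBS   = @PTHREAD_LIBS@ @LIBS@
EOF
sed -f subs.sed Makefile.in      # @LIBS@ survives this pass; a later script handles it
rm -f subs.sed Makefile.in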
- ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - - case $ac_mode in - :F) - # - # CONFIG_FILE - # - - case $INSTALL in - [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; - *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; - esac - ac_MKDIR_P=$MKDIR_P - case $MKDIR_P in - [\\/$]* | ?:[\\/]* ) ;; - */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; - esac -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF -# If the template does not know about datarootdir, expand it. -# FIXME: This hack should be removed a few years after 2.60. -ac_datarootdir_hack=; ac_datarootdir_seen= - -case `sed -n '/datarootdir/ { - p - q -} -/@datadir@/p -/@docdir@/p -/@infodir@/p -/@localedir@/p -/@mandir@/p -' $ac_file_inputs` in -*datarootdir*) ac_datarootdir_seen=yes;; -*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF - ac_datarootdir_hack=' - s&@datadir@&$datadir&g - s&@docdir@&$docdir&g - s&@infodir@&$infodir&g - s&@localedir@&$localedir&g - s&@mandir@&$mandir&g - s&\\\${datarootdir}&$datarootdir&g' ;; -esac -_ACEOF - -# Neutralize VPATH when `$srcdir' = `.'. -# Shell code in configure.ac might set extrasub. -# FIXME: do we really want to maintain this feature? -cat >>$CONFIG_STATUS <<_ACEOF - sed "$ac_vpsub -$extrasub -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF -:t -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b -s&@configure_input@&$configure_input&;t t -s&@top_builddir@&$ac_top_builddir_sub&;t t -s&@srcdir@&$ac_srcdir&;t t -s&@abs_srcdir@&$ac_abs_srcdir&;t t -s&@top_srcdir@&$ac_top_srcdir&;t t -s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t -s&@builddir@&$ac_builddir&;t t -s&@abs_builddir@&$ac_abs_builddir&;t t -s&@abs_top_builddir@&$ac_abs_top_builddir&;t t -s&@INSTALL@&$ac_INSTALL&;t t -s&@MKDIR_P@&$ac_MKDIR_P&;t t -$ac_datarootdir_hack -" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out - -test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && - { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && - { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && - { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined." >&5 -echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined." 
>&2;} - - rm -f "$tmp/stdin" - case $ac_file in - -) cat "$tmp/out"; rm -f "$tmp/out";; - *) rm -f "$ac_file"; mv "$tmp/out" $ac_file;; - esac - ;; - :H) - # - # CONFIG_HEADER - # -_ACEOF - -# Transform confdefs.h into a sed script `conftest.defines', that -# substitutes the proper values into config.h.in to produce config.h. -rm -f conftest.defines conftest.tail -# First, append a space to every undef/define line, to ease matching. -echo 's/$/ /' >conftest.defines -# Then, protect against being on the right side of a sed subst, or in -# an unquoted here document, in config.status. If some macros were -# called several times there might be several #defines for the same -# symbol, which is useless. But do not sort them, since the last -# AC_DEFINE must be honored. -ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* -# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where -# NAME is the cpp macro being defined, VALUE is the value it is being given. -# PARAMS is the parameter list in the macro definition--in most cases, it's -# just an empty string. -ac_dA='s,^\\([ #]*\\)[^ ]*\\([ ]*' -ac_dB='\\)[ (].*,\\1define\\2' -ac_dC=' ' -ac_dD=' ,' - -uniq confdefs.h | - sed -n ' - t rset - :rset - s/^[ ]*#[ ]*define[ ][ ]*// - t ok - d - :ok - s/[\\&,]/\\&/g - s/^\('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p - s/^\('"$ac_word_re"'\)[ ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p - ' >>conftest.defines - -# Remove the space that was appended to ease matching. -# Then replace #undef with comments. This is necessary, for -# example, in the case of _POSIX_SOURCE, which is predefined and required -# on some systems where configure will not decide to define it. -# (The regexp can be short, since the line contains either #define or #undef.) -echo 's/ $// -s,^[ #]*u.*,/* & */,' >>conftest.defines - -# Break up conftest.defines: -ac_max_sed_lines=50 - -# First sed command is: sed -f defines.sed $ac_file_inputs >"$tmp/out1" -# Second one is: sed -f defines.sed "$tmp/out1" >"$tmp/out2" -# Third one will be: sed -f defines.sed "$tmp/out2" >"$tmp/out1" -# et cetera. -ac_in='$ac_file_inputs' -ac_out='"$tmp/out1"' -ac_nxt='"$tmp/out2"' - -while : -do - # Write a here document: - cat >>$CONFIG_STATUS <<_ACEOF - # First, check the format of the line: - cat >"\$tmp/defines.sed" <<\\CEOF -/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def -/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def -b -:def -_ACEOF - sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS - echo 'CEOF - sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS - ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in - sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail - grep . conftest.tail >/dev/null || break - rm -f conftest.defines - mv conftest.tail conftest.defines -done -rm -f conftest.defines conftest.tail - -echo "ac_result=$ac_in" >>$CONFIG_STATUS -cat >>$CONFIG_STATUS <<\_ACEOF - if test x"$ac_file" != x-; then - echo "/* $configure_input */" >"$tmp/config.h" - cat "$ac_result" >>"$tmp/config.h" - if diff $ac_file "$tmp/config.h" >/dev/null 2>&1; then - { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 -echo "$as_me: $ac_file is unchanged" >&6;} - else - rm -f $ac_file - mv "$tmp/config.h" $ac_file - fi - else - echo "/* $configure_input */" - cat "$ac_result" - fi - rm -f "$tmp/out12" -# Compute $ac_file's index in $config_headers. 
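For CONFIG_HEADER targets the transformation runs the other way: every #define collected in confdefs.h becomes a sed command that rewrites the matching #undef line of the template, and any #undef left untouched is commented out. A compact sketch of that rewrite with illustrative macro values:

# Turn configure's accumulated defines into a config.h, as config.status does.
cat > confdefs.h <<'EOF'
#define HAVE_PTHREAD 1
#define STDC_HEADERS 1
EOF
cat > config.h.in <<'EOF'
#undef HAVE_PTHREAD
#undef STDC_HEADERS
#undef HAVE_UNISTD_H
EOF
defines=`sed 's|^#define \([A-Za-z_][A-Za-z_0-9]*\) \(.*\)|s,^#undef \1$,#define \1 \2,|' confdefs.h`
sed "$defines
s,^#undef .*,/* & */," config.h.in > config.h
cat config.h                      # HAVE_UNISTD_H stays as a commented-out #undef
rm -f confdefs.h config.h.in config.h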
-_am_stamp_count=1 -for _am_header in $config_headers :; do - case $_am_header in - $ac_file | $ac_file:* ) - break ;; - * ) - _am_stamp_count=`expr $_am_stamp_count + 1` ;; - esac -done -echo "timestamp for $ac_file" >`$as_dirname -- $ac_file || -$as_expr X$ac_file : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X$ac_file : 'X\(//\)[^/]' \| \ - X$ac_file : 'X\(//\)$' \| \ - X$ac_file : 'X\(/\)' \| . 2>/dev/null || -echo X$ac_file | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'`/stamp-h$_am_stamp_count - ;; - - :C) { echo "$as_me:$LINENO: executing $ac_file commands" >&5 -echo "$as_me: executing $ac_file commands" >&6;} - ;; - esac - - - case $ac_file$ac_mode in - "depfiles":C) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do - # Strip MF so we end up with the name of the file. - mf=`echo "$mf" | sed -e 's/:.*$//'` - # Check whether this is an Automake generated Makefile or not. - # We used to match only the files named `Makefile.in', but - # some people rename them; so instead we look at the file content. - # Grep'ing the first line is not enough: some people post-process - # each Makefile.in and add a new line on top of each file to say so. - # Grep'ing the whole file is not good either: AIX grep has a line - # limit of 2048, but all sed's we know have understand at least 4000. - if sed 10q "$mf" | grep '^#.*generated by automake' > /dev/null 2>&1; then - dirpart=`$as_dirname -- "$mf" || -$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$mf" : 'X\(//\)[^/]' \| \ - X"$mf" : 'X\(//\)$' \| \ - X"$mf" : 'X\(/\)' \| . 2>/dev/null || -echo X"$mf" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - else - continue - fi - # Extract the definition of DEPDIR, am__include, and am__quote - # from the Makefile without running `make'. - DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` - test -z "$DEPDIR" && continue - am__include=`sed -n 's/^am__include = //p' < "$mf"` - test -z "am__include" && continue - am__quote=`sed -n 's/^am__quote = //p' < "$mf"` - # When using ansi2knr, U may be empty or an underscore; expand it - U=`sed -n 's/^U = //p' < "$mf"` - # Find all dependency output files, they are included files with - # $(DEPDIR) in their names. We invoke sed twice because it is the - # simplest approach to changing $(DEPDIR) to its actual value in the - # expansion. - for file in `sed -n " - s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ - sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do - # Make sure the directory exists. - test -f "$dirpart/$file" && continue - fdir=`$as_dirname -- "$file" || -$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$file" : 'X\(//\)[^/]' \| \ - X"$file" : 'X\(//\)$' \| \ - X"$file" : 'X\(/\)' \| . 
2>/dev/null || -echo X"$file" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - { as_dir=$dirpart/$fdir - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 -echo "$as_me: error: cannot create directory $as_dir" >&2;} - { (exit 1); exit 1; }; }; } - # echo "creating $dirpart/$file" - echo '# dummy' > "$dirpart/$file" - done -done - ;; - - esac -done # for ac_tag - - -{ (exit 0); exit 0; } -_ACEOF -chmod +x $CONFIG_STATUS -ac_clean_files=$ac_clean_files_save - - -# configure is writing to config.log, and then calls config.status. -# config.status does its own redirection, appending to config.log. -# Unfortunately, on DOS this fails, as config.log is still kept open -# by configure, so config.status won't be able to write to it; its -# output is simply discarded. So we exec the FD to /dev/null, -# effectively closing config.log, so it can be properly (re)opened and -# appended to by config.status. When coming back to configure, we -# need to make the FD available again. -if test "$no_create" != yes; then - ac_cs_success=: - ac_config_status_args= - test "$silent" = yes && - ac_config_status_args="$ac_config_status_args --quiet" - exec 5>/dev/null - $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false - exec 5>>config.log - # Use ||, not &&, to avoid exiting from the if with $? = 1, which - # would make configure fail if this is the last instruction. - $ac_cs_success || { (exit 1); exit 1; } -fi - - -# Bye World! -echo "---------------------------------------------" -echo "Finished Running ThreadPool Configure Script" -echo "---------------------------------------------" diff --git a/kokkos/basic/optional/ThreadPool/configure.ac b/kokkos/basic/optional/ThreadPool/configure.ac deleted file mode 100644 index 12778f4..0000000 --- a/kokkos/basic/optional/ThreadPool/configure.ac +++ /dev/null @@ -1,240 +0,0 @@ -# ------------------------------------------------------------------------ -# Process this file with autoconf to produce a configure script. -# ------------------------------------------------------------------------ - -# @HEADER -# ************************************************************************ -# -# ThreadPool Package -# Copyright (2008) Sandia Corporation -# -# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the U.S. Government. 
-# -# This library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as -# published by the Free Software Foundation; either version 2.1 of the -# License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -# USA -# Questions? Contact Carter Edwards (hcedwar@sandia.gov) -# -# ************************************************************************ -# @HEADER - -# ------------------------------------------------------------------------ -# Initialization -# ------------------------------------------------------------------------ - -# This must be the first line in configure.ac. -# Optional 3rd argument is email address for bugs. - -# #np# - package name, version number, and e-mail address below -AC_INIT(ThreadPool, 1.1d, hcedwar@sandia.gov) - -# Hello World! -echo "----------------------------------------" -echo "Running ThreadPool Configure Script" -echo "----------------------------------------" - -# This is to protect against accidentally specifying the wrong -# directory with --srcdir. Any file in that directory will do, -# preferably one that is unlikely to be removed or renamed. - -AC_CONFIG_SRCDIR([src/TPI.c]) - -# Specify directory for auxiliary build tools (e.g., install-sh, -# config.sub, config.guess) and M4 files. - -AC_CONFIG_AUX_DIR(config) -# #auto np# - Change file names in next line -# Configure should create src/ThreadPool_config.h from src/ThreadPool_config.h.in - -AM_CONFIG_HEADER(src/ThreadPool_config.h:src/ThreadPool_config.h.in) - -# Allow users to specify their own "install" command. If none is specified, -# the default is install-sh found in the config subdirectory. - -AC_ARG_WITH(install, - [AC_HELP_STRING([--with-install=INSTALL_PROGRAM], - [Use the installation program INSTALL_PROGRAM rather than the default that is provided. For example --with-install="/path/install -p"])], - [ - INSTALL=$withval - INSTALL_PROGRAM=$withval - INSTALL_SCRIPT=$withval - INSTALL_DATA="$withval -m 644" - ],) - -# AM_MAINTAINER_MODE turns off maintainer-only makefile targets by -# default, and changes configure to understand a -# --enable-maintainer-mode option. --enable-maintainer-mode turns the -# maintainer-only targets back on. The maintainer-only makefile -# targets permit end users to clean automatically-generated files such -# as configure, which means they have to have autoconf and automake -# installed to repair the damage. AM_MAINTAINER_MODE makes it a bit -# harder for users to shoot themselves in the foot. - -AM_MAINTAINER_MODE - -# Define $build, $host, $target, etc - -AC_CANONICAL_TARGET - -# Use automake - -# - Required version of automake. -AM_INIT_AUTOMAKE(1.10 no-define tar-ustar) - -# Specify required version of autoconf. 
- -AC_PREREQ(2.61) - -# ------------------------------------------------------------------------ -# Check to see if MPI enabled and if any special configuration done -# ------------------------------------------------------------------------ - -TAC_ARG_CONFIG_MPI - -# #np# - can eliminate compiler checks below if your package does not use the -# language corresponding to the check. Please note that if you use -# F77_FUNC to determine Fortran name mangling, you should not remove -# the Fortran compiler check or the check for Fortran flags. Doing -# so will prevent the detection of the proper name mangling in some -# cases. -# ------------------------------------------------------------------------ -# Checks for programs -# ------------------------------------------------------------------------ - -AC_PROG_CC(cc gcc) -AC_PROG_CXX(CC g++ c++ cxx) -#AC_PROG_F77(f77 g77 gfortran f90 xlf90 f95) -AC_PROG_RANLIB - -# Check if --with-flags present, prepend any specs to FLAGS - -TAC_ARG_WITH_FLAGS(ccflags, CCFLAGS) -TAC_ARG_WITH_FLAGS(cxxflags, CXXFLAGS) -TAC_ARG_WITH_FLAGS(cflags, CFLAGS) -#TAC_ARG_WITH_FLAGS(fflags, FFLAGS) -TAC_ARG_WITH_LIBS -TAC_ARG_WITH_FLAGS(ldflags, LDFLAGS) - -# ------------------------------------------------------------------------ -# Alternate archiver -# ------------------------------------------------------------------------ - -TAC_ARG_WITH_AR - -# ------------------------------------------------------------------------ -# MPI link check -# ------------------------------------------------------------------------ -TAC_ARG_CHECK_MPI - -# ------------------------------------------------------------------------ -# Checks for Makefile.export related systems -# ------------------------------------------------------------------------ -TAC_ARG_ENABLE_EXPORT_MAKEFILES(yes) - -# ------------------------------------------------------------------------ -# Checks if tests and examples should be built -# ------------------------------------------------------------------------ - -# #np# - These options can disable the tests and examples of a package. -# #np# - Packages that do not have tests or examples should #-out the -# #np# - option(s) that does (do) not apply. - -TAC_ARG_ENABLE_FEATURE(tests, [Make tests for all Trilinos packages buildable with 'make tests'], TESTS, yes) -TAC_ARG_ENABLE_FEATURE_SUB_CHECK( threadpool, tests, [Make ThreadPool tests buildable with 'make tests'], NEW_PACKAGE_TESTS) -AM_CONDITIONAL(BUILD_TESTS, test "X$ac_cv_use_threadpool_tests" != "Xno") - -#TAC_ARG_ENABLE_FEATURE(examples, [Make examples for all Trilinos packages buildable with 'make examples'], EXAMPLES, yes) -#TAC_ARG_ENABLE_FEATURE_SUB_CHECK( new_package, examples, [Make New_Package examples buildable with 'make examples'], NEW_PACKAGE_EXAMPLES) -#AM_CONDITIONAL(BUILD_EXAMPLES, test "X$ac_cv_use_new_package_examples" != "Xno") - -#We now build tests and examples through separate make targets, rather than -#during "make". We still need to conditionally include the test and example -#in SUBDIRS, even though SUB_TEST and SUB_EXAMPLE will never be -#defined, so that the tests and examples are included in the distribution -#tarball. -AM_CONDITIONAL(SUB_TEST, test "X$ac_cv_use_sub_test" = "Xyes") -#AM_CONDITIONAL(SUB_EXAMPLE, test "X$ac_cv_use_sub_example" = "Xyes") - -TAC_ARG_ENABLE_FEATURE(libcheck, [Check for some third-party libraries. 
(Cannot be disabled unless tests and examples are also disabled.)], LIBCHECK, yes) - -# ------------------------------------------------------------------------ -# Specify other directories -# ------------------------------------------------------------------------ - -# enable use of --with-libdirs="-Llibdir1 -Llibdir2 ..." to prepend to LDFLAGS -TAC_ARG_WITH_LIBDIRS -# enable use of --with-incdirs="-Lincdir1 -Lincdir2 ..." to prepend to CPPFLAGS -TAC_ARG_WITH_INCDIRS - -# #np# - Yet another opportunity to remove code if you aren't -# using Fortran -# Define F77_FUNC that will be used to link with Fortran subroutines. - trash WORKGXX -#AC_F77_WRAPPERS - -# ------------------------------------------------------------------------ -# Checks for libraries -# ------------------------------------------------------------------------ - -# If tests, examples and libcheck are disabled, we don't have to check -# for these libraries. - -# #np# - -# If a package does not have tests or examples, the corresponding check(s) -# should be pulled out of the "if" statement below. -#if test "X$ac_cv_use_new_package_examples" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then -if test "X$ac_cv_use_threadpool_tests" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then - -ACX_PTHREAD -LIBS="$PTHREAD_LIBS $LIBS" -CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -CC="$PTHREAD_CC" - -fi -# end of the list of libraries that don't need to be checked for if -# tests and examples are disabled. - -# ------------------------------------------------------------------------ -# Checks for linker characteristics -# ------------------------------------------------------------------------ - -# Determine libraries needed for linking with Fortran -#AC_F77_LIBRARY_LDFLAGS - - -# ------------------------------------------------------------------------ -# Perform substitutions in output files -# ------------------------------------------------------------------------ - -AC_SUBST(ac_aux_dir) - -# ------------------------------------------------------------------------ -# Output files -# ------------------------------------------------------------------------ -## -# You will need to change AC_CONFIG_FILES below and Makefile.am -# to add a new directory. -AC_CONFIG_FILES([ - Makefile - Makefile.export.threadpool - src/Makefile - test/Makefile - ]) - -AC_OUTPUT() - -# Bye World! 
-echo "---------------------------------------------" -echo "Finished Running ThreadPool Configure Script" -echo "---------------------------------------------" diff --git a/kokkos/basic/optional/ThreadPool/src/CMakeLists.txt b/kokkos/basic/optional/ThreadPool/src/CMakeLists.txt deleted file mode 100644 index 41a1f39..0000000 --- a/kokkos/basic/optional/ThreadPool/src/CMakeLists.txt +++ /dev/null @@ -1,70 +0,0 @@ - -INCLUDE(PackageLibraryMacros) - -# -# A) Package-specific configuration options -# - -PACKAGE_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) - -# -# B) Define the header and source files (and directories) -# - -# -# src -# - -SET(HEADERS "") -SET(SOURCES "") - -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) - -SET(HEADERS ${HEADERS} - ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h - ) - -# -# Core files -# - -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) - -APPEND_SET(HEADERS - TPI.h - TPI.hpp - ) - -APPEND_SET(SOURCES - TPI.c - ) - -# -# Util files -# -APPEND_SET(SOURCES - TPI_Walltime.c - ) - -###################################### - -APPEND_SET(HEADERS - ) - -APPEND_SET(SOURCES - ) - -###################################### -IF (TPL_ENABLE_MPI) -ENDIF() - -# -# C) Define the targets for package's library(s) -# - -PACKAGE_ADD_LIBRARY( - tpi - HEADERS ${HEADERS} - SOURCES ${SOURCES} - ) diff --git a/kokkos/basic/optional/ThreadPool/src/Makefile.am b/kokkos/basic/optional/ThreadPool/src/Makefile.am deleted file mode 100644 index 44c1621..0000000 --- a/kokkos/basic/optional/ThreadPool/src/Makefile.am +++ /dev/null @@ -1,140 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# ThreadPool Package -# Copyright (2008) Sandia Corporation -# -# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the U.S. Government. -# -# This library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as -# published by the Free Software Foundation; either version 2.1 of the -# License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -# USA -# Questions? Contact Carter Edwards (hcedwar@sandia.gov) -# -# ************************************************************************ -# @HEADER - -# The following line helps the test harness recover from build errors. - -all-local: - @echo "" - @echo "Trilinos package ThreadPool subdirectory src built successfully." - @echo "" - -# ------------------------------------------------------------------------ -# For each category, create two variables - NAME and NAME_H. The -# second is the list of headers to be installed, i.e., any header that -# might someday be needed by some other code outside New_Package. The first is -# the list of all source and any other header files. -# ------------------------------------------------------------------------ - -#np# Make sure to list all source files in one of the following categories. 
- -CORE = $(srcdir)/TPI.c - -CORE_H = \ - $(srcdir)/TPI.h \ - $(srcdir)/TPI.hpp - -UTIL = \ - $(srcdir)/TPI_Walltime.c - - -# ------------------------------------------------------------------------ -# ThreadPool library specifications -# ------------------------------------------------------------------------ -#np# replace new_package with the name of the package being autotool'ed here -THREADPOOL_LIB = libtpi.a - -#np# replace new_package with the name of the package being autotool'ed here -THREADPOOL_H = \ - $(CORE_H) - -#np# replace new_package with the name of the package being autotool'ed here -libtpi_a_SOURCES = \ - $(CORE) \ - $(UTIL) - -#np# replace new_package with the name of the package being autotool'ed here -#EXTRA_libtpi_a_SOURCES = - -include $(top_builddir)/Makefile.export.threadpool - -if USING_GNUMAKE -EXPORT_INCLUDES = $(shell $(PERL_EXE) $(top_srcdir)/config/strip_dup_incl_paths.pl $(THREADPOOL_INCLUDES)) -else -EXPORT_INCLUDES = $(THREADPOOL_INCLUDES) -endif - -AM_CPPFLAGS = $(EXPORT_INCLUDES) - -# ------------------------------------------------------------------------ -# For using a special archiver -# ------------------------------------------------------------------------ - -if USE_ALTERNATE_AR - -libtpi_a_AR = $(ALTERNATE_AR) -else - -libtpi_a_AR = $(AR) cru - -endif - -# ------------------------------------------------------------------------ -# Some C++ compilers create extra .o-files for templates. We need to -# be sure to include these, and this is the hack to do it. -# ------------------------------------------------------------------------ - -libtpi_a_LIBADD = $(XTRALDADD) - -# ------------------------------------------------------------------------ -# List of all libraries to install in $(libexecdir) -# ------------------------------------------------------------------------ - -lib_LIBRARIES = $(THREADPOOL_LIB) - -# ------------------------------------------------------------------------ -# List of all headers to install in $(includedir) -# ------------------------------------------------------------------------ - -#np# replace new_package with the name of the package being autotool'ed here -include_HEADERS = $(THREADPOOL_H) - -# ------------------------------------------------------------------------ -# Special stuff to install in our special $(execincludedir) -# ------------------------------------------------------------------------ - -# SPECIAL NOTE: New_Package_config.h is a machine-dependent file, so we need -# to install it in the machine-dependent directory. However, that is -# not a default installation directory, so we had to create it -# special. - -# All Trilinos headers are now installed in the same directory -execincludedir = $(includedir) -#np# replace new_package with the name of the package being autotool'ed here -nodist_execinclude_HEADERS = ThreadPool_config.h - -# ------------------------------------------------------------------------ -# Files to be deleted by 'make maintainer-clean' -# ------------------------------------------------------------------------ - -MAINTAINERCLEANFILES = Makefile.in - - - - - - diff --git a/kokkos/basic/optional/ThreadPool/src/Makefile.in b/kokkos/basic/optional/ThreadPool/src/Makefile.in deleted file mode 100644 index 4dd7802..0000000 --- a/kokkos/basic/optional/ThreadPool/src/Makefile.in +++ /dev/null @@ -1,680 +0,0 @@ -# Makefile.in generated by automake 1.10 from Makefile.am. 
-# @configure_input@ - -# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -# @HEADER -# ************************************************************************ -# -# ThreadPool Package -# Copyright (2008) Sandia Corporation -# -# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the U.S. Government. -# -# This library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as -# published by the Free Software Foundation; either version 2.1 of the -# License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -# USA -# Questions? Contact Carter Edwards (hcedwar@sandia.gov) -# -# ************************************************************************ -# @HEADER - -# The following line helps the test harness recover from build errors. 
- - -VPATH = @srcdir@ -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -target_triplet = @target@ -subdir = src -DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ - $(srcdir)/Makefile.in $(srcdir)/ThreadPool_config.h.in -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/config/acx_pthread.m4 \ - $(top_srcdir)/config/tac_arg_check_mpi.m4 \ - $(top_srcdir)/config/tac_arg_config_mpi.m4 \ - $(top_srcdir)/config/tac_arg_enable_export-makefiles.m4 \ - $(top_srcdir)/config/tac_arg_enable_feature.m4 \ - $(top_srcdir)/config/tac_arg_enable_feature_sub_check.m4 \ - $(top_srcdir)/config/tac_arg_with_ar.m4 \ - $(top_srcdir)/config/tac_arg_with_flags.m4 \ - $(top_srcdir)/config/tac_arg_with_incdirs.m4 \ - $(top_srcdir)/config/tac_arg_with_libdirs.m4 \ - $(top_srcdir)/config/tac_arg_with_libs.m4 \ - $(top_srcdir)/config/tac_arg_with_perl.m4 \ - $(top_srcdir)/configure.ac -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -mkinstalldirs = $(install_sh) -d -CONFIG_HEADER = ThreadPool_config.h -CONFIG_CLEAN_FILES = -am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; -am__vpath_adj = case $$p in \ - $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ - *) f=$$p;; \ - esac; -am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; -am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" \ - "$(DESTDIR)$(execincludedir)" -libLIBRARIES_INSTALL = $(INSTALL_DATA) -LIBRARIES = $(lib_LIBRARIES) -AR = ar -ARFLAGS = cru -libtpi_a_DEPENDENCIES = -am__objects_1 = TPI.$(OBJEXT) -am__objects_2 = TPI_Walltime.$(OBJEXT) -am_libtpi_a_OBJECTS = $(am__objects_1) $(am__objects_2) -libtpi_a_OBJECTS = $(am_libtpi_a_OBJECTS) -DEFAULT_INCLUDES = -I.@am__isrc@ -depcomp = $(SHELL) $(top_srcdir)/config/depcomp -am__depfiles_maybe = depfiles -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ -SOURCES = $(libtpi_a_SOURCES) -DIST_SOURCES = $(libtpi_a_SOURCES) -includeHEADERS_INSTALL = $(INSTALL_HEADER) -nodist_execincludeHEADERS_INSTALL = $(INSTALL_HEADER) -HEADERS = $(include_HEADERS) $(nodist_execinclude_HEADERS) -ETAGS = etags -CTAGS = ctags -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -ALTERNATE_AR = @ALTERNATE_AR@ -AMTAR = @AMTAR@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CPPFLAGS = @CPPFLAGS@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -EXEEXT = @EXEEXT@ -GREP = @GREP@ -HAVE_PERL = @HAVE_PERL@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LDFLAGS = @LDFLAGS@ -LIBOBJS = @LIBOBJS@ -LIBS = 
@LIBS@ -LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MKDIR_P = @MKDIR_P@ -MPI_CC_EXISTS = @MPI_CC_EXISTS@ -MPI_CXX = @MPI_CXX@ -MPI_CXX_EXISTS = @MPI_CXX_EXISTS@ -MPI_F77_EXISTS = @MPI_F77_EXISTS@ -MPI_TEMP_CXX = @MPI_TEMP_CXX@ -OBJEXT = @OBJEXT@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PERL_EXE = @PERL_EXE@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -RANLIB = @RANLIB@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -STRIP = @STRIP@ -VERSION = @VERSION@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_aux_dir = @ac_aux_dir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = @build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target = @target@ -target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ - -# ------------------------------------------------------------------------ -# For each category, create two variables - NAME and NAME_H. The -# second is the list of headers to be installed, i.e., any header that -# might someday be needed by some other code outside New_Package. The first is -# the list of all source and any other header files. -# ------------------------------------------------------------------------ - -#np# Make sure to list all source files in one of the following categories. 
-CORE = $(srcdir)/TPI.c -CORE_H = \ - $(srcdir)/TPI.h \ - $(srcdir)/TPI.hpp - -UTIL = \ - $(srcdir)/TPI_Walltime.c - - -# ------------------------------------------------------------------------ -# ThreadPool library specifications -# ------------------------------------------------------------------------ -#np# replace new_package with the name of the package being autotool'ed here -THREADPOOL_LIB = libtpi.a - -#np# replace new_package with the name of the package being autotool'ed here -THREADPOOL_H = \ - $(CORE_H) - - -#np# replace new_package with the name of the package being autotool'ed here -libtpi_a_SOURCES = \ - $(CORE) \ - $(UTIL) - -@USING_GNUMAKE_FALSE@EXPORT_INCLUDES = $(THREADPOOL_INCLUDES) -@USING_GNUMAKE_TRUE@EXPORT_INCLUDES = $(shell $(PERL_EXE) $(top_srcdir)/config/strip_dup_incl_paths.pl $(THREADPOOL_INCLUDES)) -AM_CPPFLAGS = $(EXPORT_INCLUDES) -@USE_ALTERNATE_AR_FALSE@libtpi_a_AR = $(AR) cru - -# ------------------------------------------------------------------------ -# For using a special archiver -# ------------------------------------------------------------------------ -@USE_ALTERNATE_AR_TRUE@libtpi_a_AR = $(ALTERNATE_AR) - -# ------------------------------------------------------------------------ -# Some C++ compilers create extra .o-files for templates. We need to -# be sure to include these, and this is the hack to do it. -# ------------------------------------------------------------------------ -libtpi_a_LIBADD = $(XTRALDADD) - -# ------------------------------------------------------------------------ -# List of all libraries to install in $(libexecdir) -# ------------------------------------------------------------------------ -lib_LIBRARIES = $(THREADPOOL_LIB) - -# ------------------------------------------------------------------------ -# List of all headers to install in $(includedir) -# ------------------------------------------------------------------------ - -#np# replace new_package with the name of the package being autotool'ed here -include_HEADERS = $(THREADPOOL_H) - -# ------------------------------------------------------------------------ -# Special stuff to install in our special $(execincludedir) -# ------------------------------------------------------------------------ - -# SPECIAL NOTE: New_Package_config.h is a machine-dependent file, so we need -# to install it in the machine-dependent directory. However, that is -# not a default installation directory, so we had to create it -# special. - -# All Trilinos headers are now installed in the same directory -execincludedir = $(includedir) -#np# replace new_package with the name of the package being autotool'ed here -nodist_execinclude_HEADERS = ThreadPool_config.h - -# ------------------------------------------------------------------------ -# Files to be deleted by 'make maintainer-clean' -# ------------------------------------------------------------------------ -MAINTAINERCLEANFILES = Makefile.in -all: ThreadPool_config.h - $(MAKE) $(AM_MAKEFLAGS) all-am - -.SUFFIXES: -.SUFFIXES: .c .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ - && exit 0; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ - cd $(top_srcdir) && \ - $(AUTOMAKE) --foreign src/Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' 
in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -ThreadPool_config.h: stamp-h1 - @if test ! -f $@; then \ - rm -f stamp-h1; \ - $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \ - else :; fi - -stamp-h1: $(srcdir)/ThreadPool_config.h.in $(top_builddir)/config.status - @rm -f stamp-h1 - cd $(top_builddir) && $(SHELL) ./config.status src/ThreadPool_config.h -$(srcdir)/ThreadPool_config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_srcdir) && $(AUTOHEADER) - rm -f stamp-h1 - touch $@ - -distclean-hdr: - -rm -f ThreadPool_config.h stamp-h1 -install-libLIBRARIES: $(lib_LIBRARIES) - @$(NORMAL_INSTALL) - test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" - @list='$(lib_LIBRARIES)'; for p in $$list; do \ - if test -f $$p; then \ - f=$(am__strip_dir) \ - echo " $(libLIBRARIES_INSTALL) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \ - $(libLIBRARIES_INSTALL) "$$p" "$(DESTDIR)$(libdir)/$$f"; \ - else :; fi; \ - done - @$(POST_INSTALL) - @list='$(lib_LIBRARIES)'; for p in $$list; do \ - if test -f $$p; then \ - p=$(am__strip_dir) \ - echo " $(RANLIB) '$(DESTDIR)$(libdir)/$$p'"; \ - $(RANLIB) "$(DESTDIR)$(libdir)/$$p"; \ - else :; fi; \ - done - -uninstall-libLIBRARIES: - @$(NORMAL_UNINSTALL) - @list='$(lib_LIBRARIES)'; for p in $$list; do \ - p=$(am__strip_dir) \ - echo " rm -f '$(DESTDIR)$(libdir)/$$p'"; \ - rm -f "$(DESTDIR)$(libdir)/$$p"; \ - done - -clean-libLIBRARIES: - -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES) -libtpi.a: $(libtpi_a_OBJECTS) $(libtpi_a_DEPENDENCIES) - -rm -f libtpi.a - $(libtpi_a_AR) libtpi.a $(libtpi_a_OBJECTS) $(libtpi_a_LIBADD) - $(RANLIB) libtpi.a - -mostlyclean-compile: - -rm -f *.$(OBJEXT) - -distclean-compile: - -rm -f *.tab.c - -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/TPI.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/TPI_Walltime.Po@am__quote@ - -.c.o: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c $< - -.c.obj: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` - -TPI.o: $(srcdir)/TPI.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI.o -MD -MP -MF $(DEPDIR)/TPI.Tpo -c -o TPI.o `test -f '$(srcdir)/TPI.c' || echo '$(srcdir)/'`$(srcdir)/TPI.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI.Tpo 
$(DEPDIR)/TPI.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI.c' object='TPI.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI.o `test -f '$(srcdir)/TPI.c' || echo '$(srcdir)/'`$(srcdir)/TPI.c - -TPI.obj: $(srcdir)/TPI.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI.obj -MD -MP -MF $(DEPDIR)/TPI.Tpo -c -o TPI.obj `if test -f '$(srcdir)/TPI.c'; then $(CYGPATH_W) '$(srcdir)/TPI.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI.Tpo $(DEPDIR)/TPI.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI.c' object='TPI.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI.obj `if test -f '$(srcdir)/TPI.c'; then $(CYGPATH_W) '$(srcdir)/TPI.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI.c'; fi` - -TPI_Walltime.o: $(srcdir)/TPI_Walltime.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI_Walltime.o -MD -MP -MF $(DEPDIR)/TPI_Walltime.Tpo -c -o TPI_Walltime.o `test -f '$(srcdir)/TPI_Walltime.c' || echo '$(srcdir)/'`$(srcdir)/TPI_Walltime.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI_Walltime.Tpo $(DEPDIR)/TPI_Walltime.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI_Walltime.c' object='TPI_Walltime.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI_Walltime.o `test -f '$(srcdir)/TPI_Walltime.c' || echo '$(srcdir)/'`$(srcdir)/TPI_Walltime.c - -TPI_Walltime.obj: $(srcdir)/TPI_Walltime.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI_Walltime.obj -MD -MP -MF $(DEPDIR)/TPI_Walltime.Tpo -c -o TPI_Walltime.obj `if test -f '$(srcdir)/TPI_Walltime.c'; then $(CYGPATH_W) '$(srcdir)/TPI_Walltime.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI_Walltime.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI_Walltime.Tpo $(DEPDIR)/TPI_Walltime.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI_Walltime.c' object='TPI_Walltime.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI_Walltime.obj `if test -f '$(srcdir)/TPI_Walltime.c'; then $(CYGPATH_W) '$(srcdir)/TPI_Walltime.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI_Walltime.c'; fi` -install-includeHEADERS: $(include_HEADERS) - @$(NORMAL_INSTALL) - test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" - @list='$(include_HEADERS)'; for p in $$list; do \ - if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ - f=$(am__strip_dir) \ - echo " $(includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(includedir)/$$f'"; \ - $(includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(includedir)/$$f"; \ - done - -uninstall-includeHEADERS: - @$(NORMAL_UNINSTALL) - 
@list='$(include_HEADERS)'; for p in $$list; do \ - f=$(am__strip_dir) \ - echo " rm -f '$(DESTDIR)$(includedir)/$$f'"; \ - rm -f "$(DESTDIR)$(includedir)/$$f"; \ - done -install-nodist_execincludeHEADERS: $(nodist_execinclude_HEADERS) - @$(NORMAL_INSTALL) - test -z "$(execincludedir)" || $(MKDIR_P) "$(DESTDIR)$(execincludedir)" - @list='$(nodist_execinclude_HEADERS)'; for p in $$list; do \ - if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ - f=$(am__strip_dir) \ - echo " $(nodist_execincludeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(execincludedir)/$$f'"; \ - $(nodist_execincludeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(execincludedir)/$$f"; \ - done - -uninstall-nodist_execincludeHEADERS: - @$(NORMAL_UNINSTALL) - @list='$(nodist_execinclude_HEADERS)'; for p in $$list; do \ - f=$(am__strip_dir) \ - echo " rm -f '$(DESTDIR)$(execincludedir)/$$f'"; \ - rm -f "$(DESTDIR)$(execincludedir)/$$f"; \ - done - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: $(HEADERS) $(SOURCES) ThreadPool_config.h.in $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) ThreadPool_config.h.in $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$tags $$unique; \ - fi -ctags: CTAGS -CTAGS: $(HEADERS) $(SOURCES) ThreadPool_config.h.in $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) ThreadPool_config.h.in $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(CTAGS_ARGS)$$tags$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$tags $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && cd $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) $$here - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ - fi; \ - cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ - else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-am -all-am: Makefile $(LIBRARIES) $(HEADERS) 
ThreadPool_config.h all-local -installdirs: - for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(execincludedir)"; do \ - test -z "$$dir" || $(MKDIR_P) "$$dir"; \ - done -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." - -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) -clean: clean-am - -clean-am: clean-generic clean-libLIBRARIES mostlyclean-am - -distclean: distclean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -distclean-am: clean-am distclean-compile distclean-generic \ - distclean-hdr distclean-tags - -dvi: dvi-am - -dvi-am: - -html: html-am - -info: info-am - -info-am: - -install-data-am: install-includeHEADERS - -install-dvi: install-dvi-am - -install-exec-am: install-libLIBRARIES \ - install-nodist_execincludeHEADERS - -install-html: install-html-am - -install-info: install-info-am - -install-man: - -install-pdf: install-pdf-am - -install-ps: install-ps-am - -installcheck-am: - -maintainer-clean: maintainer-clean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-am - -mostlyclean-am: mostlyclean-compile mostlyclean-generic - -pdf: pdf-am - -pdf-am: - -ps: ps-am - -ps-am: - -uninstall-am: uninstall-includeHEADERS uninstall-libLIBRARIES \ - uninstall-nodist_execincludeHEADERS - -.MAKE: install-am install-strip - -.PHONY: CTAGS GTAGS all all-am all-local check check-am clean \ - clean-generic clean-libLIBRARIES ctags distclean \ - distclean-compile distclean-generic distclean-hdr \ - distclean-tags distdir dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-includeHEADERS install-info \ - install-info-am install-libLIBRARIES install-man \ - install-nodist_execincludeHEADERS install-pdf install-pdf-am \ - install-ps install-ps-am install-strip installcheck \ - installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ - uninstall-am uninstall-includeHEADERS uninstall-libLIBRARIES \ - uninstall-nodist_execincludeHEADERS - - -all-local: - @echo "" - @echo "Trilinos package ThreadPool subdirectory src built successfully." - @echo "" - -#np# replace new_package with the name of the package being autotool'ed here -#EXTRA_libtpi_a_SOURCES = - -include $(top_builddir)/Makefile.export.threadpool -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/kokkos/basic/optional/ThreadPool/src/TPI.c b/kokkos/basic/optional/ThreadPool/src/TPI.c deleted file mode 100644 index f2b1566..0000000 --- a/kokkos/basic/optional/ThreadPool/src/TPI.c +++ /dev/null @@ -1,1016 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. */ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. Carter Edwards - */ - -/*--------------------------------------------------------------------*/ - -#include -#include -#include -#include - -/*--------------------------------------------------------------------*/ -/*----------- PTHREAD CONFIGURATION (BEGIN) --------------------------*/ -/*--------------------------------------------------------------------*/ - -#if defined( HAVE_PTHREAD ) - -#include -#include -#include - -/*--------------------------------------------------------------------*/ -/*---------------- COMPILER SPECIFICS (BEGIN) ------------------------*/ -/*--------------------------------------------------------------------*/ - -/* Performance is heavily impacted by an - * atomic decrement of the work counter. - * Optimize this if at all possible. - */ - -#if defined( __INTEL_COMPILER ) - -#define THREADPOOL_CONFIG "PTHREAD SCHED_YIELD" - -#elif defined( __linux__ ) && \ - defined( __GNUC__ ) && ( 4 <= __GNUC__ ) - -#define THREADPOOL_CONFIG "PTHREAD SCHED_YIELD ATOMIC_SYNC" - -#define atomic_fetch_and_decrement( VALUE_PTR ) \ - __sync_fetch_and_sub( VALUE_PTR , 1 ) - -#else - -#define THREADPOOL_CONFIG "PTHREAD SCHED_YIELD" - -#endif - -#if ! defined( atomic_fetch_and_decrement ) - -static int atomic_fetch_and_decrement( volatile int * value ) -{ - static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER ; - int result ; - while ( EBUSY == pthread_mutex_trylock( & atomic_lock ) ); - result = ( *value )-- ; - pthread_mutex_unlock( & atomic_lock ); - return result ; -} - -#endif - -/*--------------------------------------------------------------------*/ -/*---------------- COMPILER SPECIFICS (END) --------------------------*/ -/*--------------------------------------------------------------------*/ - -typedef pthread_mutex_t local_lock_type ; - -#else /* ! 
defined( HAVE_PTHREAD ) */ - -#define THREADPOOL_CONFIG "NO THREADING" - -typedef int local_lock_type ; - -#endif - -/*--------------------------------------------------------------------*/ -/*----------- PTHREAD CONFIGURATION (END) ----------------------------*/ -/*--------------------------------------------------------------------*/ - -const char * TPI_Version() -{ - static const char version_string[] = - "TPI Version 1.1 , November 2009 , Configuration = " THREADPOOL_CONFIG ; - - return version_string ; -} - -/*--------------------------------------------------------------------*/ - -enum { THREAD_COUNT_MAX = 256 }; -enum { LOCK_COUNT_MAX = 32 }; - -struct ThreadPool_Data ; - -typedef struct Thread_Data { - struct Thread_Data * m_thread_fan ; /* Fan-in / fan-out begin */ - void * m_reduce ; /* Reduction memory */ - long m_rank ; - long m_barrier_wait_max ; - long m_barrier_wait_total ; - long m_barrier_wait_count ; - volatile long m_control ; -} Thread ; - -typedef struct ThreadPool_Data { - TPI_work_subprogram m_work_routine ; - const void * m_work_info ; - TPI_reduce_join m_reduce_join ; - TPI_reduce_init m_reduce_init ; - unsigned char * m_reduce_alloc ; - int m_reduce_alloc_size ; - int m_thread_count ; - int m_lock_init ; - int m_lock_count ; - int m_work_thread_count ; - int m_work_count ; - int m_work_count_claim ; - - Thread m_thread[ THREAD_COUNT_MAX ]; - local_lock_type m_lock[ LOCK_COUNT_MAX ]; -} ThreadPool ; - - -static ThreadPool thread_pool = -{ - /* m_work_routine */ NULL , - /* m_work_info */ NULL , - /* m_reduce_join */ NULL , - /* m_reduce_init */ NULL , - /* m_reduce_alloc */ NULL , - /* m_reduce_alloc_size */ 0 , - /* m_thread_count */ 0 , - /* m_lock_init */ 0 , - /* m_lock_count */ 0 , - /* m_work_thread_count */ 0 , - /* m_work_count */ 0 , - /* m_work_count_claim */ 0 -}; - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -#if defined( HAVE_PTHREAD ) - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -int TPI_Lock( int i ) -{ - int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; - - if ( ! result ) { - pthread_mutex_t * const lock = thread_pool.m_lock + i ; - - while ( EBUSY == ( result = pthread_mutex_trylock( lock ) ) ); - - if ( result ) { result = TPI_ERROR_LOCK ; } - } - return result ; -} - -int TPI_Unlock( int i ) -{ - int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; - - if ( ! result && pthread_mutex_unlock( thread_pool.m_lock + i ) ) { - result = TPI_ERROR_LOCK ; - } - - return result ; -} - -static int local_set_lock_count( const int lock_count ) -{ - int result = lock_count < 0 || LOCK_COUNT_MAX < lock_count - ? TPI_ERROR_SIZE : 0 ; - - while ( ! 
result && thread_pool.m_lock_init < lock_count ) { - - pthread_mutex_t * const lock = thread_pool.m_lock + - thread_pool.m_lock_init ; - - if ( pthread_mutex_init( lock , NULL ) ) { - result = TPI_ERROR_INTERNAL ; - } - else { - ++( thread_pool.m_lock_init ); - } - } - - return result ; -} - -static void local_destroy_locks() -{ - while ( thread_pool.m_lock_init ) { - --( thread_pool.m_lock_init ); - pthread_mutex_destroy( thread_pool.m_lock + thread_pool.m_lock_init ); - } -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ -/* Run work if any, then wait for child threads to block. */ - -static void local_run( Thread * const this_thread , void * reduce ) -{ - struct TPI_Work_Struct work ; - - work.info = thread_pool.m_work_info ; - work.reduce = reduce ; - work.count = thread_pool.m_work_count ; - work.lock_count = thread_pool.m_lock_count ; - - if ( work.count <= thread_pool.m_work_thread_count ) { - - work.rank = ( thread_pool.m_thread_count - 1 ) - this_thread->m_rank ; - - if ( work.rank < work.count ) { - (*thread_pool.m_work_routine)( & work ); - } - } - else { - - int * const claim = & thread_pool.m_work_count_claim ; - - while ( 0 < ( work.rank = atomic_fetch_and_decrement( claim ))) { - - work.rank = work.count - work.rank ; - - (*thread_pool.m_work_routine)( & work ); - } - } -} - -static int wait_thread( volatile long * const control , const int val ) -{ - int count = 0 ; - while ( val == *control ) { - sched_yield(); - ++count ; - } - return count ; -} - -static void local_barrier_wait( Thread * const this_thread , - Thread * const thread ) -{ - const long count = wait_thread( & thread->m_control , 1 ); - - ++( this_thread->m_barrier_wait_count ); - - this_thread->m_barrier_wait_total += count ; - - if ( this_thread->m_barrier_wait_max < count ) { - this_thread->m_barrier_wait_max = count ; - } -} - -static void local_barrier( Thread * const this_thread ) -{ - Thread * const thread_beg = this_thread[0].m_thread_fan ; - Thread * thread = this_thread[1].m_thread_fan ; - - if ( ! thread_pool.m_work_routine ) { - while ( thread_beg < thread ) { - --thread ; local_barrier_wait( this_thread , thread ); - } - } - else if ( ! thread_pool.m_reduce_join ) { - - local_run( this_thread , NULL ); - - while ( thread_beg < thread ) { - --thread ; local_barrier_wait( this_thread , thread ); - } - } - else { - - /* Work data for the reduction initialization and join */ - - struct TPI_Work_Struct work ; - - work.info = thread_pool.m_work_info ; - work.reduce = this_thread->m_reduce ; - work.count = -1 ; - work.rank = -1 ; - work.lock_count = -1 ; - - /* Initialize reduction value for non-root thread */ - - if ( this_thread->m_rank ) { (*thread_pool.m_reduce_init)( & work ); } - - /* Run the work routine with barrier blocking */ - - local_run( this_thread , work.reduce ); - - /* Reduction of thread's contributions */ - - while ( thread_beg < thread ) { - --thread ; local_barrier_wait( this_thread , thread ); - (*thread_pool.m_reduce_join)( & work , thread->m_reduce ); - } - } -} - -/*--------------------------------------------------------------------*/ -/* The driver given to 'pthread_create'. - * Run work until told to terminate. - */ -static void * local_driver( void * arg ) -{ - Thread * const this_thread = (Thread *) arg ; - - do { - /* Wait for my subtree of threads to complete */ - local_barrier( this_thread ); - - this_thread->m_control = 0 ; - - /* Spin until I am activated. 
*/ - wait_thread( & this_thread->m_control , 0 ); - - } while ( thread_pool.m_work_routine ); - - local_barrier( this_thread ); /* Termination barrier */ - - this_thread->m_control = 0 ; - - return NULL ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -static void alloc_reduce( int reduce_size ) -{ - const int alloc_count = thread_pool.m_thread_count - 1 ; - - if ( thread_pool.m_reduce_alloc_size < alloc_count * reduce_size ) { - - const int grain_shift = 8 ; /* grain_size = 0x80 */ - const int grain_size = 1 << grain_shift ; /* Byte grain size */ - const int grain_count = ( reduce_size + grain_size - 1 ) >> grain_shift ; - const int reduce_grain = grain_size * grain_count ; - const int alloc_size = alloc_count * reduce_grain ; - - int i ; - - if ( thread_pool.m_reduce_alloc ) { - thread_pool.m_reduce_alloc = - (unsigned char *) realloc( thread_pool.m_reduce_alloc , alloc_size ); - } - else { - thread_pool.m_reduce_alloc = (unsigned char *) malloc( alloc_size ); - } - - thread_pool.m_reduce_alloc_size = alloc_size ; - - for ( i = 0 ; i < alloc_count ; ++i ) { - thread_pool.m_thread[i+1].m_reduce = - thread_pool.m_reduce_alloc + reduce_grain * i ; - } - } -} - -static int local_start( - int work_thread_count , - TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - int lock_count , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ) -{ - const int result = lock_count ? local_set_lock_count( lock_count ) : 0 ; - - if ( ! result ) { - - thread_pool.m_work_routine = work_subprogram ; - thread_pool.m_work_info = work_info ; - thread_pool.m_work_count = work_count ; - thread_pool.m_lock_count = lock_count ; - thread_pool.m_thread->m_reduce = reduce_data ; - - if ( 1 < thread_pool.m_thread_count ) { - - if ( reduce_size ) { alloc_reduce( reduce_size ); } - - thread_pool.m_reduce_join = reduce_join ; - thread_pool.m_reduce_init = reduce_init ; - thread_pool.m_work_thread_count = work_thread_count ; - thread_pool.m_work_count_claim = work_count ; - - /* Activate the spinning worker threads */ - { - Thread * const thread_beg = thread_pool.m_thread + 1 ; - Thread * thread = thread_pool.m_thread + - thread_pool.m_thread_count ; - - while ( thread_beg < thread ) { (--thread)->m_control = 1 ; } - } - } - } - - return result ; -} - -static void local_wait() -{ - if ( 1 < thread_pool.m_thread_count ) { - - local_barrier( thread_pool.m_thread ); - - thread_pool.m_reduce_join = NULL ; - thread_pool.m_reduce_init = NULL ; - thread_pool.m_work_thread_count = 0 ; - thread_pool.m_work_count_claim = 0 ; - } - else { - struct TPI_Work_Struct w = { NULL , NULL , 0 , 0 , 0 }; - - w.info = thread_pool.m_work_info ; - w.count = thread_pool.m_work_count ; - w.lock_count = thread_pool.m_lock_count ; - w.reduce = thread_pool.m_thread->m_reduce ; - - for ( w.rank = 0 ; w.rank < w.count ; ++( w.rank ) ) { - (* thread_pool.m_work_routine )( & w ); - } - } - - thread_pool.m_work_routine = NULL ; - thread_pool.m_work_info = NULL ; - thread_pool.m_work_count = 0 ; - thread_pool.m_lock_count = 0 ; - thread_pool.m_thread->m_reduce = NULL ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -int TPI_Init( int n ) -{ - int result = thread_pool.m_thread_count ? TPI_ERROR_ACTIVE : 0 ; - - if ( ! 
result && ( n < 1 || THREAD_COUNT_MAX + 1 <= n ) ) { - result = TPI_ERROR_SIZE ; - } - - if ( ! result ) { - pthread_attr_t attr ; - - if ( pthread_attr_init( & attr ) - || pthread_attr_setscope( & attr, PTHREAD_SCOPE_SYSTEM ) - || pthread_attr_setdetachstate( & attr, PTHREAD_CREATE_DETACHED ) ) { - result = TPI_ERROR_INTERNAL ; - } - - if ( ! result ) { - int thread_rank = 0 ; - int count = 1 ; - - /* Initialize one lock for blocking and unblocking */ - - local_set_lock_count( 1 ); - - /* Initialize threads with fan-in / fan-out span of threads */ - - for ( thread_rank = 0 ; thread_rank <= n ; ++thread_rank ) { - Thread * const thread = thread_pool.m_thread + thread_rank ; - - thread->m_thread_fan = thread_pool.m_thread + count ; - thread->m_reduce = NULL ; - thread->m_rank = thread_rank ; - thread->m_barrier_wait_max = 0 ; - thread->m_barrier_wait_total = 0 ; - thread->m_barrier_wait_count = 0 ; - thread->m_control = 1 ; - - { - int up = 1 ; - while ( up <= thread_rank ) { up <<= 1 ; } - while ( thread_rank + up < n ) { up <<= 1 ; ++count ; } - } - } - - thread_pool.m_thread_count = n ; - - /* Create threads last-to-first for start up fan-in barrier */ - - for ( thread_rank = n ; ! result && 1 < thread_rank ; ) { - Thread * const thread = thread_pool.m_thread + --thread_rank ; - - pthread_t pt ; - - if ( pthread_create( & pt, & attr, & local_driver, thread ) ) { - thread->m_control = 0 ; - result = TPI_ERROR_INTERNAL ; - } - } - - /* If a thread-spawn failed, terminate the created threads */ - - if ( result ) { - while ( thread_rank < --( thread_pool.m_thread_count ) ) { - Thread * thread = thread_pool.m_thread + thread_pool.m_thread_count ; - wait_thread( & thread->m_control , 1 ); /* Wait for blocking */ - thread->m_control = 1 ; /* Reactivate thread */ - wait_thread( & thread->m_control , 1 ); /* Wait for termination */ - } - thread_pool.m_thread_count = 0 ; - } - - pthread_attr_destroy( & attr ); - } - } - - if ( ! result ) { - local_barrier( thread_pool.m_thread ); - result = n ; - } - - return result ; -} - -/*--------------------------------------------------------------------*/ - -int TPI_Finalize() -{ - static int print_statistics = 0 ; - - int result ; - - result = NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : 0 ; - - if ( ! result ) { - - /* Wake up threads then wait for them to terminate */ - local_start( 0 , NULL , NULL , 0 , - 0 , NULL , NULL , 0 , NULL ); - - local_wait(); - - if ( print_statistics ) { - int i = 0 ; - for ( ; i < thread_pool.m_thread_count ; ++i ) { - if ( thread_pool.m_thread[i].m_barrier_wait_count ) { - long mean = ( thread_pool.m_thread[i].m_barrier_wait_total + 0.5 ) / - thread_pool.m_thread[i].m_barrier_wait_count ; - fprintf(stdout,"Thread[%d] barrier_wait( max %ld , mean %ld )\n", i , - thread_pool.m_thread[i].m_barrier_wait_max , mean ); - } - } - } - - thread_pool.m_thread_count = 0 ; - - local_destroy_locks(); - - if ( thread_pool.m_reduce_alloc ) { - free( thread_pool.m_reduce_alloc ); - thread_pool.m_reduce_alloc = NULL ; - thread_pool.m_reduce_alloc_size = 0 ; - } - } - - return result ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -static void local_block( TPI_Work * work ) -{ - if ( work->rank ) { - pthread_mutex_lock( thread_pool.m_lock ); - pthread_mutex_unlock( thread_pool.m_lock ); - } -} - -int TPI_Block() -{ - const int result = - NULL != thread_pool.m_work_routine ? 
TPI_ERROR_ACTIVE : ( - pthread_mutex_lock( thread_pool.m_lock ) ? TPI_ERROR_INTERNAL : - - local_start( thread_pool.m_thread_count , - local_block , NULL , - thread_pool.m_thread_count , - 0 /* lock_count */ , - NULL , NULL , 0 , NULL ) ); - - return result ; -} - -int TPI_Unblock() -{ - const int result = - local_block != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - pthread_mutex_unlock( thread_pool.m_lock ) ? TPI_ERROR_INTERNAL : 0 ); - - if ( ! result ) { local_wait(); } - - return result ; -} - -int TPI_Isblocked() -{ - return local_block == thread_pool.m_work_routine ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -#else /* ! defined( HAVE_PTHREAD ) */ - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -int TPI_Lock( int i ) -{ - int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; - - if ( ! result ) { - if ( 0 != thread_pool.m_lock[i] ) { - result = TPI_ERROR_LOCK ; - } - else { - thread_pool.m_lock[i] = 1 ; - } - } - return result ; -} - -int TPI_Unlock( int i ) -{ - int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; - - if ( ! result ) { - if ( 0 == thread_pool.m_lock[i] ) { - result = TPI_ERROR_LOCK ; - } - else { - thread_pool.m_lock[i] = 0 ; - } - } - return result ; -} - -static int local_set_lock_count( const int lock_count ) -{ - int result = lock_count < 0 || LOCK_COUNT_MAX < lock_count - ? TPI_ERROR_SIZE : 0 ; - - while ( thread_pool.m_lock_init < lock_count ) { - - thread_pool.m_lock[ thread_pool.m_lock_init ] = 0 ; - - ++( thread_pool.m_lock_init ); - } - - return result ; -} - -/*--------------------------------------------------------------------*/ - -static int local_start( - int work_thread_count , - TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - int lock_count , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ) -{ - const int result = lock_count ? local_set_lock_count( lock_count ) : 0 ; - - if ( ! result ) { - thread_pool.m_work_routine = work_subprogram ; - thread_pool.m_work_info = work_info ; - thread_pool.m_work_count = work_count ; - thread_pool.m_lock_count = lock_count ; - thread_pool.m_thread->m_reduce = reduce_data ; - } - - return result ; -} - -static void local_wait() -{ - struct TPI_Work_Struct w = { NULL , NULL , 0 , 0 , 0 }; - - w.info = thread_pool.m_work_info ; - w.count = thread_pool.m_work_count ; - w.lock_count = thread_pool.m_lock_count ; - w.reduce = thread_pool.m_thread->m_reduce ; - - for ( w.rank = 0 ; w.rank < w.count ; ++( w.rank ) ) { - (* thread_pool.m_work_routine )( & w ); - } - - thread_pool.m_work_routine = NULL ; - thread_pool.m_work_info = NULL ; - thread_pool.m_work_count = 0 ; - thread_pool.m_lock_count = 0 ; - thread_pool.m_thread->m_reduce = NULL ; -} - -/*--------------------------------------------------------------------*/ - -static void local_block( TPI_Work * work ) {} - -int TPI_Block() -{ - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : - - local_start( thread_pool.m_thread_count , - local_block , NULL , - thread_pool.m_thread_count , - 0 /* lock_count */ , - NULL , NULL , 0 , NULL ) ; - - return result ; -} - -int TPI_Unblock() -{ - const int result = - local_block != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : 0 ; - - if ( ! 
result ) { local_wait(); } - - return result ; -} - -int TPI_Isblocked() -{ - return local_block == thread_pool.m_work_routine ; -} - -/*--------------------------------------------------------------------*/ - -int TPI_Init( int n ) -{ - int result = thread_pool.m_thread_count ? TPI_ERROR_ACTIVE : 0 ; - - if ( ! result && ( n < 1 || THREAD_COUNT_MAX + 1 <= n ) ) { - result = TPI_ERROR_SIZE ; - } - else { - Thread * const thread = thread_pool.m_thread ; - - thread->m_thread_fan = NULL ; - thread->m_reduce = NULL ; - thread->m_rank = 0 ; - thread->m_barrier_wait_max = 0 ; - thread->m_barrier_wait_total = 0 ; - thread->m_barrier_wait_count = 0 ; - thread->m_control = 1 ; - - thread_pool.m_thread_count = result = n ; - - /* Initialize one lock for blocking and unblocking */ - - local_set_lock_count( 1 ); - } - - return result ; -} - -/*--------------------------------------------------------------------*/ - -int TPI_Finalize() -{ - int result = NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : 0 ; - - if ( ! result ) { - thread_pool.m_thread_count = 0 ; - thread_pool.m_lock_init = 0 ; - } - - return result ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -#endif - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -int TPI_Wait() -{ - const int result = - ( NULL == thread_pool.m_work_routine || - local_block == thread_pool.m_work_routine ) ? TPI_ERROR_ACTIVE : 0 ; - - if ( ! result ) { local_wait(); } - - return result ; -} - -int TPI_Start( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - int lock_count ) -{ - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? TPI_ERROR_NULL : ( - work_count < 0 ? TPI_ERROR_SIZE : - local_start( thread_pool.m_thread_count - 1 , - work_subprogram , work_info , work_count , lock_count , - NULL , NULL , 0 , NULL ) ) ); - - return result ; -} - -int TPI_Run( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - int lock_count ) -{ - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? TPI_ERROR_NULL : ( - work_count < 0 ? TPI_ERROR_SIZE : - local_start( thread_pool.m_thread_count , - work_subprogram , work_info , work_count , lock_count , - NULL , NULL , 0 , NULL ) ) ); - - if ( ! result ) { local_wait(); } - - return result ; -} - -int TPI_Run_threads( TPI_work_subprogram work_subprogram , - const void * work_info , - int lock_count ) -{ - const int work_count = 0 < thread_pool.m_thread_count ? - thread_pool.m_thread_count : 1 ; - - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? TPI_ERROR_NULL : ( - local_start( thread_pool.m_thread_count , - work_subprogram , work_info , work_count , lock_count , - NULL , NULL , 0 , NULL ) ) ); - - if ( ! result ) { local_wait(); } - - return result ; -} - -int TPI_Start_threads( TPI_work_subprogram work_subprogram , - const void * work_info , - int lock_count ) -{ - const int work_count = 1 < thread_pool.m_thread_count ? - thread_pool.m_thread_count - 1 : 1 ; - - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? 
TPI_ERROR_NULL : ( - local_start( thread_pool.m_thread_count - 1 , - work_subprogram , work_info , work_count , lock_count , - NULL , NULL , 0 , NULL ) ) ); - - if ( ! result ) { local_wait(); } - - return result ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -int TPI_Run_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ) -{ - const int lock_count = 0 ; - - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? TPI_ERROR_NULL : ( - NULL == reduce_join ? TPI_ERROR_NULL : ( - NULL == reduce_init ? TPI_ERROR_NULL : ( - NULL == reduce_data ? TPI_ERROR_NULL : ( - work_count <= 0 ? TPI_ERROR_SIZE : ( - reduce_size <= 0 ? TPI_ERROR_SIZE : - - local_start( thread_pool.m_thread_count , - work_subprogram, work_info, work_count, lock_count, - reduce_join, reduce_init, reduce_size, reduce_data ))))))); - - if ( ! result ) { local_wait(); } - - return result ; -} - -int TPI_Run_threads_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ) -{ - const int lock_count = 0 ; - const int work_count = 0 < thread_pool.m_thread_count ? - thread_pool.m_thread_count : 1 ; - - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? TPI_ERROR_NULL : ( - NULL == reduce_join ? TPI_ERROR_NULL : ( - NULL == reduce_init ? TPI_ERROR_NULL : ( - NULL == reduce_data ? TPI_ERROR_NULL : ( - reduce_size <= 0 ? TPI_ERROR_SIZE : - - local_start( thread_pool.m_thread_count , - work_subprogram , work_info , work_count , lock_count , - reduce_join, reduce_init, reduce_size, reduce_data )))))); - - if ( ! result ) { local_wait(); } - - return result ; -} - -int TPI_Start_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ) -{ - const int lock_count = 0 ; - - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? TPI_ERROR_NULL : ( - NULL == reduce_join ? TPI_ERROR_NULL : ( - NULL == reduce_init ? TPI_ERROR_NULL : ( - NULL == reduce_data ? TPI_ERROR_NULL : ( - work_count <= 0 ? TPI_ERROR_SIZE : ( - reduce_size <= 0 ? TPI_ERROR_SIZE : - - local_start( thread_pool.m_thread_count - 1 , - work_subprogram , work_info , work_count , lock_count , - reduce_join, reduce_init, reduce_size, reduce_data ))))))); - - return result ; -} - -int TPI_Start_threads_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ) -{ - const int lock_count = 0 ; - const int work_count = 1 < thread_pool.m_thread_count ? - thread_pool.m_thread_count - 1 : 1 ; - - const int result = - NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( - NULL == work_subprogram ? TPI_ERROR_NULL : ( - NULL == reduce_join ? TPI_ERROR_NULL : ( - NULL == reduce_init ? TPI_ERROR_NULL : ( - NULL == reduce_data ? TPI_ERROR_NULL : ( - reduce_size <= 0 ? 
TPI_ERROR_SIZE : - - local_start( thread_pool.m_thread_count - 1 , - work_subprogram , work_info , work_count , lock_count , - reduce_join, reduce_init, reduce_size, reduce_data )))))); - - return result ; -} - - diff --git a/kokkos/basic/optional/ThreadPool/src/TPI.h b/kokkos/basic/optional/ThreadPool/src/TPI.h deleted file mode 100644 index 939d3be..0000000 --- a/kokkos/basic/optional/ThreadPool/src/TPI.h +++ /dev/null @@ -1,253 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. */ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. Carter Edwards - * - * Thread Pool Interface (TPI). - * - * A simple and miminalistic interface for executing subprograms - * in a thread parallel, shared memory mode. - * - * States: the underlying thread pool has four states. - * 1) Uninitialized: no extra threads exist, this is the initial state. - * 2) Ready: extra threads exist and are ready to run a subprogram. - * 3) Active: extra threads are calling the subprogram. - * 4) Blocked: extra threads blocked. - * - * Threads are created on initialization and placed in the 'Ready' state. - * While in the 'Ready' state the threads are spin-waiting to minimize - * the cost of activating blocked threads. - * Threads can be blocked so that they do not compete for computatational - * resources with other threads created external to the TPI interface. - * For example, threads created by OpenMP or TBB. - */ - -#ifndef ThreadPoolInterface_h -#define ThreadPoolInterface_h - -#if defined( __cplusplus ) -extern "C" { -#endif - -/*--------------------------------------------------------------------*/ -/** \brief Version string. */ -const char * TPI_Version(); - -/** Start up the requested number of threads, less the calling thread. - * Return the actual number of threads, including the calling thread, - * otherwise return an error. - */ -int TPI_Init( int thread_count ); - -/** Shut down all started threads. */ -int TPI_Finalize(); - -/*--------------------------------------------------------------------*/ -/** \brief A utility to measure wall-clock time, which is frequently - * needed when performance testing HPC algorithms. - */ -double TPI_Walltime(); - -/*--------------------------------------------------------------------*/ -/* All functions return zero for success. 
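The four pool states described above can be exercised with a handful of calls. The following is a minimal sketch, assuming TPI.h is on the include path and ignoring error returns for brevity; my_work is an illustrative subprogram, not part of the library.

#include <stdio.h>
#include <TPI.h>

/* Illustrative work subprogram: one call per piece of work. */
static void my_work( TPI_Work * work )
{
  printf( "work %d of %d\n", work->rank , work->count );
}

int main( void )
{
  const double t0 = TPI_Walltime();

  TPI_Init( 4 );                        /* Uninitialized -> Ready    */
  TPI_Run( my_work , NULL , 16 , 0 );   /* Ready -> Active -> Ready  */
  TPI_Block();                          /* Ready -> Blocked          */
  TPI_Unblock();                        /* Blocked -> Ready          */
  TPI_Finalize();                       /* Ready -> Uninitialized    */

  printf( "elapsed %g seconds\n", TPI_Walltime() - t0 );
  return 0 ;
}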
*/ - -#define TPI_ERROR_NULL ((int) -1) /**< NULL input */ -#define TPI_ERROR_SIZE ((int) -2) /**< BAD input: size or index */ -#define TPI_ERROR_LOCK ((int) -3) /**< BAD lock or unlock */ -#define TPI_ERROR_ACTIVE ((int) -4) /**< BAD input: the pool is active */ -#define TPI_ERROR_INTERNAL ((int) -5) /**< internal resource error */ - -/*--------------------------------------------------------------------*/ -/** \brief Work information passed to a work subprogram. */ -struct TPI_Work_Struct { - const void * info ; /**< Shared info input to TPI_Run */ - void * reduce ; /**< Data for reduce operation, if any */ - int count ; /**< Count of work requested via TPI_Run */ - int rank ; /**< Rank of work for the current call */ - int lock_count ; /**< Count of locks requested via TPI_Run */ -}; - -/** \brief Typedef for work subprogram argument */ -typedef const struct TPI_Work_Struct TPI_Work ; - -/** The interface for a parallel task */ -typedef void (*TPI_work_subprogram)( TPI_Work * ); - -/** The interface for a parallel reduction operation. - * Initialize work->reduce value. - */ -typedef -void (*TPI_reduce_init)( TPI_Work * work ); - -/** The interface for a parallel reduction operation. - * Perform reduction operation work->reduce OP= reduce. - * Every initialized reduce value will appear exactly - * once as the 'reduce' argument of a call to the join function. - */ -typedef -void (*TPI_reduce_join)( TPI_Work * work , const void * reduce ); - -/*--------------------------------------------------------------------*/ -/** \brief Run a work subprogram in thread parallel. - * - * The thread pool must be in the 'paused' state when this - * function is called. Thus a recursive call to TPI_Run is illegal. - */ -int TPI_Run( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - int lock_count ); - -/** \brief Run a work and reduction subprograms in thread parallel. - * - * Each call to the work_subprogram has exclusive (thread safe) - * access to its work->reduce data. - * The reduce_init and reduce_join subprograms have - * exclusive access to their arguments. - */ -int TPI_Run_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ); - -/** \brief Run a work subprogram exactly once on each thread. - * - * The thread pool must be in the 'paused' state when this - * function is called. Thus a recursive call to TPI_Run is illegal. - */ -int TPI_Run_threads( TPI_work_subprogram work_subprogram , - const void * work_info , - int lock_count ); - -/** \brief Run a work and reduction subprograms in thread parallel. - * - * Each call to the work_subprogram has exclusive (thread safe) - * access to its work->reduce data. - * The reduce_init and reduce_join subprograms have - * exclusive access to their arguments. - */ -int TPI_Run_threads_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ); - -/*--------------------------------------------------------------------*/ -/** \brief Start a work subprogram in thread parallel - * running on all but the 'main' calling thread; - * the 'main' calling thread returns immediately. - * - * The thread pool must be in the 'paused' state when this - * function is called. Thus a recursive call to TPI_Start is illegal. 
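A minimal sketch of the reduction interface documented above: summing the work ranks with TPI_Run_reduce. The subprogram and init/join functions (sum_work, sum_init, sum_join) are illustrative names; note that the caller's reduce_data should already hold the identity value, since reduce_init is only applied to the per-thread copies handed to the extra threads.

#include <stdio.h>
#include <TPI.h>

/* Each work item adds its rank into the thread-private reduce value. */
static void sum_work( TPI_Work * work )
{
  *((double *) work->reduce) += (double) work->rank ;
}

/* TPI_reduce_init: set a thread-private reduce value to the identity. */
static void sum_init( TPI_Work * work )
{
  *((double *) work->reduce) = 0.0 ;
}

/* TPI_reduce_join: fold another thread's contribution into work->reduce. */
static void sum_join( TPI_Work * work , const void * reduce )
{
  *((double *) work->reduce) += *((const double *) reduce) ;
}

int main( void )
{
  double total = 0.0 ;   /* identity value used directly by the root thread */

  TPI_Init( 4 );
  TPI_Run_reduce( sum_work , NULL , 1000 ,
                  sum_join , sum_init , sizeof(total) , & total );
  TPI_Finalize();

  printf( "sum = %g (expected %g)\n", total , 1000.0 * 999.0 / 2.0 );
  return 0 ;
}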
- */ -int TPI_Start( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - int lock_count ); - -/** \brief Start a work and reduction subprograms in thread parallel - * running on all but the 'main' calling thread; - * the 'main' calling thread returns immediately. - * - * Each call to the work_subprogram has exclusive (thread safe) - * access to its work->reduce data. - * The reduce_init and reduce_join subprograms have - * exclusive access to their arguments. - */ -int TPI_Start_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - int work_count , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ); - -/** \brief Run a work subprogram on each thread - * that is not the 'main' calling thread. - * The 'main' calling thread returns immediately. - * - * The thread pool must be in the 'paused' state when this - * function is called. Thus a recursive call to TPI_Start_threads is illegal. - */ -int TPI_Start_threads( TPI_work_subprogram work_subprogram , - const void * work_info , - int lock_count ); - -/** \brief Start a work / reduction subprogram - * on each thread that is not the 'main' calling thread. - * The 'main' calling thread returns immediately. - * - * Each call to the work_subprogram has exclusive (thread safe) - * access to its work->reduce data. - * The reduce_init and reduce_join subprograms have - * exclusive access to their arguments. - */ -int TPI_Start_threads_reduce( TPI_work_subprogram work_subprogram , - const void * work_info , - TPI_reduce_join reduce_join , - TPI_reduce_init reduce_init , - int reduce_size , - void * reduce_data ); - -/** \brief Wait for a started work subprogram to complete. */ -int TPI_Wait(); - -/*--------------------------------------------------------------------*/ -/** \brief Block threads within the operating system. - * - * Normally the worker threads are unblocked and spinning for - * minimal start up overhead when running work subprograms. - * If no TPI work is to be performed for a long period of time - * then an application can block the worker threads. - */ -int TPI_Block(); - -/** \brief Unblock blocked threads within the operating system */ -int TPI_Unblock(); - -/** \brief Query if threads are blocked */ -int TPI_Isblocked(); - -/*--------------------------------------------------------------------*/ -/** \brief Blocks until lock lock_rank is obtained. - * The thread pool must be in the 'active' state. - */ -int TPI_Lock( int lock_rank ); - -/** \brief Unlocks lock lock_rank. - * The thread pool must be in the 'active' state. - */ -int TPI_Unlock( int lock_rank ); - -/*--------------------------------------------------------------------*/ - -#if defined( __cplusplus ) -} -#endif - -#endif - diff --git a/kokkos/basic/optional/ThreadPool/src/TPI.hpp b/kokkos/basic/optional/ThreadPool/src/TPI.hpp deleted file mode 100644 index fc1894e..0000000 --- a/kokkos/basic/optional/ThreadPool/src/TPI.hpp +++ /dev/null @@ -1,135 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. 
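A minimal sketch of the start/wait and locking calls documented above, again assuming TPI.h is on the include path; count_work and shared_counter are illustrative. In the threaded build the subprogram runs once on each thread other than the caller, with each increment guarded by lock 0.

#include <stdio.h>
#include <TPI.h>

static int shared_counter = 0 ;   /* illustrative shared state */

static void count_work( TPI_Work * work )
{
  (void) work ;           /* rank/count not needed for this example */
  TPI_Lock( 0 );          /* valid because lock_count = 1 was requested */
  ++shared_counter ;
  TPI_Unlock( 0 );
}

int main( void )
{
  TPI_Init( 4 );

  /* Start work on all but the calling thread; request one lock. */
  TPI_Start_threads( count_work , NULL , 1 );

  /* ... the calling thread may do unrelated work here ... */

  TPI_Wait();             /* block until the started work completes */

  printf( "counter = %d\n", shared_counter );

  TPI_Finalize();
  return 0 ;
}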
*/ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. Carter Edwards - */ - -#ifndef util_ThreadPool_hpp -#define util_ThreadPool_hpp - -#include - -namespace TPI { - -typedef TPI_Work Work ; - -//---------------------------------------------------------------------- -/** Run worker.*method(work) on all threads. - */ -template -int Run( Worker & worker , void (Worker::*method)(Work &) , - int work_count , int lock_count = 0 ); - -//---------------------------------------------------------------------- - -inline int Lock( int n ) { return TPI_Lock( n ); } -inline int Unlock( int n ) { return TPI_Unlock( n ); } - -/** Lock guard to insure that a lock is released - * when control exists a block. - * { - * TPI::LockGuard local_lock( i ); - * } - */ -class LockGuard { -private: - LockGuard(); - LockGuard( const LockGuard & ); - LockGuard & operator = ( const LockGuard & ); - const int m_value ; - const int m_result ; -public: - operator int() const { return m_result ; } - - explicit LockGuard( unsigned i_lock ) - : m_value( i_lock ), m_result( TPI_Lock(i_lock) ) {} - - ~LockGuard() { TPI_Unlock( m_value ); } -}; - -//---------------------------------------------------------------------- - -inline -int Init( int n ) { return TPI_Init( n ); } - -inline -int Finalize() { return TPI_Finalize(); } - -inline -double Walltime() { return TPI_Walltime(); } - -//---------------------------------------------------------------------- -//---------------------------------------------------------------------- - -namespace { - -template -class WorkerMethodHelper { -private: - WorkerMethodHelper(); - WorkerMethodHelper( const WorkerMethodHelper & ); - WorkerMethodHelper & operator = ( const WorkerMethodHelper & ); - -public: - - typedef void (Worker::*Method)( Work & ); - - Worker & worker ; - Method method ; - - WorkerMethodHelper( Worker & w , Method m ) : worker(w), method(m) {} - - static void run( TPI_Work * work ) - { - try { - const WorkerMethodHelper & wm = - * reinterpret_cast(work->info); - (wm.worker.*wm.method)(*work); - } catch(...){} - } -}; - -} - -//---------------------------------------------------------------------- -//---------------------------------------------------------------------- - -template -inline -int Run( Worker & worker, void (Worker::*method)(Work &) , - int work_count , int lock_count ) -{ - typedef WorkerMethodHelper WM ; - - WM tmp( worker , method ); - - return TPI_Run( reinterpret_cast(& WM::run),&tmp,work_count,lock_count); -} - -//---------------------------------------------------------------------- -//---------------------------------------------------------------------- - -} - -#endif - diff --git 
a/kokkos/basic/optional/ThreadPool/src/TPI_Walltime.c b/kokkos/basic/optional/ThreadPool/src/TPI_Walltime.c deleted file mode 100644 index d2c1fe4..0000000 --- a/kokkos/basic/optional/ThreadPool/src/TPI_Walltime.c +++ /dev/null @@ -1,44 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. */ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. Carter Edwards - */ - -#include - -#include -#ifdef _MSC_VER -#include -#else -#include -#endif - -double TPI_Walltime() -{ - struct timeval tp ; - - gettimeofday( &tp , ((struct timezone *) NULL ) ); - - return ( (double) tp.tv_sec ) + ( (double) tp.tv_usec ) / 1.0e6 ; -} - diff --git a/kokkos/basic/optional/ThreadPool/src/ThreadPool_config.h.in b/kokkos/basic/optional/ThreadPool/src/ThreadPool_config.h.in deleted file mode 100644 index 752f5c5..0000000 --- a/kokkos/basic/optional/ThreadPool/src/ThreadPool_config.h.in +++ /dev/null @@ -1,71 +0,0 @@ -/* src/ThreadPool_config.h.in. Generated from configure.ac by autoheader. */ - -/* Define if you want to build export makefiles. */ -#undef HAVE_EXPORT_MAKEFILES - -/* Define if you are using gnumake - this will shorten your link lines. */ -#undef HAVE_GNUMAKE - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define if want to build libcheck */ -#undef HAVE_LIBCHECK - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* define if we want to use MPI */ -#undef HAVE_MPI - -/* Define if want to build threadpool-tests */ -#undef HAVE_NEW_PACKAGE_TESTS - -/* Define if you have POSIX threads libraries and header files. */ -#undef HAVE_PTHREAD - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define if want to build tests */ -#undef HAVE_TESTS - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. 
*/ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* Define to the necessary symbol if this constant uses a non-standard name on - your system. */ -#undef PTHREAD_CREATE_JOINABLE - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS diff --git a/kokkos/basic/optional/ThreadPool/test/CMakeLists.txt b/kokkos/basic/optional/ThreadPool/test/CMakeLists.txt deleted file mode 100644 index ff878e7..0000000 --- a/kokkos/basic/optional/ThreadPool/test/CMakeLists.txt +++ /dev/null @@ -1,86 +0,0 @@ - -INCLUDE(PackageAddExecutableAndTest) - -PACKAGE_ADD_EXECUTABLE( - test_tpi_unit - COMM serial mpi - SOURCES test_tpi_unit.c - DIRECTORY . - ) - -PACKAGE_ADD_EXECUTABLE( - test_c_dnax - COMM serial - SOURCES test_c_dnax.c - DIRECTORY . - ) - -PACKAGE_ADD_EXECUTABLE( - test_tpi_cpp - COMM serial - SOURCES test_tpi.cpp - DIRECTORY . - ) - -PACKAGE_ADD_EXECUTABLE( - test_tpi_sum - COMM serial mpi - SOURCES test_mpi_sum.c - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_unit - NAME test_tpi_unit_serial - COMM serial - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_unit - NAME test_tpi_unit_mpi - COMM mpi - NUM_MPI_PROCS 1 - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_cpp - NAME test_tpi_cpp - COMM serial - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_sum - NAME test_tpi_sum_serial - COMM serial - DIRECTORY . - XHOSTTYPE AIX - ) - -PACKAGE_ADD_TEST( - test_tpi_sum - NAME test_tpi_sum_np1 - COMM mpi - NUM_MPI_PROCS 1 - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_sum - NAME test_tpi_sum_np2 - COMM mpi - NUM_MPI_PROCS 2 - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_sum - NAME test_tpi_sum_np4 - COMM mpi - NUM_MPI_PROCS 4 - DIRECTORY . - ) - - diff --git a/kokkos/basic/optional/ThreadPool/test/Makefile.am b/kokkos/basic/optional/ThreadPool/test/Makefile.am deleted file mode 100644 index 8e78cbf..0000000 --- a/kokkos/basic/optional/ThreadPool/test/Makefile.am +++ /dev/null @@ -1,55 +0,0 @@ -#@HEADER -# ************************************************************************ -# -# ThreadPool Package -# Copyright (2008) Sandia Corporation -# -# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the U.S. Government. -# -# This library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as -# published by the Free Software Foundation; either version 2.1 of the -# License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -# USA -# Questions? Contact Carter Edwards (hcedwar@sandia.gov) -# -# ************************************************************************ -#@HEADER - -SUBDIRS = - -# The following line helps the test harness recover from build errors. 
- -all-local: - -include $(top_builddir)/Makefile.export.threadpool - -EXEEXT = .exe - -noinst_PROGRAMS = test_tpi test_tpi_cpp test_sum - -test_tpi_SOURCES = test_main.c test_tpi_unit.c test_c_dnax.c test_c_tpi.c test_pthreads.c -test_tpi_DEPENDENCIES = $(top_builddir)/src/libtpi.a -test_tpi_CFLAGS = $(THREADPOOL_INCLUDES) -test_tpi_LDADD = $(THREADPOOL_LIBS) - -test_tpi_cpp_SOURCES = test_tpi.cpp -test_tpi_cpp_DEPENDENCIES = $(top_builddir)/src/libtpi.a -test_tpi_cpp_CXXFLAGS = $(THREADPOOL_INCLUDES) -test_tpi_cpp_LDADD = $(THREADPOOL_LIBS) - -test_sum_SOURCES = test_mpi_sum.c -test_sum_DEPENDENCIES = $(top_builddir)/src/libtpi.a -test_sum_CFLAGS = $(THREADPOOL_INCLUDES) -test_sum_LDADD = $(THREADPOOL_LIBS) - diff --git a/kokkos/basic/optional/ThreadPool/test/Makefile.in b/kokkos/basic/optional/ThreadPool/test/Makefile.in deleted file mode 100644 index ffc5220..0000000 --- a/kokkos/basic/optional/ThreadPool/test/Makefile.in +++ /dev/null @@ -1,730 +0,0 @@ -# Makefile.in generated by automake 1.10 from Makefile.am. -# @configure_input@ - -# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -#@HEADER -# ************************************************************************ -# -# ThreadPool Package -# Copyright (2008) Sandia Corporation -# -# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the U.S. Government. -# -# This library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as -# published by the Free Software Foundation; either version 2.1 of the -# License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -# USA -# Questions? 
Contact Carter Edwards (hcedwar@sandia.gov) -# -# ************************************************************************ -#@HEADER - -VPATH = @srcdir@ -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -target_triplet = @target@ -noinst_PROGRAMS = test_tpi$(EXEEXT) test_tpi_cpp$(EXEEXT) \ - test_sum$(EXEEXT) -subdir = test -DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/config/acx_pthread.m4 \ - $(top_srcdir)/config/tac_arg_check_mpi.m4 \ - $(top_srcdir)/config/tac_arg_config_mpi.m4 \ - $(top_srcdir)/config/tac_arg_enable_export-makefiles.m4 \ - $(top_srcdir)/config/tac_arg_enable_feature.m4 \ - $(top_srcdir)/config/tac_arg_enable_feature_sub_check.m4 \ - $(top_srcdir)/config/tac_arg_with_ar.m4 \ - $(top_srcdir)/config/tac_arg_with_flags.m4 \ - $(top_srcdir)/config/tac_arg_with_incdirs.m4 \ - $(top_srcdir)/config/tac_arg_with_libdirs.m4 \ - $(top_srcdir)/config/tac_arg_with_libs.m4 \ - $(top_srcdir)/config/tac_arg_with_perl.m4 \ - $(top_srcdir)/configure.ac -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -mkinstalldirs = $(install_sh) -d -CONFIG_HEADER = $(top_builddir)/src/ThreadPool_config.h -CONFIG_CLEAN_FILES = -PROGRAMS = $(noinst_PROGRAMS) -am_test_sum_OBJECTS = test_sum-test_mpi_sum.$(OBJEXT) -test_sum_OBJECTS = $(am_test_sum_OBJECTS) -test_sum_LINK = $(CCLD) $(test_sum_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ -am_test_tpi_OBJECTS = test_tpi-test_main.$(OBJEXT) \ - test_tpi-test_tpi_unit.$(OBJEXT) \ - test_tpi-test_c_dnax.$(OBJEXT) test_tpi-test_c_tpi.$(OBJEXT) \ - test_tpi-test_pthreads.$(OBJEXT) -test_tpi_OBJECTS = $(am_test_tpi_OBJECTS) -test_tpi_LINK = $(CCLD) $(test_tpi_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ -am_test_tpi_cpp_OBJECTS = test_tpi_cpp-test_tpi.$(OBJEXT) -test_tpi_cpp_OBJECTS = $(am_test_tpi_cpp_OBJECTS) -test_tpi_cpp_LINK = $(CXXLD) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) \ - $(AM_LDFLAGS) $(LDFLAGS) -o $@ -DEFAULT_INCLUDES = -I. 
-I$(top_builddir)/src@am__isrc@ -depcomp = $(SHELL) $(top_srcdir)/config/depcomp -am__depfiles_maybe = depfiles -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ -CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -CXXLD = $(CXX) -CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ - -o $@ -SOURCES = $(test_sum_SOURCES) $(test_tpi_SOURCES) \ - $(test_tpi_cpp_SOURCES) -DIST_SOURCES = $(test_sum_SOURCES) $(test_tpi_SOURCES) \ - $(test_tpi_cpp_SOURCES) -RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ - html-recursive info-recursive install-data-recursive \ - install-dvi-recursive install-exec-recursive \ - install-html-recursive install-info-recursive \ - install-pdf-recursive install-ps-recursive install-recursive \ - installcheck-recursive installdirs-recursive pdf-recursive \ - ps-recursive uninstall-recursive -RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ - distclean-recursive maintainer-clean-recursive -ETAGS = etags -CTAGS = ctags -DIST_SUBDIRS = $(SUBDIRS) -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -ALTERNATE_AR = @ALTERNATE_AR@ -AMTAR = @AMTAR@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CPPFLAGS = @CPPFLAGS@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -EXEEXT = .exe -GREP = @GREP@ -HAVE_PERL = @HAVE_PERL@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LDFLAGS = @LDFLAGS@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MKDIR_P = @MKDIR_P@ -MPI_CC_EXISTS = @MPI_CC_EXISTS@ -MPI_CXX = @MPI_CXX@ -MPI_CXX_EXISTS = @MPI_CXX_EXISTS@ -MPI_F77_EXISTS = @MPI_F77_EXISTS@ -MPI_TEMP_CXX = @MPI_TEMP_CXX@ -OBJEXT = @OBJEXT@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PERL_EXE = @PERL_EXE@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -RANLIB = @RANLIB@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -STRIP = @STRIP@ -VERSION = @VERSION@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_aux_dir = @ac_aux_dir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = @build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ 
-infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target = @target@ -target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ -SUBDIRS = -test_tpi_SOURCES = test_main.c test_tpi_unit.c test_c_dnax.c test_c_tpi.c test_pthreads.c -test_tpi_DEPENDENCIES = $(top_builddir)/src/libtpi.a -test_tpi_CFLAGS = $(THREADPOOL_INCLUDES) -test_tpi_LDADD = $(THREADPOOL_LIBS) -test_tpi_cpp_SOURCES = test_tpi.cpp -test_tpi_cpp_DEPENDENCIES = $(top_builddir)/src/libtpi.a -test_tpi_cpp_CXXFLAGS = $(THREADPOOL_INCLUDES) -test_tpi_cpp_LDADD = $(THREADPOOL_LIBS) -test_sum_SOURCES = test_mpi_sum.c -test_sum_DEPENDENCIES = $(top_builddir)/src/libtpi.a -test_sum_CFLAGS = $(THREADPOOL_INCLUDES) -test_sum_LDADD = $(THREADPOOL_LIBS) -all: all-recursive - -.SUFFIXES: -.SUFFIXES: .c .cpp .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ - && exit 0; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/Makefile'; \ - cd $(top_srcdir) && \ - $(AUTOMAKE) --foreign test/Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -clean-noinstPROGRAMS: - -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) -test_sum$(EXEEXT): $(test_sum_OBJECTS) $(test_sum_DEPENDENCIES) - @rm -f test_sum$(EXEEXT) - $(test_sum_LINK) $(test_sum_OBJECTS) $(test_sum_LDADD) $(LIBS) -test_tpi$(EXEEXT): $(test_tpi_OBJECTS) $(test_tpi_DEPENDENCIES) - @rm -f test_tpi$(EXEEXT) - $(test_tpi_LINK) $(test_tpi_OBJECTS) $(test_tpi_LDADD) $(LIBS) -test_tpi_cpp$(EXEEXT): $(test_tpi_cpp_OBJECTS) $(test_tpi_cpp_DEPENDENCIES) - @rm -f test_tpi_cpp$(EXEEXT) - $(test_tpi_cpp_LINK) $(test_tpi_cpp_OBJECTS) $(test_tpi_cpp_LDADD) $(LIBS) - -mostlyclean-compile: - -rm -f *.$(OBJEXT) - -distclean-compile: - -rm -f *.tab.c - -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_sum-test_mpi_sum.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_c_dnax.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_c_tpi.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_main.Po@am__quote@ -@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/test_tpi-test_pthreads.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_tpi_unit.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi_cpp-test_tpi.Po@am__quote@ - -.c.o: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c $< - -.c.obj: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` - -test_sum-test_mpi_sum.o: test_mpi_sum.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -MT test_sum-test_mpi_sum.o -MD -MP -MF $(DEPDIR)/test_sum-test_mpi_sum.Tpo -c -o test_sum-test_mpi_sum.o `test -f 'test_mpi_sum.c' || echo '$(srcdir)/'`test_mpi_sum.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_sum-test_mpi_sum.Tpo $(DEPDIR)/test_sum-test_mpi_sum.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_mpi_sum.c' object='test_sum-test_mpi_sum.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -c -o test_sum-test_mpi_sum.o `test -f 'test_mpi_sum.c' || echo '$(srcdir)/'`test_mpi_sum.c - -test_sum-test_mpi_sum.obj: test_mpi_sum.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -MT test_sum-test_mpi_sum.obj -MD -MP -MF $(DEPDIR)/test_sum-test_mpi_sum.Tpo -c -o test_sum-test_mpi_sum.obj `if test -f 'test_mpi_sum.c'; then $(CYGPATH_W) 'test_mpi_sum.c'; else $(CYGPATH_W) '$(srcdir)/test_mpi_sum.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_sum-test_mpi_sum.Tpo $(DEPDIR)/test_sum-test_mpi_sum.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_mpi_sum.c' object='test_sum-test_mpi_sum.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -c -o test_sum-test_mpi_sum.obj `if test -f 'test_mpi_sum.c'; then $(CYGPATH_W) 'test_mpi_sum.c'; else $(CYGPATH_W) '$(srcdir)/test_mpi_sum.c'; fi` - -test_tpi-test_main.o: test_main.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_main.o -MD -MP -MF $(DEPDIR)/test_tpi-test_main.Tpo -c -o test_tpi-test_main.o `test -f 'test_main.c' || echo '$(srcdir)/'`test_main.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_main.Tpo $(DEPDIR)/test_tpi-test_main.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_main.c' object='test_tpi-test_main.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_main.o `test -f 'test_main.c' || echo '$(srcdir)/'`test_main.c - -test_tpi-test_main.obj: test_main.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_main.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_main.Tpo -c -o test_tpi-test_main.obj `if test -f 'test_main.c'; then $(CYGPATH_W) 'test_main.c'; else $(CYGPATH_W) '$(srcdir)/test_main.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_main.Tpo $(DEPDIR)/test_tpi-test_main.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_main.c' object='test_tpi-test_main.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_main.obj `if test -f 'test_main.c'; then $(CYGPATH_W) 'test_main.c'; else $(CYGPATH_W) '$(srcdir)/test_main.c'; fi` - -test_tpi-test_tpi_unit.o: test_tpi_unit.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_tpi_unit.o -MD -MP -MF $(DEPDIR)/test_tpi-test_tpi_unit.Tpo -c -o test_tpi-test_tpi_unit.o `test -f 'test_tpi_unit.c' || echo '$(srcdir)/'`test_tpi_unit.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_tpi_unit.Tpo $(DEPDIR)/test_tpi-test_tpi_unit.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_tpi_unit.c' object='test_tpi-test_tpi_unit.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_tpi_unit.o `test -f 'test_tpi_unit.c' || echo '$(srcdir)/'`test_tpi_unit.c - -test_tpi-test_tpi_unit.obj: test_tpi_unit.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_tpi_unit.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_tpi_unit.Tpo -c -o test_tpi-test_tpi_unit.obj `if test -f 'test_tpi_unit.c'; then $(CYGPATH_W) 'test_tpi_unit.c'; else $(CYGPATH_W) '$(srcdir)/test_tpi_unit.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_tpi_unit.Tpo $(DEPDIR)/test_tpi-test_tpi_unit.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_tpi_unit.c' object='test_tpi-test_tpi_unit.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_tpi_unit.obj `if test -f 'test_tpi_unit.c'; then $(CYGPATH_W) 'test_tpi_unit.c'; else $(CYGPATH_W) '$(srcdir)/test_tpi_unit.c'; fi` - -test_tpi-test_c_dnax.o: test_c_dnax.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_dnax.o -MD -MP -MF $(DEPDIR)/test_tpi-test_c_dnax.Tpo -c -o test_tpi-test_c_dnax.o `test -f 'test_c_dnax.c' || echo '$(srcdir)/'`test_c_dnax.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_dnax.Tpo $(DEPDIR)/test_tpi-test_c_dnax.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_dnax.c' object='test_tpi-test_c_dnax.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_dnax.o `test -f 'test_c_dnax.c' || echo '$(srcdir)/'`test_c_dnax.c - -test_tpi-test_c_dnax.obj: test_c_dnax.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_dnax.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_c_dnax.Tpo -c -o test_tpi-test_c_dnax.obj `if test -f 'test_c_dnax.c'; then $(CYGPATH_W) 'test_c_dnax.c'; else $(CYGPATH_W) '$(srcdir)/test_c_dnax.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_dnax.Tpo $(DEPDIR)/test_tpi-test_c_dnax.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_dnax.c' object='test_tpi-test_c_dnax.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_dnax.obj `if test -f 'test_c_dnax.c'; then $(CYGPATH_W) 'test_c_dnax.c'; else $(CYGPATH_W) '$(srcdir)/test_c_dnax.c'; fi` - -test_tpi-test_c_tpi.o: test_c_tpi.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_tpi.o -MD -MP -MF $(DEPDIR)/test_tpi-test_c_tpi.Tpo -c -o test_tpi-test_c_tpi.o `test -f 'test_c_tpi.c' || echo '$(srcdir)/'`test_c_tpi.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_tpi.Tpo $(DEPDIR)/test_tpi-test_c_tpi.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_tpi.c' object='test_tpi-test_c_tpi.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_tpi.o `test -f 'test_c_tpi.c' || echo '$(srcdir)/'`test_c_tpi.c - -test_tpi-test_c_tpi.obj: test_c_tpi.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_tpi.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_c_tpi.Tpo -c -o test_tpi-test_c_tpi.obj `if test -f 'test_c_tpi.c'; then $(CYGPATH_W) 'test_c_tpi.c'; else $(CYGPATH_W) '$(srcdir)/test_c_tpi.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_tpi.Tpo $(DEPDIR)/test_tpi-test_c_tpi.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_tpi.c' object='test_tpi-test_c_tpi.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_tpi.obj `if test -f 'test_c_tpi.c'; then $(CYGPATH_W) 'test_c_tpi.c'; else $(CYGPATH_W) '$(srcdir)/test_c_tpi.c'; fi` - -test_tpi-test_pthreads.o: test_pthreads.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_pthreads.o -MD -MP -MF $(DEPDIR)/test_tpi-test_pthreads.Tpo -c -o test_tpi-test_pthreads.o `test -f 'test_pthreads.c' || echo '$(srcdir)/'`test_pthreads.c -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_pthreads.Tpo $(DEPDIR)/test_tpi-test_pthreads.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_pthreads.c' object='test_tpi-test_pthreads.o' libtool=no @AMDEPBACKSLASH@ 
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_pthreads.o `test -f 'test_pthreads.c' || echo '$(srcdir)/'`test_pthreads.c - -test_tpi-test_pthreads.obj: test_pthreads.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_pthreads.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_pthreads.Tpo -c -o test_tpi-test_pthreads.obj `if test -f 'test_pthreads.c'; then $(CYGPATH_W) 'test_pthreads.c'; else $(CYGPATH_W) '$(srcdir)/test_pthreads.c'; fi` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_pthreads.Tpo $(DEPDIR)/test_tpi-test_pthreads.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_pthreads.c' object='test_tpi-test_pthreads.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_pthreads.obj `if test -f 'test_pthreads.c'; then $(CYGPATH_W) 'test_pthreads.c'; else $(CYGPATH_W) '$(srcdir)/test_pthreads.c'; fi` - -.cpp.o: -@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< - -.cpp.obj: -@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` -@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` - -test_tpi_cpp-test_tpi.o: test_tpi.cpp -@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -MT test_tpi_cpp-test_tpi.o -MD -MP -MF $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo -c -o test_tpi_cpp-test_tpi.o `test -f 'test_tpi.cpp' || echo '$(srcdir)/'`test_tpi.cpp -@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo $(DEPDIR)/test_tpi_cpp-test_tpi.Po -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='test_tpi.cpp' object='test_tpi_cpp-test_tpi.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -c -o test_tpi_cpp-test_tpi.o `test -f 'test_tpi.cpp' || echo '$(srcdir)/'`test_tpi.cpp - -test_tpi_cpp-test_tpi.obj: test_tpi.cpp -@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -MT test_tpi_cpp-test_tpi.obj -MD -MP -MF $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo -c -o test_tpi_cpp-test_tpi.obj `if test -f 'test_tpi.cpp'; then $(CYGPATH_W) 'test_tpi.cpp'; else $(CYGPATH_W) '$(srcdir)/test_tpi.cpp'; fi` -@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo $(DEPDIR)/test_tpi_cpp-test_tpi.Po -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
source='test_tpi.cpp' object='test_tpi_cpp-test_tpi.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -c -o test_tpi_cpp-test_tpi.obj `if test -f 'test_tpi.cpp'; then $(CYGPATH_W) 'test_tpi.cpp'; else $(CYGPATH_W) '$(srcdir)/test_tpi.cpp'; fi` - -# This directory's subdirectories are mostly independent; you can cd -# into them and run `make' without going through this Makefile. -# To change the values of `make' variables: instead of editing Makefiles, -# (1) if the variable is set in `config.status', edit `config.status' -# (which will cause the Makefiles to be regenerated when you run `make'); -# (2) otherwise, pass the desired values on the `make' command line. -$(RECURSIVE_TARGETS): - @failcom='exit 1'; \ - for f in x $$MAKEFLAGS; do \ - case $$f in \ - *=* | --[!k]*);; \ - *k*) failcom='fail=yes';; \ - esac; \ - done; \ - dot_seen=no; \ - target=`echo $@ | sed s/-recursive//`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - dot_seen=yes; \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || eval $$failcom; \ - done; \ - if test "$$dot_seen" = "no"; then \ - $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ - fi; test -z "$$fail" - -$(RECURSIVE_CLEAN_TARGETS): - @failcom='exit 1'; \ - for f in x $$MAKEFLAGS; do \ - case $$f in \ - *=* | --[!k]*);; \ - *k*) failcom='fail=yes';; \ - esac; \ - done; \ - dot_seen=no; \ - case "$@" in \ - distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ - *) list='$(SUBDIRS)' ;; \ - esac; \ - rev=''; for subdir in $$list; do \ - if test "$$subdir" = "."; then :; else \ - rev="$$subdir $$rev"; \ - fi; \ - done; \ - rev="$$rev ."; \ - target=`echo $@ | sed s/-recursive//`; \ - for subdir in $$rev; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || eval $$failcom; \ - done && test -z "$$fail" -tags-recursive: - list='$(SUBDIRS)'; for subdir in $$list; do \ - test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ - done -ctags-recursive: - list='$(SUBDIRS)'; for subdir in $$list; do \ - test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ - done - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ - include_option=--etags-include; \ - empty_fix=.; \ - else \ - include_option=--include; \ - empty_fix=; \ - fi; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = .; then :; else \ - test ! 
-f $$subdir/TAGS || \ - tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ - fi; \ - done; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$tags $$unique; \ - fi -ctags: CTAGS -CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(CTAGS_ARGS)$$tags$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$tags $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && cd $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) $$here - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ - fi; \ - cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ - else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done - list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = .; then :; else \ - test -d "$(distdir)/$$subdir" \ - || $(MKDIR_P) "$(distdir)/$$subdir" \ - || exit 1; \ - distdir=`$(am__cd) $(distdir) && pwd`; \ - top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ - (cd $$subdir && \ - $(MAKE) $(AM_MAKEFLAGS) \ - top_distdir="$$top_distdir" \ - distdir="$$distdir/$$subdir" \ - am__remove_distdir=: \ - am__skip_length_check=: \ - distdir) \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-recursive -all-am: Makefile $(PROGRAMS) all-local -installdirs: installdirs-recursive -installdirs-am: -install: install-recursive -install-exec: install-exec-recursive -install-data: install-data-recursive -uninstall: uninstall-recursive - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-recursive -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to 
rebuild." -clean: clean-recursive - -clean-am: clean-generic clean-noinstPROGRAMS mostlyclean-am - -distclean: distclean-recursive - -rm -rf ./$(DEPDIR) - -rm -f Makefile -distclean-am: clean-am distclean-compile distclean-generic \ - distclean-tags - -dvi: dvi-recursive - -dvi-am: - -html: html-recursive - -info: info-recursive - -info-am: - -install-data-am: - -install-dvi: install-dvi-recursive - -install-exec-am: - -install-html: install-html-recursive - -install-info: install-info-recursive - -install-man: - -install-pdf: install-pdf-recursive - -install-ps: install-ps-recursive - -installcheck-am: - -maintainer-clean: maintainer-clean-recursive - -rm -rf ./$(DEPDIR) - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-recursive - -mostlyclean-am: mostlyclean-compile mostlyclean-generic - -pdf: pdf-recursive - -pdf-am: - -ps: ps-recursive - -ps-am: - -uninstall-am: - -.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ - install-strip - -.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ - all all-am all-local check check-am clean clean-generic \ - clean-noinstPROGRAMS ctags ctags-recursive distclean \ - distclean-compile distclean-generic distclean-tags distdir dvi \ - dvi-am html html-am info info-am install install-am \ - install-data install-data-am install-dvi install-dvi-am \ - install-exec install-exec-am install-html install-html-am \ - install-info install-info-am install-man install-pdf \ - install-pdf-am install-ps install-ps-am install-strip \ - installcheck installcheck-am installdirs installdirs-am \ - maintainer-clean maintainer-clean-generic mostlyclean \ - mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ - tags tags-recursive uninstall uninstall-am - - -# The following line helps the test harness recover from build errors. - -all-local: - -include $(top_builddir)/Makefile.export.threadpool -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/kokkos/basic/optional/ThreadPool/test/build_gnu b/kokkos/basic/optional/ThreadPool/test/build_gnu deleted file mode 100755 index bba4b90..0000000 --- a/kokkos/basic/optional/ThreadPool/test/build_gnu +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash - -TEST_SRC="test_main.c test_c_dnax.c test_tpi_unit.c test_pthreads.c" - -LIB_SRC="../src/TPI.c ../src/TPI_Walltime.c" - -LIB_OBJ="TPI.o TPI_Walltime.o" - -# OPT="-O3" -OPT="-g" -# OPT="-O" - -#CFLAGS="${OPT} -std=c99 -Wall -Wextra" - -CFLAGS=" ${OPT} -std=c89 -Wall -Wextra" -CCFLAGS="${OPT} -std=c++98 -Wall -Wextra" - -echo build: gcc ${CFLAGS} - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h -echo "#define HAVE_PTHREAD 1" > ThreadPool_config.h - -gcc ${CFLAGS} -c \ - -I. -I../src ${LIB_SRC} - -gcc ${CFLAGS} \ - -o test_tpi.gnu.exe \ - -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread -lm - -g++ ${CCFLAGS} \ - -o test_tpi_cpp.gnu.exe \ - -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ -lm - -gcc ${CFLAGS} \ - -o test_sum.gnu.exe \ - -I. -I../src test_mpi_sum.c ${LIB_OBJ} -lpthread -lm - -#----------------------------------------------------------------------- - -mpicc ${CFLAGS} \ - -o test_sum.mpi.gnu.exe \ - -I. 
-I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread -lm - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h -echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h - -gcc ${CFLAGS} -c \ - -I. -I../src ${LIB_SRC} - -gcc ${CFLAGS} \ - -o test_tpi.gnu.noth.exe \ - -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread -lm - -g++ ${CCFLAGS} \ - -o test_tpi_cpp.gnu.noth.exe \ - -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ -lm - -gcc ${CFLAGS} \ - -o test_sum.gnu.noth.exe \ - -I. -I../src test_mpi_sum.c ${LIB_OBJ} -lpthread -lm - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h -echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h -echo "#define HAVE_MPI 1" >> ThreadPool_config.h - -mpicc ${CFLAGS} \ - -o test_sum.mpi.gnu.noth.exe \ - -I. -I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread -lm - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h - diff --git a/kokkos/basic/optional/ThreadPool/test/build_intel b/kokkos/basic/optional/ThreadPool/test/build_intel deleted file mode 100755 index accb0a0..0000000 --- a/kokkos/basic/optional/ThreadPool/test/build_intel +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -# . /usr/local/modules/3.2.6/Modules/$MODULE_VERSION/bin/modulecmd tcsh \ -# load sierra-devel-desktop-intel-10.1ip - - -TEST_SRC="test_main.c test_c_dnax.c test_tpi_unit.c test_pthreads.c" - -LIB_SRC="../src/TPI.c ../src/TPI_Walltime.c" - -LIB_OBJ="TPI.o TPI_Walltime.o" - -#CFLAGS="-std=c99 -strict-ansi -Wall -Wcheck -Werror -wd141 -wd869 -wd1418 -wd1419" -#CFLAGS="-std=c89 -strict-ansi -Wall -Wcheck -Werror -wd141 -wd869 -wd1418 -wd1419" -CCFLAGS=" -strict-ansi -Wall -Wcheck -Werror -wd141 -wd869 -wd1418 -wd1419" - -OPT="-O3" -# OPT="-g" -# OPT="-O" - -echo build ${OPT} - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h -echo "#define HAVE_PTHREAD 1" > ThreadPool_config.h - -icc ${CFLAGS} ${OPT} -c \ - -I. -I../src ${LIB_SRC} - -icc ${CFLAGS} ${OPT} \ - -o test_tpi.intel.exe \ - -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread - -icc ${CCFLAGS} ${OPT} \ - -o test_tpi_cpp.intel.exe \ - -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ - -icc ${CFLAGS} ${OPT} \ - -o test_sum.intel.exe \ - -I. -I../src test_mpi_sum.c ${LIB_OBJ} -lpthread - -#----------------------------------------------------------------------- - -mpicc ${CFLAGS} ${OPT} \ - -o test_sum.mpi.intel.exe \ - -I. -I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h -echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h - -icc ${CFLAGS} ${OPT} -c \ - -I. -I../src ${LIB_SRC} - -icc ${CFLAGS} ${OPT} \ - -o test_tpi.intel.noth.exe \ - -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread - -icc ${CCFLAGS} ${OPT} \ - -o test_tpi_cpp.intel.noth.exe \ - -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ - -icc ${CFLAGS} ${OPT} \ - -o test_sum.intel.noth.exe \ - -I. -I../src test_mpi_sum.c ${LIB_OBJ} -lpthread - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h -echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h -echo "#define HAVE_MPI 1" >> ThreadPool_config.h - -mpicc ${CFLAGS} ${OPT} \ - -o test_sum.mpi.intel.noth.exe \ - -I. 
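
Across these deleted build scripts the configuration is switched purely by regenerating ThreadPool_config.h: `#define HAVE_PTHREAD 1` selects the pthread build, a commented-out define selects the serial "noth" fallback, and `#define HAVE_MPI 1` is appended for the MPI variants before TPI is rebuilt and every test relinked. The C fragment below is only an illustration of how such a generated header is typically consumed; the function name is invented and does not come from TPI.c.

#include "ThreadPool_config.h"

#ifdef HAVE_PTHREAD
#include <pthread.h>
/* pthread build: real worker threads are available */
static int config_has_threads( void ) { return 1 ; }
#else
/* "noth" build: the same entry points fall back to serial execution */
static int config_has_threads( void ) { return 0 ; }
#endif
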
-I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread - -#----------------------------------------------------------------------- - -rm -f ThreadPool_config.h - diff --git a/kokkos/basic/optional/ThreadPool/test/build_pgi b/kokkos/basic/optional/ThreadPool/test/build_pgi deleted file mode 100755 index 85799cc..0000000 --- a/kokkos/basic/optional/ThreadPool/test/build_pgi +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -export LM_LICENSE_FILE=7500@reddish -PGI_HOME="/usr/local/pgi_64/linux86-64/7.0-7" -MPICH_HOME="/usr/local/mpi/mpich/64Bit/1.2.7/pgi-6.0" - -export PATH="${PGI_HOME}/bin:${PATH}" - -TEST_SRC="test_main.c test_c_dnax.c test_c_tpi.c test_pthreads.c" - -LIB_SRC="../src/TPI_pthreads.c ../src/TPI_Walltime.c ../src/TPI_Concurrency.c" - -LIB_OBJ="TPI_pthreads.o TPI_Walltime.o TPI_Concurrency.o" - -#----------------------------------------------------------------------- - -pgcc -O4 -c \ - -I../include ${LIB_SRC} -lpthread - -pgcc -O4 \ - -o test_tpi.pgi.exe \ - -I../include ${TEST_SRC} ${LIB_OBJ} -lpthread - -pgCC -O4 \ - -o test_tpi_cpp.pgi.exe \ - -I../include test_tpi.cpp ${LIB_OBJ} -lpthread - -#----------------------------------------------------------------------- -# Enable PGI-MPI installation to accept as large a message as possible, 200 Mb - -# export P4_GLOBMEMSIZE="268435456" - -export PATH="${MPICH_HOME}/bin:${PGI_HOME}/bin:${PATH}" - -mpicc -c99 \ - -O4 \ - -o test_sum.mpi.pgi.exe \ - -I../include -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.c b/kokkos/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.c deleted file mode 100644 index 5f2866f..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.c +++ /dev/null @@ -1,562 +0,0 @@ - - -#include -#include - -#include - -/*--------------------------------------------------------------------*/ -/* Recursively split a box into into (up-ip) sub-boxes */ - -typedef const int RangeInput[2] ; -typedef int RangeOutput[2] ; -typedef RangeInput * const BoxInput ; -typedef RangeOutput * const BoxOutput ; - -static -void box_partition( int ip , int up , int axis , - BoxInput box , - int (* const p_box)[3][2] ) -{ - const int np = up - ip ; - if ( 1 == np ) { - p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; - p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; - p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; - } - else { - const int n = box[ axis ][1] - box[ axis ][0] ; - const int np_low = np / 2 ; /* Rounded down */ - const int np_upp = np - np_low ; - - const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np))); - const int n_low = n - n_upp ; - const int next_axis = ( axis + 2 ) % 3 ; - - if ( np_low ) { /* P = [ip,ip+np_low) */ - int dbox[3][2] ; - dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; - dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; - dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; - - dbox[ axis ][1] = dbox[ axis ][0] + n_low ; - - box_partition( ip, ip + np_low, next_axis, - (const int (*)[2]) dbox, p_box ); - } - - if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */ - int dbox[3][2] ; - dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; - dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; - dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; - - ip += np_low ; - dbox[ axis ][0] += n_low ; - dbox[ axis ][1] = dbox[ axis ][0] + n_upp ; - - box_partition( ip, ip + np_upp, next_axis, - (const int (*)[2]) dbox, p_box ); - } - } -} - -void box_partition_rcb( 
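
The deleted box_partition() above is a recursive coordinate bisection: the rank range [ip,up) is halved, the current axis is cut so that each half receives cells in proportion to its rank count, and the recursion continues on the next axis. A small standalone sketch of that per-level split arithmetic, mirroring the expressions in the deleted code:

#include <stdio.h>

/* Split n cells between the lower and upper halves of np ranks, in the
 * same proportion the deleted box_partition() uses at every level. */
static void split_axis( const int n , const int np ,
                        int * n_low , int * n_upp )
{
  const int np_low = np / 2 ;      /* ranks in the lower half, rounded down */
  const int np_upp = np - np_low ; /* ranks in the upper half */

  *n_upp = (int) (((double) n) * ( ((double) np_upp) / ((double) np) ));
  *n_low = n - *n_upp ;
}

int main( void )
{
  int n_low , n_upp ;
  split_axis( 64 , 5 , & n_low , & n_upp );  /* 64 cells over 5 ranks */
  printf( "lower half gets %d cells, upper half gets %d cells\n" ,
          n_low , n_upp );
  return 0 ;
}
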
const int np , - const int root_box[3][2] , - int pbox[][3][2] ) -{ - box_partition( 0 , np , 2 , root_box , pbox ); -} - -/*--------------------------------------------------------------------*/ - -static int box_intersect( BoxInput a , BoxInput b , BoxOutput c ) -{ - int i ; - for ( i = 0 ; i < 3 ; ++i ) { - c[i][0] = a[i][0] < b[i][0] ? b[i][0] : a[i][0] ; - c[i][1] = a[i][1] < b[i][1] ? a[i][1] : b[i][1] ; - } - - return c[0][0] < c[0][1] && c[1][0] < c[1][1] && c[2][0] < c[2][1] ; -} - - -/*--------------------------------------------------------------------*/ - -static void global_to_use_box( BoxInput gbox , - BoxInput pbox , - const int ghost , - BoxOutput interiorBox , - BoxOutput useBox ) -{ - int i = 0 ; - - for ( i = 0 ; i < 3 ; ++i ) { - const int n = pbox[i][1] - pbox[i][0] ; - - if ( n < 0 ) { - abort(); - } - - interiorBox[i][0] = gbox[i][0] == pbox[i][0] - ? gbox[i][0] : pbox[i][0] + ghost ; - - interiorBox[i][1] = gbox[i][1] == pbox[i][1] - ? gbox[i][1] : pbox[i][1] - ghost ; - - if ( interiorBox[i][1] < pbox[i][0] ) { - interiorBox[i][1] = pbox[i][0] ; - } - - if ( interiorBox[i][0] > pbox[i][1] ) { - interiorBox[i][0] = pbox[i][1] ; - } - - if ( interiorBox[i][1] < interiorBox[i][0] ) { - interiorBox[i][1] = interiorBox[i][0] ; - } - - useBox[i][0] = pbox[i][0] - ghost ; - useBox[i][1] = pbox[i][1] + ghost ; - - if ( useBox[i][0] < gbox[i][0] ) { useBox[i][0] = gbox[i][0] ; } - if ( useBox[i][1] > gbox[i][1] ) { useBox[i][1] = gbox[i][1] ; } - } -} - - -/* A use-box is the owned box plus the ghost layers. - * Map a global (x,y,z) to a local integer ordinate. - */ -static int map_global_to_use_box( BoxInput useBox , - const int global_x , - const int global_y , - const int global_z ) -{ - const int nx = useBox[0][1] - useBox[0][0] ; - const int ny = useBox[1][1] - useBox[1][0] ; - const int nz = useBox[2][1] - useBox[2][0] ; - const int ix = global_x - useBox[0][0] ; - const int iy = global_y - useBox[1][0] ; - const int iz = global_z - useBox[2][0] ; - - const int good = 0 <= ix && ix < nx && - 0 <= iy && iy < ny && - 0 <= iz && iz < nz ; - - if ( nx < 0 || ny < 0 || nz < 0 ) { - abort(); - } - if ( ! good ) { - abort(); - } - - return good ? 
ix + iy * nx + iz * nx * ny : -1 ; -} - -int box_map_local( const int local_uses[3][2] , - const int map_local_id[] , - const int global_x , - const int global_y , - const int global_z ) -{ - int i = map_global_to_use_box( local_uses , global_x , global_y , global_z ); - - if ( 0 <= i ) { i = map_local_id[i] ; } - - return i ; -} - - -/*--------------------------------------------------------------------*/ - -static void resize_int( int ** a , int * allocLen , int newLen ) -{ - int k = 32; - while ( k < newLen ) { k <<= 1 ; } - if ( NULL == *a ) - { *a = malloc( sizeof(int)*(*allocLen = k) ); } - else if ( *allocLen < k ) - { *a = realloc(*a , sizeof(int)*(*allocLen = k)); } -} - -void box_partition_map( - const int np , - const int my_p , - const int gbox[3][2] , - const int pbox[][3][2] , - const int ghost , - - int map_use_box[3][2] , - int map_local_id[] , - int * map_count_interior , - int * map_count_owns , - int * map_count_uses , - int ** map_recv_pc , - int ** map_send_pc , - int ** map_send_id ) -{ - int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); - int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); - - int id_length = 0 ; - - int * send_id = NULL ; - int send_id_size = 0 ; - - int own_length , use_length , int_length ; - int count_interior , count_parallel ; - int iSend ; - int g_ix , g_iy , g_iz ; - int i ; - - int my_int_box[3][2] ; - - global_to_use_box( gbox , pbox[my_p] , ghost , my_int_box , map_use_box ); - - own_length = ( pbox[my_p][0][1] - pbox[my_p][0][0] ) * - ( pbox[my_p][1][1] - pbox[my_p][1][0] ) * - ( pbox[my_p][2][1] - pbox[my_p][2][0] ); - - use_length = ( map_use_box[0][1] - map_use_box[0][0] ) * - ( map_use_box[1][1] - map_use_box[1][0] ) * - ( map_use_box[2][1] - map_use_box[2][0] ); - - int_length = ( my_int_box[0][1] - my_int_box[0][0] ) * - ( my_int_box[1][1] - my_int_box[1][0] ) * - ( my_int_box[2][1] - my_int_box[2][0] ); - - for ( i = 0 ; i < id_length ; ++i ) { map_local_id[i] = -1 ; } - - /* Fill in locally owned portion: { interior , parallel } */ - - count_interior = 0 ; - count_parallel = int_length ; - - for ( g_iz = pbox[my_p][2][0] ; g_iz < pbox[my_p][2][1] ; ++g_iz ) { - for ( g_iy = pbox[my_p][1][0] ; g_iy < pbox[my_p][1][1] ; ++g_iy ) { - for ( g_ix = pbox[my_p][0][0] ; g_ix < pbox[my_p][0][1] ; ++g_ix ) { - - const int local = - map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); - - if ( local < 0 ) { - abort(); - } - - if ( my_int_box[2][0] <= g_iz && g_iz < my_int_box[2][1] && - my_int_box[1][0] <= g_iy && g_iy < my_int_box[1][1] && - my_int_box[0][0] <= g_ix && g_ix < my_int_box[0][1] ) { - /* Interior */ - map_local_id[ local ] = count_interior++ ; - } - else { - /* Parallel */ - map_local_id[ local ] = count_parallel++ ; - } - } - } - } - - if ( count_interior != int_length ) { abort(); } - if ( count_parallel != own_length ) { abort(); } - - /* Fill in off-process received portion: { ( i + my_p ) % np } */ - - recv_pc[0] = count_parallel ; - recv_pc[1] = count_parallel ; - send_pc[0] = 0 ; - send_pc[1] = 0 ; - iSend = 0 ; - - for ( i = 1 ; i < np ; ++i ) { - const int ip = ( i + my_p ) % np ; - int recv_box[3][2] ; - int send_box[3][2] ; - int other_int_box[3][2] ; - int other_use_box[3][2] ; - - /* Received portions */ - - if ( box_intersect( (BoxInput) map_use_box , (BoxInput) pbox[ip] , recv_box ) ) { - - for ( g_iz = recv_box[2][0] ; g_iz < recv_box[2][1] ; ++g_iz ) { - for ( g_iy = recv_box[1][0] ; g_iy < recv_box[1][1] ; ++g_iy ) { - for ( g_ix = recv_box[0][0] ; g_ix < recv_box[0][1] ; 
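
box_partition_map() numbers the local grid points in three bands: interior points first, then owned boundary ("parallel") points, then ghost points appended neighbor by neighbor in the order (my_p + i) % np, with recv_pc[] recording where each neighbor's block starts. The sketch below shows only that prefix bookkeeping; ghosts_per_neighbor is a hypothetical stand-in for the counts the deleted loop accumulates while walking each intersection box.

/* Build the recv_pc prefix spans used by the deleted box_partition_map().
 * ghosts_per_neighbor[i] is assumed to hold the number of points received
 * from neighbor (my_p + i) % np ; entry 0 (self) is unused. */
static void build_recv_prefix( const int np ,
                               const int count_owns ,
                               const int * ghosts_per_neighbor ,
                               int * recv_pc /* length np + 1 */ )
{
  int i ;
  recv_pc[0] = count_owns ;   /* ghost ordinals start after owned points */
  recv_pc[1] = count_owns ;   /* nothing is received from my own rank */
  for ( i = 1 ; i < np ; ++i ) {
    recv_pc[i+1] = recv_pc[i] + ghosts_per_neighbor[i] ;
  }
}
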
++g_ix ) { - - const int local = map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); - - map_local_id[ local ] = count_parallel++ ; - } - } - } - } - recv_pc[i+1] = count_parallel ; - - /* Sent items */ - - global_to_use_box( gbox, pbox[ip], ghost, other_int_box, other_use_box ); - - if ( box_intersect( (BoxInput) other_use_box , (BoxInput) pbox[my_p] , send_box ) ) { - - int nSend = ( send_box[0][1] - send_box[0][0] ) * - ( send_box[1][1] - send_box[1][0] ) * - ( send_box[2][1] - send_box[2][0] ); - - resize_int( & send_id , & send_id_size , (iSend + nSend ) ); - - for ( g_iz = send_box[2][0] ; g_iz < send_box[2][1] ; ++g_iz ) { - for ( g_iy = send_box[1][0] ; g_iy < send_box[1][1] ; ++g_iy ) { - for ( g_ix = send_box[0][0] ; g_ix < send_box[0][1] ; ++g_ix ) { - - const int local = map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); - - if ( map_local_id[ local ] < count_interior ) { abort(); } - - send_id[ iSend ] = map_local_id[ local ] ; - ++iSend ; - } - } - } - } - send_pc[i+1] = iSend ; - } - - if ( count_parallel != use_length ) { abort(); } - - *map_count_interior = int_length ; - *map_count_owns = own_length ; - *map_count_uses = use_length ; - *map_recv_pc = recv_pc ; - *map_send_pc = send_pc ; - *map_send_id = send_id ; -} - -/*--------------------------------------------------------------------*/ - -#ifdef UNIT_TEST - -static int box_contain( const int a[3][2] , const int b[3][2] ) -{ - return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && - a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && - a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; -} - -static void box_print( FILE * fp , const int a[][2] ) -{ - fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", - a[0][0] , a[0][1] , - a[1][0] , a[1][1] , - a[2][0] , a[2][1] ); -} - -static int box_disjoint( BoxInput a , BoxInput b ) -{ - return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] || - a[1][1] <= b[1][0] || b[1][1] <= a[1][0] || - a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ; -} - - -static void test_box( const int box[3][2] , const int np ) -{ - const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; - int ncell_total = 0 ; - int ncell_min = ncell_box ; - int ncell_max = 0 ; - int (*pbox)[3][2] ; - int i , j ; - - pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); - - box_partition( 0 , np , 2 , box , pbox ); - - for ( i = 0 ; i < np ; ++i ) { - const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) * - ( pbox[i][1][1] - pbox[i][1][0] ) * - ( pbox[i][2][1] - pbox[i][2][0] ); - - if ( ! box_contain( box , (const int (*)[2]) pbox[i] ) ) { - fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); - box_print(stdout,(const int (*)[2]) pbox[i]); - fprintf(stdout,"\n"); - abort(); - } - - for ( j = i + 1 ; j < np ; ++j ) { - if ( ! 
box_disjoint( (const int (*)[2]) pbox[i] , - (const int (*)[2]) pbox[j] ) ) { - fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); - box_print(stdout, (const int (*)[2]) pbox[i]); - fprintf(stdout,"\n"); - fprintf(stdout," pbox[%d/%d] = ",j,np); - box_print(stdout, (const int (*)[2]) pbox[j]); - fprintf(stdout,"\n"); - abort(); - } - } - ncell_total += ncell ; - - if ( ncell_max < ncell ) { ncell_max = ncell ; } - if ( ncell < ncell_min ) { ncell_min = ncell ; } - } - - if ( ncell_total != ncell_box ) { - fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); - abort(); - } - fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", - np,ncell_box,ncell_box/np,ncell_min,ncell_max); - - free( pbox ); -} - -/*--------------------------------------------------------------------*/ - -static void test_maps( const int root_box[][2] , const int np ) -{ - const int ghost = 1 ; - const int nx_global = root_box[0][1] - root_box[0][0] ; - const int ny_global = root_box[1][1] - root_box[1][0] ; - int map_count_interior , map_count_owns , map_count_uses ; - int map_use_box[3][2] ; - int ieq , i , j ; - int (*pbox)[3][2] ; - int **local_values ; - int **map_local_id ; - int **map_recv_pc ; - int **map_send_pc ; - int **map_send_id ; - - pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); - - box_partition( 0 , np , 2 , root_box , pbox ); - - local_values = (int **) malloc( sizeof(int*) * np ); - map_local_id = (int **) malloc( sizeof(int*) * np ); - map_recv_pc = (int **) malloc( sizeof(int*) * np ); - map_send_pc = (int **) malloc( sizeof(int*) * np ); - map_send_id = (int **) malloc( sizeof(int*) * np ); - - /* Set each local value to the global equation number */ - - for ( ieq = i = 0 ; i < np ; ++i ) { - const int (*mybox)[2] = (const int (*)[2]) pbox[i] ; - const int nx = mybox[0][1] - mybox[0][0] ; - const int ny = mybox[1][1] - mybox[1][0] ; - const int nz = mybox[2][1] - mybox[2][0] ; - int ix , iy , iz ; - - map_local_id[i] = (int *) malloc( sizeof(int) * - ( nx + 2 * ghost ) * - ( ny + 2 * ghost ) * - ( nz + 2 * ghost ) ); - - /* Generate the partition maps for this rank */ - box_partition_map( np , i , root_box , - (const int (*)[3][2]) pbox , ghost , - map_use_box , - map_local_id[i] , - & map_count_interior , - & map_count_owns , - & map_count_uses , - & map_recv_pc[i] , - & map_send_pc[i] , & map_send_id[i] ); - - if ( map_count_uses != map_recv_pc[i][np] ) { abort(); } - - local_values[i] = (int *) malloc( sizeof(int) * map_count_uses ); - - for ( iz = map_use_box[2][0] ; iz < map_use_box[2][1] ; ++iz ) { - for ( iy = map_use_box[1][0] ; iy < map_use_box[1][1] ; ++iy ) { - for ( ix = map_use_box[0][0] ; ix < map_use_box[0][1] ; ++ix ) { - - const int igrid = map_global_to_use_box((BoxInput)map_use_box,ix,iy,iz); - const int ieq = map_local_id[i][ igrid ]; - - if ( 0 <= ieq ) { - local_values[i][ ieq ] = - ix + iy * nx_global + iz * nx_global * ny_global ; - } - } - } - } - } - - /* Pair-wise compare the local values */ - /* i == receiving processor rank */ - /* ip == sending processor rank */ - /* j == receiving processor data entry for message from 'ip' */ - /* jp == sending processor data entry for message to 'i' */ - - for ( i = 0 ; i < np ; ++i ) { - for ( j = 1 ; j < np ; ++j ) { - const int ip = ( i + j ) % np ; - const int jp = ( i + np - ip ) % np ; - const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; - const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; - int k ; - if ( nrecv != nsend ) { - fprintf(stderr,"P%d recv %d from 
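
The pairwise check in the deleted test_maps() relies on a symmetry of the neighbor ordering: if rank i receives its j-th message from ip = (i + j) % np, then that same exchange is send slot jp = (i + np - ip) % np on rank ip, so the two prefix spans must have equal length and matching values. A tiny standalone check of that index identity (np here is an arbitrary example value):

#include <assert.h>

int main( void )
{
  const int np = 7 ;   /* any positive process count works */
  int i , j ;
  for ( i = 0 ; i < np ; ++i ) {
    for ( j = 1 ; j < np ; ++j ) {
      const int ip = ( i + j ) % np ;        /* rank i receives from ip */
      const int jp = ( i + np - ip ) % np ;  /* matching send slot on ip */
      assert( ( ip + jp ) % np == i );       /* ip's jp-th partner is i */
    }
  }
  return 0 ;
}
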
P%d\n",i,nrecv,ip); - fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); - abort(); - } - for ( k = 0 ; k < nrecv ; ++k ) { - const int irecv = map_recv_pc[i][j] + k ; - const int isend = map_send_pc[ip][jp] + k ; - const int val_irecv = local_values[i][irecv] ; - const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ; - if ( val_irecv != val_isend ) { - fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); - fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); - abort(); - } - } - } - } - - for ( i = 0 ; i < np ; ++i ) { - free( map_local_id[i] ); - free( map_recv_pc[i] ); - free( map_send_pc[i] ); - free( map_send_id[i] ); - free( local_values[i] ); - } - free( map_send_id ); - free( map_send_pc ); - free( map_recv_pc ); - free( map_local_id ); - free( local_values ); - free( pbox ); -} - -/*--------------------------------------------------------------------*/ - -int main( int argc , char * argv[] ) -{ - int np_max = 256 ; - int box[3][2] = { { 0 , 64 } , { 0 , 64 } , { 0 , 64 } }; - int np = 0 ; - - switch( argc ) { - case 3: - sscanf(argv[1],"%d",&np); - sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); - if ( 0 < np ) { test_box( (const int (*)[2]) box , np ); } - if ( 0 < np ) { test_maps( (const int (*)[2]) box , np ); } - break ; - default: - for ( np = 1 ; np <= np_max ; ++np ) { - test_box( (const int (*)[2]) box , np ); - test_maps( (const int (*)[2]) box , np ); - } - break ; - } - return 0 ; -} - -#endif - - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.h b/kokkos/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.h deleted file mode 100644 index 71d71f5..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.h +++ /dev/null @@ -1,88 +0,0 @@ - - -#ifndef BoxPartionIB_h -#define BoxPartionIB_h - -/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. - * - * Use recursive coordinate bisection to partition a box - * into np disjoint sub-boxes. Allocate (via malloc) and - * populate the sub-boxes, mapping the local (x,y,z) to - * a local ordinal, and mappings for the send-recv messages - * to update the ghost cells. 
- * - * Order local ordinates as follows: - * { - * interior , - * boundary , - * remote[ ( my_p + i ) % np ] - * } - * where i = 1..(np-1) - * - * usage: - * - * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; - * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; - * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; - * - * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { - * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { - * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { - * const int x_global = x + pbox[my_p][0][0] ; - * const int y_global = y + pbox[my_p][1][0] ; - * const int z_global = z + pbox[my_p][2][0] ; - * - * const int local_ordinal = - * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); - * - * if ( 0 <= local_ordinal ) { - * } - * } - * - * for ( i = 1 ; i < np ; ++i ) { - * const int recv_processor = ( my_p + i ) % np ; - * const int recv_ordinal_begin = map_recv_pc[i]; - * const int recv_ordinal_end = map_recv_pc[i+1]; - * } - * - * for ( i = 1 ; i < np ; ++i ) { - * const int send_processor = ( my_p + i ) % np ; - * const int send_map_begin = map_send_pc[i]; - * const int send_map_end = map_send_pc[i+1]; - * for ( j = send_map_begin ; j < send_map_end ; ++j ) { - * send_ordinal = map_send_id[j] ; - * } - * } - */ - - -void box_partition_rcb( - const int np /**< [in] Number of partitions */ , - const int root_box[3][2] /**< [in] Global 3D box to partition */ , - int pbox[][3][2] /**< [out] Partition of global 3D boxes */ ); - -void box_partition_map( - const int np /**< [in] Number of partitions */ , - const int my_p /**< [in] My partition */ , - const int gbox[3][2] /**< [in] Global 3D box */ , - const int pbox[][3][2] /**< [in] Partitions of global 3D box */ , - const int ghost /**< [in] Number of grid points to ghost */ , - - int map_uses_box[3][2] /**< [out] Local box expanded by ghosting */ , - int map_local_id[] /**< [out] Mapping for local points */ , - int * map_count_interior /**< [out] Number of my interior points */ , - int * map_count_owns /**< [out] Number of points I own */ , - int * map_count_uses /**< [out] Number of points I access */ , - int ** map_recv_pc /**< [out] Received prefix spans per process */ , - int ** map_send_pc /**< [out] Send prefix counts per process */ , - int ** map_send_id /**< [out] Send grid points */ ); - -/* \brief Map a global (x,y,z) to a local ordinal. 
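
The box_map_local() declared just below takes five parameters: the ghost-expanded use box produced by box_partition_map(), the local-id map, and the global (x,y,z) coordinates; a negative result means the point is not mapped locally. (The inline usage example above still passes pbox[my_p] and ghost, which does not match this declaration.) A hedged usage sketch against the declared form, assuming the deleted header is available for the prototype:

#include "BoxPartitionIB.h"

/* Visit every grid point of the ghost-expanded use box and translate the
 * global (x,y,z) coordinates into local ordinals. */
static void visit_use_box( const int use_box[3][2] ,
                           const int map_local_id[] )
{
  int x , y , z ;
  for ( z = use_box[2][0] ; z < use_box[2][1] ; ++z ) {
  for ( y = use_box[1][0] ; y < use_box[1][1] ; ++y ) {
  for ( x = use_box[0][0] ; x < use_box[0][1] ; ++x ) {
    const int local = box_map_local( use_box , map_local_id , x , y , z );
    if ( 0 <= local ) {
      /* 'local' indexes this grid point in the locally stored vectors */
    }
  }}}
}
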
*/ -int box_map_local( const int local_uses[3][2] , - const int map_local_id[] , - const int global_x , - const int global_y , - const int global_z ); - -#endif - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/CGSolver.c b/kokkos/basic/optional/ThreadPool/test/hhpccg/CGSolver.c deleted file mode 100644 index 55f739d..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/CGSolver.c +++ /dev/null @@ -1,311 +0,0 @@ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -/*--------------------------------------------------------------------*/ - -void cgsolve_set_lhs( const struct distributed_crs_matrix * const matrix , - const VECTOR_SCALAR * const x , - VECTOR_SCALAR * const b ) -{ - const int nRow = matrix->n_local_row ; - const int nVec = matrix->p_recv_pc[ matrix->p_size ] ; - - VECTOR_SCALAR * const p = - (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); - - tpi_copy( nRow , x , p ); - - dcrs_apply( matrix , p , b ); - - free( p ); -} - -/*--------------------------------------------------------------------*/ - -/* x += alpha * p ; - * r -= alpha * Ap ; - * return dot( r , r ); - */ -static -double cgsolver_update( const int length , - const VECTOR_SCALAR alpha , - const VECTOR_SCALAR * p , - const VECTOR_SCALAR * Ap , - VECTOR_SCALAR * x , - VECTOR_SCALAR * r ); - -/*--------------------------------------------------------------------*/ - -void cgsolve_blas( const struct distributed_crs_matrix * matrix , - const VECTOR_SCALAR * const b , - VECTOR_SCALAR * const x , - const VECTOR_SCALAR tolerance , - const int max_iter , - const int print_iter , - int * const iter_count , - VECTOR_SCALAR * const norm_resid , - double * const solve_dt ) -{ - const int nRow = matrix->n_local_row ; - const int nVec = matrix->p_recv_pc[ matrix->p_size ] ; - - const VECTOR_SCALAR tol_2 = tolerance * tolerance ; - - VECTOR_SCALAR * const r = - (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); - VECTOR_SCALAR * const p = - (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); - VECTOR_SCALAR * const Ap = - (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); - - VECTOR_SCALAR rtrans = 0.0 ; - VECTOR_SCALAR beta = 0.0 ; - VECTOR_SCALAR pAp = 0.0 ; - VECTOR_SCALAR alpha ; - double time_begin , time_end ; - - int k ; - - tpi_copy( nRow , b , r ); - tpi_copy( nRow , x , p ); - - /* Ap = matrix * p ; */ - dcrs_apply( matrix , p , Ap ); - - /* r -= Ap ; */ - tpi_axpy( nRow , -1.0 , Ap , r ); - - rtrans = tpi_dot( nRow , r , r ); - - time_begin = TPI_Walltime(); - - for ( k = 0 ; k < max_iter && tol_2 < rtrans ; ++k ) { - - /* p = r + beta * p ; */ - tpi_xpby( nRow, r, beta, p ); /* parallel */ - - dcrs_apply( matrix , p , Ap ); - - pAp = tpi_dot( nRow , p , Ap ); - - /* If orthogonal then cannot update */ - alpha = 0 < fabs( pAp ) ? 
rtrans / pAp : 0.0 ; - - /* x += alpha * p ; - * r -= alpha * Ap ; - * return dot( r , r ); - */ - beta = rtrans ; - - tpi_axpy( nRow , alpha , p , x ); - tpi_axpy( nRow , -alpha , Ap , r ); - rtrans = tpi_dot( nRow , r , r ); - beta = rtrans / beta ; - } - - time_end = TPI_Walltime(); - -#ifdef HAVE_MPI - { - double tb = time_begin ; - double te = time_end ; - MPI_Allreduce(&tb, &time_begin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); - MPI_Allreduce(&te, &time_end, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - } -#endif - - *solve_dt += time_end - time_begin ; - - *norm_resid = sqrt( rtrans ); - *iter_count = k ; - - free( Ap ); - free( p ); - free( r ); -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -void cgsolve( const struct distributed_crs_matrix * matrix , - const VECTOR_SCALAR * const b , - VECTOR_SCALAR * const x , - const int overlap_comm , - const VECTOR_SCALAR tolerance , - const int max_iter , - const int print_iter , - int * const iter_count , - VECTOR_SCALAR * const norm_resid , - double * const solve_dt ) -{ - const int nRow = matrix->n_local_row ; - const int nVec = matrix->p_recv_pc[ matrix->p_size ] ; - - const VECTOR_SCALAR tol_2 = tolerance * tolerance ; - - VECTOR_SCALAR * const r = - (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); - VECTOR_SCALAR * const p = - (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); - VECTOR_SCALAR * const Ap = - (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); - - VECTOR_SCALAR rtrans = 0.0 ; - VECTOR_SCALAR beta = 0.0 ; - VECTOR_SCALAR pAp = 0.0 ; - VECTOR_SCALAR alpha ; - double time_begin , time_end ; - - int k ; - - tpi_copy( nRow , b , r ); - tpi_copy( nRow , x , p ); - - /* gather off-processor components of 'p'. - * Ap = matrix * p ; - * return dot( Ap , p ); - */ - pAp = dcrs_apply_and_dot( matrix , p , Ap , overlap_comm ); - - /* r -= 1.0 * Ap ; - * return dot( r , r ); - */ - alpha = 1.0 ; - rtrans = cgsolver_update( nRow, alpha, NULL, Ap, NULL, r ); /* parallel */ - - time_begin = TPI_Walltime(); - - for ( k = 0 ; k < max_iter && tol_2 < rtrans ; ++k ) { - - /* p = r + beta * p ; */ - tpi_xpby( nRow, r, beta, p ); /* parallel */ - - /* gather off-processor components of 'p'. - * Ap = matrix * p ; - * return dot( Ap , p ); - */ - pAp = dcrs_apply_and_dot( matrix , p , Ap , overlap_comm ); /* parallel */ - - /* If orthogonal then cannot update */ - alpha = 0 < fabs( pAp ) ? 
rtrans / pAp : 0.0 ; - - /* x += alpha * p ; - * r -= alpha * Ap ; - * return dot( r , r ); - */ - beta = rtrans ; - rtrans = cgsolver_update( nRow , alpha , p , Ap , x , r ); /* parallel */ - beta = rtrans / beta ; - } - - time_end = TPI_Walltime(); - -#ifdef HAVE_MPI - { - double tb = time_begin ; - double te = time_end ; - MPI_Allreduce(&tb, &time_begin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); - MPI_Allreduce(&te, &time_end, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - } -#endif - - *solve_dt += time_end - time_begin ; - - *norm_resid = sqrt( rtrans ); - *iter_count = k ; - - free( Ap ); - free( p ); - free( r ); -} - -/*--------------------------------------------------------------------*/ - -struct tpi_work_cgsolve { - const VECTOR_SCALAR * p ; - const VECTOR_SCALAR * Ap ; - VECTOR_SCALAR * x ; - VECTOR_SCALAR * r ; - VECTOR_SCALAR alpha ; - int length ; -}; - -static void tpi_work_dot_join( TPI_Work * work , const void * src ) -{ *((double *) work->reduce ) += *((const double *) src); } - -static void tpi_work_dot_init( TPI_Work * work ) -{ *((double *) work->reduce ) = 0 ; } - -static void tpi_work_update( TPI_Work * work ) -{ - const struct tpi_work_cgsolve * const cg_work = - (const struct tpi_work_cgsolve *) work->info ; - - const int length = cg_work->length ; - const VECTOR_SCALAR alpha = cg_work->alpha ; - const VECTOR_SCALAR * const p = cg_work->p ; - const VECTOR_SCALAR * const Ap = cg_work->Ap ; - VECTOR_SCALAR * const x = cg_work->x ; - VECTOR_SCALAR * const r = cg_work->r ; - - double mag = 0 ; - int iBeg , iEnd , i ; - - tpi_work_span( work , length , & iBeg , & iEnd ); - - if ( x ) { for ( i = iBeg ; i < iEnd ; ++i ) { x[i] += alpha * p[i]; } } - - for ( i = iBeg ; i < iEnd ; ++i ) { - const VECTOR_SCALAR val = ( r[i] -= alpha * Ap[i] ); - mag += val * val ; - } - - *((double*) work->reduce ) = mag ; -} - -double cgsolver_update( const int length , - const VECTOR_SCALAR alpha , - const VECTOR_SCALAR * p , - const VECTOR_SCALAR * Ap , - VECTOR_SCALAR * x , - VECTOR_SCALAR * r ) -{ - struct tpi_work_cgsolve work ; - - double result = 0.0 ; - - work.length = length ; - work.alpha = alpha ; - work.p = p ; - work.Ap = Ap ; - work.x = x ; - work.r = r ; - - TPI_Run_threads_reduce( tpi_work_update , & work , - tpi_work_dot_join , tpi_work_dot_init , - sizeof(result) , & result ); - -#ifdef HAVE_MPI - { - double local = result ; - MPI_Allreduce( & local, & result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); - } -#endif - - return result ; -} - -/*--------------------------------------------------------------------*/ - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/CGSolver.h b/kokkos/basic/optional/ThreadPool/test/hhpccg/CGSolver.h deleted file mode 100644 index f0ee6f6..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/CGSolver.h +++ /dev/null @@ -1,40 +0,0 @@ - -#ifndef CGSolver_h -#define CGSolver_h - -#include -#include - -/*--------------------------------------------------------------------*/ - -void cgsolve_set_lhs( const struct distributed_crs_matrix * matrix , - const VECTOR_SCALAR * const x , - VECTOR_SCALAR * const b ); - -/* Solve with fused loops */ -void cgsolve( const struct distributed_crs_matrix * matrix , - const VECTOR_SCALAR * const b , - VECTOR_SCALAR * const x , - const int overlap_comm , - const VECTOR_SCALAR tolerance , - const int max_iter , - const int print_iter , - int * const iter_count , - VECTOR_SCALAR * const norm_resid , - double * const solve_dt ); - -/* Solve with blas-like calls */ -void cgsolve_blas( const struct 
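
The heart of the deleted solver is cgsolver_update(), which fuses the two AXPY updates and the residual dot product into one sweep over the vectors (a single pass over p, Ap, x, and r) instead of three separate BLAS-1 calls; TPI splits that sweep across threads and the partial dot products are joined by tpi_work_dot_join. A plain serial version of the fused loop for reference, using double in place of the VECTOR_SCALAR typedef and assuming all vectors are provided:

/* Serial equivalent of the fused update parallelized by the deleted
 * cgsolver_update():  x += alpha * p ;  r -= alpha * Ap ;  return dot(r,r). */
static double fused_cg_update( const int length ,
                               const double alpha ,
                               const double * const p ,
                               const double * const Ap ,
                               double * const x ,
                               double * const r )
{
  double rtrans = 0.0 ;
  int i ;
  for ( i = 0 ; i < length ; ++i ) {
    x[i] += alpha * p[i] ;
    r[i] -= alpha * Ap[i] ;
    rtrans += r[i] * r[i] ;
  }
  return rtrans ;
}
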
distributed_crs_matrix * matrix , - const VECTOR_SCALAR * const b , - VECTOR_SCALAR * const x , - const VECTOR_SCALAR tolerance , - const int max_iter , - const int print_iter , - int * const iter_count , - VECTOR_SCALAR * const norm_resid , - double * const solve_dt ); - -/*--------------------------------------------------------------------*/ - -#endif - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/CMakeLists.txt b/kokkos/basic/optional/ThreadPool/test/hhpccg/CMakeLists.txt deleted file mode 100644 index 0c652cd..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/CMakeLists.txt +++ /dev/null @@ -1,83 +0,0 @@ - -INCLUDE(PackageAddExecutableAndTest) -INCLUDE(PackageLibraryMacros) - -#################### - -SET(HEADERS "") -SET(SOURCES "") - -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) - -SET(HEADERS ${HEADERS} - ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h - ) - -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) - -APPEND_SET(HEADERS - BoxPartition.h - CGSolver.h - tpi_vector.h - dcrs_matrix.h - ) - -#################### - - -PACKAGE_ADD_EXECUTABLE( - test_tpi_hhpccg - COMM serial mpi - SOURCES main.c CGSolver.c BoxPartitionIB.c tpi_vector.c dcrs_matrix.c - DEPLIBS pthread m - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hhpccg - NAME test_tpi_hhpccg_serial_1 - COMM serial - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hhpccg - NAME test_tpi_hhpccg_serial_2 - COMM serial - ARGS "threads=2" - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hhpccg - NAME test_tpi_hhpccg_serial_4 - COMM serial - ARGS "threads=4" - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hhpccg - NAME test_tpi_hhpccg_mpi_1 - COMM mpi - NUM_MPI_PROCS 1 - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hhpccg - NAME test_tpi_hhpccg_mpi_2 - COMM mpi - NUM_MPI_PROCS 2 - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hhpccg - NAME test_tpi_hhpccg_mpi_4 - COMM mpi - NUM_MPI_PROCS 4 - DIRECTORY . - ) - - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.c b/kokkos/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.c deleted file mode 100644 index d61404f..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.c +++ /dev/null @@ -1,314 +0,0 @@ - -#include -#include - -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -#include - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -#if ! 
defined( HAVE_MPI ) - -static -double comm_sum( double v ) { return v ; } - -#define get_off_process_entries( M , V ) /* */ - -/*--------------------------------------------------------------------*/ -#else /* defined( HAVE_MPI ) */ -/*--------------------------------------------------------------------*/ - -static -double comm_sum( double v ) -{ - double result = 0 ; - MPI_Allreduce( & v , & result , 1 , MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD ); - return result ; -} - -static -void get_off_process_entries( - const struct distributed_crs_matrix * const matrix , - VECTOR_SCALAR * const vec ) -{ - const int np = matrix->p_size ; - const int my_p = matrix->p_rank ; - const int * const recv_pc = matrix->p_recv_pc ; - const int * const send_pc = matrix->p_send_pc ; - const int * const send_id = matrix->p_send_id ; - int i , irecv ; - - for ( irecv = 0 , i = 1 ; i < np ; ++i ) { - if ( recv_pc[i] < recv_pc[i+1] ) ++irecv ; - } - - { - VECTOR_SCALAR * const send_buf = - (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * send_pc[np] ); - - MPI_Request * const recv_request = - (MPI_Request *) malloc( sizeof(MPI_Request) * irecv ); - - MPI_Status * const recv_status = - (MPI_Status *) malloc( sizeof(MPI_Status) * irecv ); - - for ( irecv = 0 , i = 1 ; i < np ; ++i ) { - const int ip = ( i + my_p ) % np ; - const int recv_beg = recv_pc[i]; - const int recv_length = recv_pc[i+1] - recv_beg ; - if ( recv_length ) { - MPI_Irecv( vec + recv_beg , - recv_length * sizeof(VECTOR_SCALAR), MPI_BYTE , - ip , 0 , MPI_COMM_WORLD , recv_request + irecv ); - ++irecv ; - } - } - - /* Gather components into send buffer */ - - for ( i = 0 ; i < send_pc[np] ; ++i ) { - send_buf[i] = vec[ send_id[i] ]; - } - - MPI_Barrier( MPI_COMM_WORLD ); - - for ( i = 1 ; i < np ; ++i ) { - const int ip = ( i + my_p ) % np ; - const int send_beg = send_pc[i]; - const int send_length = send_pc[i+1] - send_beg ; - if ( send_length ) { /* Send to 'i' */ - MPI_Rsend( send_buf + send_beg , - send_length * sizeof(VECTOR_SCALAR), MPI_BYTE , - ip , 0 , MPI_COMM_WORLD ); - } - } - - MPI_Waitall( irecv , recv_request , recv_status ); - - free( recv_status ); - free( recv_request ); - free( send_buf ); - } -} - -#endif - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -static void dcrs_apply_and_dot_span( - const struct distributed_crs_matrix * const matrix , - const int span_begin , - const int span_end , - const VECTOR_SCALAR * const x , - VECTOR_SCALAR * const y , - double * const result ) -{ - const int * const A_pc = matrix->A_pc ; - const int * const A_ia = matrix->A_ia ; - const MATRIX_SCALAR * const A_a = matrix->A_a ; - - double dot_x_y = *result ; - - int row = span_begin ; - - for ( ; row < span_end ; ++row ) { - const int pcBeg = A_pc[ row ]; - const int pcEnd = A_pc[ row + 1 ]; - - const int * ia = A_ia + pcBeg ; - const MATRIX_SCALAR * a = A_a + pcBeg ; - const MATRIX_SCALAR * const a_end = A_a + pcEnd ; - - VECTOR_SCALAR y_tmp = 0 ; - for ( ; a != a_end ; ++a , ++ia ) { - y_tmp += *a * x[ *ia ]; - } - dot_x_y += x[ row ] * y_tmp ; - y[ row ] = y_tmp ; - } - - *result = dot_x_y ; -} - -static void dcrs_apply_span( - const struct distributed_crs_matrix * const matrix , - const int span_begin , - const int span_end , - const VECTOR_SCALAR * const x , - VECTOR_SCALAR * const y ) -{ - const int * const A_pc = matrix->A_pc ; - const int * const A_ia = matrix->A_ia ; - const MATRIX_SCALAR * const A_a = matrix->A_a ; - - int row = 
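
Stripped of the threading and MPI plumbing, the kernel inside the deleted dcrs_apply_and_dot_span() is a plain CRS matrix-vector product over a row span that also accumulates the dot product of the input with the rows it just produced. A standalone serial sketch, with double in place of the MATRIX_SCALAR/VECTOR_SCALAR typedefs:

/* y[row] = A * x for rows in [begin,end), returning the sum of x[row]*y[row].
 * A_pc holds row offsets into the column-index array A_ia and the
 * coefficient array A_a, exactly as in the deleted struct. */
static double crs_apply_rows( const int begin , const int end ,
                              const int * const A_pc ,
                              const int * const A_ia ,
                              const double * const A_a ,
                              const double * const x ,
                              double * const y )
{
  double dot_x_y = 0.0 ;
  int row ;
  for ( row = begin ; row < end ; ++row ) {
    double y_tmp = 0.0 ;
    int k ;
    for ( k = A_pc[row] ; k < A_pc[row+1] ; ++k ) {
      y_tmp += A_a[k] * x[ A_ia[k] ] ;   /* sparse dot of row with x */
    }
    dot_x_y += x[row] * y_tmp ;
    y[row] = y_tmp ;
  }
  return dot_x_y ;
}
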
span_begin ; - - for ( ; row < span_end ; ++row ) { - const int pcBeg = A_pc[ row ]; - const int pcEnd = A_pc[ row + 1 ]; - - const int * ia = A_ia + pcBeg ; - const MATRIX_SCALAR * a = A_a + pcBeg ; - const MATRIX_SCALAR * const a_end = A_a + pcEnd ; - - VECTOR_SCALAR y_tmp = 0 ; - for ( ; a != a_end ; ++a , ++ia ) { - y_tmp += *a * x[ *ia ]; - } - y[ row ] = y_tmp ; - } -} - -static void work_span( const int count , const int rank , - int * jBeg , int * jEnd ) -{ - const int length = *jEnd - *jBeg ; - const int chunk = ( length + count - 1 ) / count ; - const int begin = chunk * rank ; - int end = begin + chunk ; - - if ( length < end ) { end = length ; } - - *jEnd = *jBeg + end ; - *jBeg += begin ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -static void tpi_work_dot_join( TPI_Work * work , const void * src ) -{ *((double *) ( work->reduce) ) += *((const double *) src); } - -static void tpi_work_dot_init( TPI_Work * work ) -{ *((double *) ( work->reduce) ) = 0 ; } - -/*--------------------------------------------------------------------*/ - -struct work_dcrs { - const struct distributed_crs_matrix * matrix ; - const VECTOR_SCALAR * x ; - VECTOR_SCALAR * y ; - int jBeg ; - int jEnd ; -}; - -/*--------------------------------------------------------------------*/ - -static void tpi_work_dcrs_apply_and_dot( TPI_Work * work ) -{ - const struct work_dcrs * const info = (const struct work_dcrs *) work->info ; - - int local_begin = info->jBeg ; - int local_end = info->jEnd ; - - work_span( work->count , work->rank , & local_begin , & local_end ); - - dcrs_apply_and_dot_span( info->matrix , local_begin , local_end , - info->x , info->y , (double *) work->reduce ); -} - -double dcrs_apply_and_dot( - const struct distributed_crs_matrix * matrix , - VECTOR_SCALAR * x , - VECTOR_SCALAR * y , - const int overlap_communication ) -{ - struct work_dcrs info ; - - double result = 0.0 ; - - info.matrix = matrix ; - info.x = x ; - info.y = y ; - - if ( overlap_communication && - matrix->n_internal_row < matrix->n_local_row ) { - - double remote_result = 0 ; - - /* Start the internal matrix-vector multiply */ - /* result += dot( output = A * input , input ); */ - - info.jBeg = 0 ; - info.jEnd = matrix->n_internal_row ; - - /* Divide internal work evenly among worker threads. - * This leave the primary thread completely out of the computation. 
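
Two details of the deleted dcrs_apply_and_dot() are worth noting here. When overlap_communication is requested it starts the interior rows on TPI worker threads with TPI_Start_threads_reduce(), performs the MPI ghost exchange on the calling thread, joins with TPI_Wait(), and only then processes the boundary rows that need ghost values, so the exchange hides behind interior work. Each thread claims its share of a row span through work_span(); a standalone copy of that chunking with a small driver is below.

#include <stdio.h>

/* Same static chunking as the deleted work_span(): thread 'rank' of
 * 'count' takes one contiguous piece of the half-open range [*jBeg,*jEnd). */
static void work_span( const int count , const int rank ,
                       int * jBeg , int * jEnd )
{
  const int length = *jEnd - *jBeg ;
  const int chunk  = ( length + count - 1 ) / count ;  /* rounded up */
  const int begin  = chunk * rank ;
  int end = begin + chunk ;

  if ( length < end ) { end = length ; }

  *jEnd  = *jBeg + end ;
  *jBeg += begin ;
}

int main( void )
{
  int rank ;
  for ( rank = 0 ; rank < 4 ; ++rank ) {
    int beg = 10 , end = 23 ;          /* 13 rows shared by 4 threads */
    work_span( 4 , rank , & beg , & end );
    printf( "thread %d: rows [%d,%d)\n" , rank , beg , end );
  }
  return 0 ;
}
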
- */ - TPI_Start_threads_reduce( tpi_work_dcrs_apply_and_dot , & info , - tpi_work_dot_join , - tpi_work_dot_init , - sizeof(result) , & result ); - - get_off_process_entries( matrix , x ); - - TPI_Wait(); /* Wait for internal result */ - - info.jBeg = matrix->n_internal_row ; - info.jEnd = matrix->n_local_row ; - - TPI_Run_threads_reduce( tpi_work_dcrs_apply_and_dot , & info , - tpi_work_dot_join , - tpi_work_dot_init , - sizeof(remote_result) , & remote_result ); - - result += remote_result ; - } - else { - info.jBeg = 0 ; - info.jEnd = matrix->n_local_row ; - - get_off_process_entries( matrix , x ); - - TPI_Run_threads_reduce( tpi_work_dcrs_apply_and_dot , & info , - tpi_work_dot_join , - tpi_work_dot_init , - sizeof(result) , & result ); - } - - result = comm_sum( result ); - - return result ; -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_dcrs_apply( TPI_Work * work ) -{ - const struct work_dcrs * const info = (const struct work_dcrs *) work->info ; - - int local_begin = info->jBeg ; - int local_end = info->jEnd ; - - work_span( work->count , work->rank , & local_begin , & local_end ); - - dcrs_apply_span( info->matrix , local_begin , local_end , - info->x , info->y ); -} - -void dcrs_apply( - const struct distributed_crs_matrix * matrix , - VECTOR_SCALAR * x , - VECTOR_SCALAR * y ) -{ - struct work_dcrs info ; - - info.matrix = matrix ; - info.x = x ; - info.y = y ; - info.jBeg = 0 ; - info.jEnd = matrix->n_local_row ; - - get_off_process_entries( matrix , x ); - - TPI_Run_threads( tpi_work_dcrs_apply , & info , 0 ); -} - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.h b/kokkos/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.h deleted file mode 100644 index 61f2032..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.h +++ /dev/null @@ -1,41 +0,0 @@ - -#ifndef dcrs_matrix_h -#define dcrs_matrix_h - -#include - -struct distributed_crs_matrix { - /* Global parallel */ - int p_size ; - int p_rank ; - int * p_recv_pc ; /* [np+1], span of received off-processor elements */ - int * p_send_pc ; /* [np+1], span of sent off-processor elements */ - int * p_send_id ; /* [send_pc[np]], indices of sent elements */ - - /* Local and local parallel */ - int n_local_column ; /* Number of local columns */ - int n_local_row ; /* Number of local rows */ - int n_internal_row ; /* Number of local rows with internal columns */ - int * A_pc ; /* Offsets into A_ia array for column indices */ - int * A_ia ; - MATRIX_SCALAR * A_a ; -}; - -/* 1) communicate off-processor portions of input. - * 2) apply: output = matrix * input ; - * 3) return: dot( output , input ); - */ -double dcrs_apply_and_dot( const struct distributed_crs_matrix * matrix , - VECTOR_SCALAR * input , - VECTOR_SCALAR * output , - const int overlap_communication ); - -/* 1) communicate off-processor portions of input. 
- * 2) apply: output = matrix * input ; - */ -void dcrs_apply( const struct distributed_crs_matrix * matrix , - VECTOR_SCALAR * input , - VECTOR_SCALAR * output ); - -#endif - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/main.c b/kokkos/basic/optional/ThreadPool/test/hhpccg/main.c deleted file mode 100644 index 57bb80a..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/main.c +++ /dev/null @@ -1,422 +0,0 @@ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -/*--------------------------------------------------------------------*/ -static -void hpccg_alloc_and_fill( const int np , - const int my_p , - const int gbox[][2] , - const int ghost , - struct distributed_crs_matrix * const matrix ); - -/*--------------------------------------------------------------------*/ - -int main( int argc , char ** argv ) -{ - const int ghost = 1 ; - const int max_cube = 20 ; - int ncube[20] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; - - FILE * print_file = stdout ; - int print_iter = 500 ; - int max_iter = 50 ; - int overlap_comm = 0 ; - - float tolerance = 0.0 ; /* Force max iterations */ - - int gbox[3][2] = { { 0 , 16 } , { 0 , 16 } , { 0 , 16 } }; - int nt = 0 ; - int trials = 6 ; - int ntest ; - int np = 1; - int my_p = 0 ; - -#ifdef HAVE_MPI - MPI_Init( & argc , & argv ); - MPI_Comm_size( MPI_COMM_WORLD , & np ); - MPI_Comm_rank( MPI_COMM_WORLD , & my_p ); -#endif - - if ( ! my_p ) { - const char arg_threads[] = "threads=" ; - const char arg_cube[] = "cube=" ; - const char arg_box[] = "box=" ; - const char arg_max[] = "max_iter=" ; - const char arg_trials[] = "trials=" ; - const char arg_print[] = "print_iter=" ; - const char arg_file[] = "print_file=" ; - const char arg_comm[] = "overlap_comm=" ; - const char arg_tolerance[] = "tolerance=" ; - int i ; - for ( i = 1 ; i < argc ; ++i ) { - if ( ! strncmp(argv[i],arg_threads,strlen(arg_threads)) ) { - sscanf(argv[i]+strlen(arg_threads),"%d",&nt); - } - else if ( ! strncmp(argv[i],arg_box,strlen(arg_box)) ) { - sscanf(argv[i]+strlen(arg_box),"%d%*[x]%d%*[x]%d", - & gbox[0][1] , & gbox[1][1] , & gbox[2][1] ); - } - else if ( ! strncmp(argv[i],arg_cube,strlen(arg_cube)) ) { - sscanf(argv[i]+strlen(arg_cube), - "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d", - ncube+0, ncube+1, ncube+2, ncube+3, ncube+4, - ncube+5, ncube+6, ncube+7, ncube+8, ncube+9, - ncube+10, ncube+11, ncube+12, ncube+13, ncube+14, - ncube+15, ncube+16, ncube+17, ncube+18, ncube+19); - } - else if ( ! strncmp(argv[i],arg_max,strlen(arg_max)) ) { - sscanf(argv[i]+strlen(arg_max),"%d",&max_iter); - } - else if ( ! strncmp(argv[i],arg_trials,strlen(arg_trials)) ) { - sscanf(argv[i]+strlen(arg_trials),"%d",&trials); - } - else if ( ! strncmp(argv[i],arg_print,strlen(arg_print)) ) { - sscanf(argv[i]+strlen(arg_print),"%d",&print_iter); - } - else if ( ! strncmp(argv[i],arg_comm,strlen(arg_comm)) ) { - sscanf(argv[i]+strlen(arg_print),"%d",&overlap_comm); - } - else if ( ! strncmp(argv[i],arg_tolerance,strlen(arg_tolerance)) ) { - sscanf(argv[i]+strlen(arg_print),"%f",&tolerance); - } - else if ( ! 
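/* Options are parsed on rank 0 only, as "name=value" words matched by prefix
 * (strncmp) and read with sscanf, then broadcast to the other ranks below.
 * A typical invocation might look like this (the binary name is illustrative
 * and depends on the build):
 *
 *   mpirun -np 4 ./test_tpi_hhpccg threads=4 box=32x32x32 trials=6 overlap_comm=1
 *
 * Note that the overlap_comm= and tolerance= branches above advance the value
 * pointer by strlen(arg_print) ("print_iter=", 11 characters) rather than by
 * their own prefix lengths, so those two values are scanned from the wrong
 * offset.
 */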
strncmp(argv[i],arg_file,strlen(arg_file)) ) { - char buffer[256] ; - sscanf(argv[i]+strlen(arg_file),"%s",buffer); - print_file = fopen(buffer,"a"); - } - } - } - -#ifdef HAVE_MPI - { - MPI_Bcast( & nt , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & gbox[0][0] , 6 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( ncube , max_cube , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & overlap_comm , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & max_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & print_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & trials , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & tolerance , 1 , MPI_FLOAT , 0 , MPI_COMM_WORLD ); - } -#endif - - if ( nt ) { - TPI_Init( nt ); - TPI_Block(); - TPI_Unblock(); - } - - if ( ! my_p ) { - fprintf(print_file,"\"PROC\" , \"THREAD\" , \"EQUATION\" , \"NON-ZERO\" , \"FUSED-AVG\", \"FUSED-MAX\", \"BLAS-AVG\", \"BLAS-MAX\", \"FUSED\", \"BLAS\" , \"Iter\"\n"); - fprintf(print_file,"\"COUNT\", \"COUNT\" , \"COUNT\" , \"COUNT\" , \"Mflops\" , \"Mflops\" , \"Mflops\" , \"Mflops\" , \"error\", \"error\" , \"COUNT\"\n"); - } - - for ( ntest = 0 ; ! ntest || ( ntest < max_cube && ncube[ntest] ) ; ++ntest ) { - struct distributed_crs_matrix matrix ; - - if ( ncube[ntest] ) { - gbox[0][1] = gbox[1][1] = gbox[2][1] = ncube[ntest] ; - } - - hpccg_alloc_and_fill( np, my_p, (const int (*)[2]) gbox, ghost, &matrix); - - { - const int nRow = matrix.n_local_row ; - - double solve_dt[2] = { 0 , 0 }; - double solve_blas_dt[2] = { 0 , 0 }; - VECTOR_SCALAR norm_resid = 0.0 ; - VECTOR_SCALAR norm_resid_blas = 0.0 ; - int iter_count = 0 ; - int iter_count_blas = 0 ; - int k ; - - VECTOR_SCALAR * const b = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); - VECTOR_SCALAR * const x = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); - VECTOR_SCALAR * const x_blas = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); - VECTOR_SCALAR * const xexact = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); - - { - const VECTOR_SCALAR value = 1.0 /* 1.0 / 3.0 */ ; - int i ; - for ( i = 0 ; i < nRow ; ++i ) xexact[i] = value ; - } - - for ( k = 0 ; k < trials ; ++k ) { - double dt = 0 ; - int i ; - - for ( i = 0 ; i < nRow ; ++i ) { x_blas[i] = 0.0 ; } - - cgsolve_set_lhs( & matrix , xexact , b ); - - cgsolve_blas( & matrix, b, x_blas, - tolerance , max_iter , print_iter , - & iter_count_blas, & norm_resid_blas, & dt ); - - solve_blas_dt[0] += dt ; - if ( ! k || dt < solve_blas_dt[1] ) { solve_blas_dt[1] = dt ; } - } - - for ( k = 0 ; k < trials ; ++k ) { - double dt = 0 ; - int i ; - - for ( i = 0 ; i < nRow ; ++i ) { x[i] = 0.0 ; } - - cgsolve_set_lhs( & matrix , xexact , b ); - - cgsolve( & matrix, b, x, overlap_comm, - tolerance , max_iter , print_iter , - & iter_count, & norm_resid, & dt ); - - solve_dt[0] += dt ; - if ( ! 
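/* Each configuration is solved 'trials' times: solve_dt[0] accumulates the
 * total solve time and solve_dt[1] keeps the fastest trial.  The FUSED-AVG /
 * FUSED-MAX (and BLAS-AVG / BLAS-MAX) columns printed later take the
 * per-iteration flop count (2*nnz for the sparse apply plus 2 flops per row
 * for each of the three AXPBYs and two dot products), multiply it by the
 * iteration count, and divide by the mean and minimum trial times respectively.
 */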
k || dt < solve_dt[1] ) { solve_dt[1] = dt ; } - } - - { - int nnzGlobal = matrix.A_pc[ nRow ]; - double error[3] = { 0 , 0 , 0 }; - - for ( k = 0 ; k < nRow ; ++k ) { - error[0] += xexact[k] * xexact[k] ; - error[1] += ( x[k] - xexact[k] ) * ( x[k] - xexact[k] ); - error[2] += ( x_blas[k] - xexact[k] ) * ( x_blas[k] - xexact[k] ); - } - -#ifdef HAVE_MPI - { - double error_global[3] = { 0.0 , 0.0 , 0.0 }; - int nnz = nnzGlobal ; - - MPI_Allreduce( & nnz , & nnzGlobal , 1 , MPI_INT , MPI_SUM , - MPI_COMM_WORLD ); - - MPI_Allreduce( error , error_global , 3 , MPI_DOUBLE , MPI_SUM , - MPI_COMM_WORLD ); - - error[0] = error_global[0]; - error[1] = error_global[1]; - error[2] = error_global[2]; - } -#endif - - error[0] = sqrt( error[0] ); - error[1] = sqrt( error[1] ); - error[2] = sqrt( error[2] ); - - if ( ! my_p ) { - const int nRowGlobal = ( gbox[0][1] - gbox[0][0] ) * - ( gbox[1][1] - gbox[1][0] ) * - ( gbox[2][1] - gbox[2][0] ); - - const double dt_mean_fuse_step = 1.0e6 * solve_dt[0] / (double) trials ; - const double dt_mean_blas_step = 1.0e6 * solve_blas_dt[0] / (double) trials ; - const double dt_min_fuse_step = 1.0e6 * solve_dt[1] ; - const double dt_min_blas_step = 1.0e6 * solve_blas_dt[1] ; - - const double Mflop_step = 2 * nnzGlobal - + 3 * 2 * nRowGlobal - + 2 * 2 * nRowGlobal ; - - const double Mflop_mean_fuse = Mflop_step * iter_count / dt_mean_fuse_step ; - const double Mflop_mean_blas = Mflop_step * iter_count_blas / dt_mean_blas_step ; - - const double Mflop_max_fuse = Mflop_step * iter_count / dt_min_fuse_step ; - const double Mflop_max_blas = Mflop_step * iter_count_blas / dt_min_blas_step ; - - fprintf(print_file,"%8d , %8d , %8d , %8d , %10g , %10g , %10g , %10g , %10g , %10g , %d\n", - np , nt , nRowGlobal , nnzGlobal , - Mflop_mean_fuse , Mflop_max_fuse , - Mflop_mean_blas , Mflop_max_blas , - error[1] / error[0] , error[2] / error[0] , iter_count ); - fflush(print_file); - } - } - - free( xexact ); - free( x_blas ); - free( x ); - free( b ); - } - free( matrix.A_a ); - free( matrix.A_ia ); - free( matrix.A_pc ); - free( matrix.p_recv_pc ); - free( matrix.p_send_pc ); - free( matrix.p_send_id ); - } - - if ( nt ) { TPI_Finalize(); } - -#ifdef HAVE_MPI - MPI_Finalize(); -#endif - - return 0 ; -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -static -void hpccg_alloc_and_fill( const int np , - const int my_p , - const int gbox[][2] , - const int ghost , - struct distributed_crs_matrix * const matrix ) -{ - int (* const pbox)[3][2] = (int (*)[3][2]) malloc( sizeof(int)*np*3*2 ); - - const int (* const my_box)[2] = (const int (*)[2]) pbox[my_p] ; - - int my_uses_box[3][2] ; - int * map_local_ord = NULL; - - matrix->n_local_row = 0 ; - matrix->n_internal_row = 0 ; - matrix->A_pc = NULL ; - matrix->A_ia = NULL ; - matrix->A_a = NULL ; - - matrix->p_size = np ; - matrix->p_rank = my_p ; - matrix->p_recv_pc = NULL ; - matrix->p_send_pc = NULL ; - matrix->p_send_id = NULL ; - - /* Partition the global box */ - box_partition_rcb( np , gbox , pbox ); - - /* Upper bound */ - map_local_ord = (int *) malloc( sizeof(int) * - ( 2 * ghost + my_box[0][1]- my_box[0][0] ) * - ( 2 * ghost + my_box[1][1]- my_box[1][0] ) * - ( 2 * ghost + my_box[2][1]- my_box[2][0] ) ); - - /* Generate local layout with ghosting. 
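 * The resulting local ordering places owned rows first: rows whose columns are
 * all locally owned (the ones the overlapped solver multiplies before
 * communication finishes) occupy [0, n_internal_row), the remaining owned
 * boundary rows follow up to n_local_row, and off-processor ghost cells are
 * appended, grouped by owning rank, up to n_local_column, so the p_recv_pc
 * spans index directly into the ghost tail of a length n_local_column vector.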
*/ - box_partition_map( np, my_p, gbox, - (const int (* const)[3][2]) pbox, - ghost, - my_uses_box , map_local_ord , - & matrix->n_internal_row , - & matrix->n_local_row , - & matrix->n_local_column , - & matrix->p_recv_pc , - & matrix->p_send_pc , - & matrix->p_send_id ); - - { - const int nrow = matrix->n_local_row ; - int * const pc = (int *) malloc( sizeof(int) * ( nrow + 1 ) ); - int * ia = NULL ; - MATRIX_SCALAR * a = NULL ; - - int ix , iy , iz ; - int sx , sy , sz ; - - /* Number of non zeros in each matrix row, - * then prefix the array for offsets. - */ - pc[0] = 0 ; - - for ( iz = my_box[2][0] ; iz < my_box[2][1] ; ++iz ) { - for ( iy = my_box[1][0] ; iy < my_box[1][1] ; ++iy ) { - for ( ix = my_box[0][0] ; ix < my_box[0][1] ; ++ix ) { - const int irow = box_map_local( (const int (*const)[2]) my_uses_box, map_local_ord, ix, iy, iz ); - int count = 1 ; /* Count the diagonal */ - - /* Count the off-diagonal terms to follow */ - for ( sz = -1 ; sz <= 1 ; ++sz ) { - for ( sy = -1 ; sy <= 1 ; ++sy ) { - for ( sx = -1 ; sx <= 1 ; ++sx ) { - const int g_ix = ix + sx ; - const int g_iy = iy + sy ; - const int g_iz = iz + sz ; - - if ( my_uses_box[0][0] <= g_ix && g_ix < my_uses_box[0][1] && - my_uses_box[1][0] <= g_iy && g_iy < my_uses_box[1][1] && - my_uses_box[2][0] <= g_iz && g_iz < my_uses_box[2][1] && - ! ( sz == 0 && sy == 0 && sx == 0 ) ) { - /* This column is within global bounds and is not a diagonal */ - ++count ; - } - } - } - } - pc[ irow + 1 ] = count ; - } - } - } - - for ( ix = 0 ; ix < nrow ; ++ix ) { pc[ix+1] += pc[ix] ; } - - ia = (int *) malloc( sizeof(int) * pc[ nrow ] ); - a = (MATRIX_SCALAR *) malloc( sizeof(MATRIX_SCALAR) * pc[ nrow ] ); - - for ( iz = my_box[2][0] ; iz < my_box[2][1] ; ++iz ) { - for ( iy = my_box[1][0] ; iy < my_box[1][1] ; ++iy ) { - for ( ix = my_box[0][0] ; ix < my_box[0][1] ; ++ix ) { - const int irow = box_map_local( (const int (*const)[2]) my_uses_box, map_local_ord, ix, iy, iz ); - int ipc = pc[ irow ]; - - /* Diagonal term first */ - ia[ ipc ] = irow ; - a[ ipc ] = 27.0f ; - ++ipc ; - - /* Off-diagonal terms to follow */ - for ( sz = -1 ; sz <= 1 ; ++sz ) { - for ( sy = -1 ; sy <= 1 ; ++sy ) { - for ( sx = -1 ; sx <= 1 ; ++sx ) { - const int g_ix = ix + sx ; - const int g_iy = iy + sy ; - const int g_iz = iz + sz ; - - if ( my_uses_box[0][0] <= g_ix && g_ix < my_uses_box[0][1] && - my_uses_box[1][0] <= g_iy && g_iy < my_uses_box[1][1] && - my_uses_box[2][0] <= g_iz && g_iz < my_uses_box[2][1] && - ! 
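/* The assembled operator is the standard 27-point stencil on the structured
 * grid: 27.0 on the diagonal and -1.0 for each stencil neighbor (at most 26)
 * that passes the bounds check above.  Every row therefore has at most 26 unit
 * off-diagonal entries against a diagonal of 27, so the matrix is symmetric
 * and strictly diagonally dominant with a positive diagonal; by Gershgorin its
 * eigenvalues are at least 1, the system is positive definite, and CG applies.
 */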
( sz == 0 && sy == 0 && sx == 0 ) ) { - /* Column is within global bounds and is not a diagonal */ - /* 'icol' is mapped for communication */ - - const int icol = - box_map_local( (const int (*const)[2]) my_uses_box, map_local_ord, g_ix, g_iy, g_iz ); - - if ( icol < 0 ) { abort(); } - - ia[ ipc ] = icol ; - a[ ipc ] = -1.0f ; - ++ipc ; - } - } - } - } - if ( ipc != pc[ irow + 1 ] ) { abort(); } - } - } - } - - matrix->A_pc = pc ; - matrix->A_ia = ia ; - matrix->A_a = a ; - } - - free( map_local_ord ); - free( pbox ); -} - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/tpi_vector.c b/kokkos/basic/optional/ThreadPool/test/hhpccg/tpi_vector.c deleted file mode 100644 index e5cc365..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/tpi_vector.c +++ /dev/null @@ -1,277 +0,0 @@ -#include -#include - -#include -#include -#include - -#if defined( HAVE_MPI ) -#include -#endif - -/*--------------------------------------------------------------------*/ - -struct tpi_work_vector { - VECTOR_SCALAR alpha ; - VECTOR_SCALAR beta ; - const VECTOR_SCALAR * x ; - const VECTOR_SCALAR * y ; - VECTOR_SCALAR * w ; - int n ; -}; - -void tpi_work_span( TPI_Work * const work , const int n , - int * const iBeg , int * const iEnd ) -{ - const int chunk = ( n + work->count - 1 ) / work->count ; - const int i_end = chunk + ( *iBeg = chunk * work->rank ); - - *iEnd = n < i_end ? n : i_end ; -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_fill( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR alpha = h->alpha ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] = alpha ; } -} - -void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.alpha = alpha ; - tmp.w = x ; - tmp.n = n ; - TPI_Run_threads( tpi_work_fill , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_scale( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR beta = h->beta ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] *= beta ; } -} - -void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.alpha = alpha ; - tmp.w = x ; - tmp.n = n ; - TPI_Run_threads( tpi_work_scale , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_copy( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR * const x = h->x ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] = x[i] ; } -} - -void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.x = x ; - tmp.w = y ; - tmp.n = n ; - TPI_Run_threads( tpi_work_copy , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_axpby( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct 
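/* Every kernel in this file derives its own contiguous slice from the thread
 * rank: tpi_work_span() uses chunk = ceil(n / work->count) and hands rank r
 * the half-open range [r*chunk, min(n, (r+1)*chunk)).  For example, n = 10
 * with 4 threads gives chunk = 3 and the slices [0,3), [3,6), [6,9), [9,10).
 */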
tpi_work_vector *) work->info ; - - const VECTOR_SCALAR alpha = h->alpha ; - const VECTOR_SCALAR beta = h->beta ; - const VECTOR_SCALAR * const x = h->x ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] = alpha * x[i] + beta * w[i] ; } -} - -void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , - VECTOR_SCALAR beta , VECTOR_SCALAR * y ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.alpha = alpha ; - tmp.beta = beta ; - tmp.x = x ; - tmp.w = y ; - tmp.n = n ; - - TPI_Run_threads( tpi_work_axpby , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_axpy( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR alpha = h->alpha ; - const VECTOR_SCALAR * const x = h->x ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] += alpha * x[i] ; } -} - -void tpi_axpy( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , - VECTOR_SCALAR * y ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.alpha = alpha ; - tmp.x = x ; - tmp.w = y ; - tmp.n = n ; - - TPI_Run_threads( tpi_work_axpy , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_xpby( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR beta = h->beta ; - const VECTOR_SCALAR * const x = h->x ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] = x[i] + beta * w[i] ; } -} - -void tpi_xpby( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR beta , - VECTOR_SCALAR * y ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.beta = beta ; - tmp.x = x ; - tmp.w = y ; - tmp.n = n ; - - TPI_Run_threads( tpi_work_xpby , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_dot_partial( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR * const x = h->x ; - const VECTOR_SCALAR * const y = h->y ; - double * const s = (double *) work->reduce ; - double tmp = *s ; - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { tmp += x[i] * y[i] ; } - - *s = tmp ; -} - -static void tpi_work_dot_partial_self( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR * const x = h->x ; - double * const s = (double *) work->reduce ; - double tmp = *s ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { const VECTOR_SCALAR d = x[i] ; tmp += d * d ; } - - *s = tmp ; -} - -static void tpi_work_dot_join( TPI_Work * work , const void * src ) -{ - *((double *) ( work->reduce) ) += *((const double *) src); -} - -static void tpi_work_dot_init( TPI_Work * work ) -{ - *((double *) ( work->reduce) ) = 0 ; -} - -double tpi_dot( int n , const VECTOR_SCALAR * x , const VECTOR_SCALAR * y ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - double result = 0.0 ; - tmp.x = x ; - tmp.y = y ; - tmp.n = n ; - if ( x != y ) { - 
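/* TPI_Run_threads_reduce() gives every thread a private reduction buffer of
 * sizeof(result) bytes, visible as work->reduce: the init callback zeroes it,
 * the work callback accumulates its slice into it, and the join callback folds
 * the per-thread buffers into the caller's 'result'.  The MPI_Allreduce below
 * then combines the per-process sums.  Note that this version accumulates in
 * double even though VECTOR_SCALAR is float, which limits round-off.  A
 * minimal illustrative use of the same hooks (not part of the original source):
 *
 *   static void sum_work( TPI_Work * w )
 *   {
 *     const int n = *(const int *) w->info ;
 *     int i , end ;
 *     tpi_work_span( w , n , & i , & end );
 *     for ( ; i < end ; ++i ) { *(double *) w->reduce += 1.0 + i ; }
 *   }
 *   static void sum_join( TPI_Work * w , const void * src )
 *     { *(double *) w->reduce += *(const double *) src ; }
 *   static void sum_init( TPI_Work * w ) { *(double *) w->reduce = 0 ; }
 *
 *   double total = 0 ; int n = 1000 ;
 *   TPI_Run_threads_reduce( sum_work , & n , sum_join , sum_init ,
 *                           sizeof(total) , & total );
 *   after which total == 500500 == n*(n+1)/2.
 */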
TPI_Run_threads_reduce( tpi_work_dot_partial , & tmp , - tpi_work_dot_join , tpi_work_dot_init , - sizeof(result) , & result ); - } - else { - TPI_Run_threads_reduce( tpi_work_dot_partial_self , & tmp , - tpi_work_dot_join , tpi_work_dot_init , - sizeof(result) , & result ); - } -#if defined HAVE_MPI - { - double tmp = result ; - MPI_Allreduce( & tmp , & result , 1 , MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD ); - } -#endif - return result ; -} - -/*--------------------------------------------------------------------*/ - diff --git a/kokkos/basic/optional/ThreadPool/test/hhpccg/tpi_vector.h b/kokkos/basic/optional/ThreadPool/test/hhpccg/tpi_vector.h deleted file mode 100644 index fba628f..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hhpccg/tpi_vector.h +++ /dev/null @@ -1,30 +0,0 @@ - -#ifndef tpi_vector_h -#define tpi_vector_h - -#define VECTOR_SCALAR float -#define MATRIX_SCALAR float - -void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); - -void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); - -void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ); - -void tpi_xpby( int n , const VECTOR_SCALAR * x , - VECTOR_SCALAR beta , VECTOR_SCALAR * y ); - -void tpi_axpy( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , - VECTOR_SCALAR * y ); - -void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , - VECTOR_SCALAR beta , VECTOR_SCALAR * y ); - -double tpi_dot( int n , const VECTOR_SCALAR * x , - const VECTOR_SCALAR * y ); - -void tpi_work_span( TPI_Work * const work , const int n , - int * const iBeg , int * const iEnd ); - -#endif - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/BoxPartition.c b/kokkos/basic/optional/ThreadPool/test/hpccg/BoxPartition.c deleted file mode 100644 index ef860ae..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/BoxPartition.c +++ /dev/null @@ -1,487 +0,0 @@ - -#include -#include - -#include - -/*--------------------------------------------------------------------*/ - -static int box_map_local_entry( const int box[][2] , - const int ghost , - int local_x , - int local_y , - int local_z ) -{ - const int nx = 2 * ghost + box[0][1] - box[0][0] ; - const int ny = 2 * ghost + box[1][1] - box[1][0] ; - const int nz = 2 * ghost + box[2][1] - box[2][0] ; - int result = -1 ; - - local_x += ghost ; - local_y += ghost ; - local_z += ghost ; - - if ( 0 <= local_x && local_x < nx && - 0 <= local_y && local_y < ny && - 0 <= local_z && local_z < nz ) { - - result = local_z * ny * nx + local_y * nx + local_x ; - } - return result ; -} - -int box_map_local( const int box_local[][2] , - const int ghost , - const int box_local_map[] , - const int local_x , - const int local_y , - const int local_z ) -{ - int result = box_map_local_entry(box_local,ghost,local_x,local_y,local_z); - - if ( 0 <= result ) { - result = box_local_map[ result ]; - } - - return result ; -} - -/*--------------------------------------------------------------------*/ -/* Recursively split a box into into (up-ip) sub-boxes */ - -static -void box_partition( int ip , int up , int axis , - const int box[3][2] , - int p_box[][3][2] ) -{ - const int np = up - ip ; - if ( 1 == np ) { - p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; - p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; - p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; - } - else { - const int n = box[ axis ][1] - box[ axis ][0] ; - const int np_low = np / 2 ; /* Rounded down */ - const int np_upp = np - np_low ; - - const int 
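/* Recursive coordinate bisection: the processor range [ip,up) is split into
 * np_low = np/2 and np_upp = np - np_low, the box extent n along the current
 * axis is divided in the same proportion (n_upp rounded from n*np_upp/np), and
 * each half recurses with next_axis = (axis+2)%3, so the cuts cycle z, y, x
 * starting from the axis=2 call in box_partition_rcb().  For example, np = 5
 * over an extent of n = 20 gives np_low = 2, np_upp = 3, n_upp = 12, n_low = 8:
 * two ranks share 8 planes along that axis and three ranks share 12.
 */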
n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np))); - const int n_low = n - n_upp ; - const int next_axis = ( axis + 2 ) % 3 ; - - if ( np_low ) { /* P = [ip,ip+np_low) */ - int dbox[3][2] ; - dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; - dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; - dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; - - dbox[ axis ][1] = dbox[ axis ][0] + n_low ; - - box_partition( ip, ip + np_low, next_axis, - (const int (*)[2]) dbox, p_box ); - } - - if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */ - int dbox[3][2] ; - dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; - dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; - dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; - - ip += np_low ; - dbox[ axis ][0] += n_low ; - dbox[ axis ][1] = dbox[ axis ][0] + n_upp ; - - box_partition( ip, ip + np_upp, next_axis, - (const int (*)[2]) dbox, p_box ); - } - } -} - -/*--------------------------------------------------------------------*/ - -static int box_disjoint( const int a[3][2] , const int b[3][2] ) -{ - return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] || - a[1][1] <= b[1][0] || b[1][1] <= a[1][0] || - a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ; -} - -static void resize_int( int ** a , int * allocLen , int newLen ) -{ - int k = 32; - while ( k < newLen ) { k <<= 1 ; } - if ( NULL == *a ) - { *a = malloc( sizeof(int)*(*allocLen = k) ); } - else if ( *allocLen < k ) - { *a = realloc(*a , sizeof(int)*(*allocLen = k)); } -} - -static void box_partition_maps( - const int np , - const int my_p , - const int pbox[][3][2] , - const int ghost , - int ** map_local_id , - int ** map_recv_pc , - int ** map_send_pc , - int ** map_send_id ) -{ - const int (*my_box)[2] = pbox[my_p] ; - - const int my_ix = my_box[0][0] ; - const int my_iy = my_box[1][0] ; - const int my_iz = my_box[2][0] ; - const int my_nx = my_box[0][1] - my_box[0][0] ; - const int my_ny = my_box[1][1] - my_box[1][0] ; - const int my_nz = my_box[2][1] - my_box[2][0] ; - - const int my_use_nx = 2 * ghost + my_nx ; - const int my_use_ny = 2 * ghost + my_ny ; - const int my_use_nz = 2 * ghost + my_nz ; - - const int id_length = my_use_nx * my_use_ny * my_use_nz ; - - int * local_id = (int *) malloc( id_length * sizeof(int) ); - int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); - int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); - - int * send_id = NULL ; - int send_id_size = 0 ; - - int iLocal , iSend ; - int i ; - - int my_use_box[3][2] ; - - my_use_box[0][0] = my_box[0][0] - ghost ; - my_use_box[0][1] = my_box[0][1] + ghost ; - my_use_box[1][0] = my_box[1][0] - ghost ; - my_use_box[1][1] = my_box[1][1] + ghost ; - my_use_box[2][0] = my_box[2][0] - ghost ; - my_use_box[2][1] = my_box[2][1] + ghost ; - - for ( i = 0 ; i < id_length ; ++i ) { local_id[i] = -1 ; } - - iSend = 0 ; - iLocal = 0 ; - - /* The vector space is partitioned by processors */ - - for ( i = 0 ; i < np ; ++i ) { - const int ip = ( i + my_p ) % np ; - recv_pc[i] = iLocal ; - send_pc[i] = iSend ; - - if ( ! 
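/* my_use_box is the owned box grown by 'ghost' in every direction.  For each
 * rank ip whose owned box overlaps it (starting with ip == my_p, so my own
 * cells are numbered first), the loops below assign increasing local ordinals
 * to the used cells owned by ip; recv_pc[i] therefore delimits the block of
 * ghost ordinals received from the i-th neighbor.  Symmetrically, every owned
 * cell that falls inside ip's grown box has its local ordinal appended to
 * send_id, with send_pc[i] marking the per-neighbor spans used when packing
 * the send buffer.
 */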
box_disjoint( (const int (*)[2]) my_use_box , pbox[ip] ) ) { - const int p_ix = pbox[ip][0][0] ; - const int p_iy = pbox[ip][1][0] ; - const int p_iz = pbox[ip][2][0] ; - const int p_ex = pbox[ip][0][1] ; - const int p_ey = pbox[ip][1][1] ; - const int p_ez = pbox[ip][2][1] ; - - int local_x , local_y , local_z ; - - /* Run the span of global cells that my processor uses */ - - for ( local_z = -ghost ; local_z < my_nz + ghost ; ++local_z ) { - for ( local_y = -ghost ; local_y < my_ny + ghost ; ++local_y ) { - for ( local_x = -ghost ; local_x < my_nx + ghost ; ++local_x ) { - - const int global_z = local_z + my_iz ; - const int global_y = local_y + my_iy ; - const int global_x = local_x + my_ix ; - - const int entry = - box_map_local_entry(my_box,ghost,local_x,local_y,local_z); - - if ( entry < 0 ) { abort(); } - - if ( p_iz <= global_z && global_z < p_ez && - p_iy <= global_y && global_y < p_ey && - p_ix <= global_x && global_x < p_ex ) { - - /* This ordinal is owned by processor 'ip' */ - - local_id[ entry ] = iLocal++ ; - -#if defined(DEBUG_PRINT) -if ( my_p != ip ) { - fprintf(stdout," (%d,%d,%d) : P%d recv at local %d from P%d\n", - global_x,global_y,global_z,my_p,local_id[entry],ip); - fflush(stdout); -} -#endif - } - - /* If in my ownership and used by the other processor */ - if ( my_p != ip && - /* In my ownership: */ - ( 0 <= local_z && local_z < my_nz && - 0 <= local_y && local_y < my_ny && - 0 <= local_x && local_x < my_nx ) && - /* In other processors usage: */ - ( p_iz - ghost <= global_z && global_z < p_ez + ghost && - p_iy - ghost <= global_y && global_y < p_ey + ghost && - p_ix - ghost <= global_x && global_x < p_ex + ghost ) ) { - - resize_int( & send_id , & send_id_size , (iSend + 1) ); - send_id[ iSend ] = local_id[ entry ] ; - ++iSend ; - -#if defined(DEBUG_PRINT) -{ - fprintf(stdout," (%d,%d,%d) : P%d send at local %d to P%d\n", - global_x,global_y,global_z,my_p,local_id[entry],ip); - fflush(stdout); -} -#endif - } - } - } - } - } - } - recv_pc[np] = iLocal ; - send_pc[np] = iSend ; - - *map_local_id = local_id ; - *map_recv_pc = recv_pc ; - *map_send_pc = send_pc ; - *map_send_id = send_id ; -} - -void box_partition_rcb( const int np , - const int my_p , - const int root_box[][2] , - const int ghost , - int (**pbox)[3][2] , - int ** map_local_id , - int ** map_recv_pc , - int ** map_send_pc , - int ** map_send_id ) -{ - *pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); - - box_partition( 0 , np , 2 , root_box , *pbox ); - - box_partition_maps( np , my_p , (const int (*)[3][2]) *pbox , ghost , - map_local_id , map_recv_pc , - map_send_pc , map_send_id ); -} - -/*--------------------------------------------------------------------*/ - -#ifdef UNIT_TEST - -static int box_contain( const int a[3][2] , const int b[3][2] ) -{ - return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && - a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && - a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; -} - -static void box_print( FILE * fp , const int a[][2] ) -{ - fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", - a[0][0] , a[0][1] , - a[1][0] , a[1][1] , - a[2][0] , a[2][1] ); -} - -static void test_box( const int box[3][2] , const int np ) -{ - const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; - int ncell_total = 0 ; - int ncell_min = ncell_box ; - int ncell_max = 0 ; - int (*pbox)[3][2] ; - int i , j ; - - pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); - - box_partition( 0 , np , 2 , box , pbox ); - - for ( i = 0 ; i < np ; ++i ) { - const int ncell = ( pbox[i][0][1] 
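/* The unit test below verifies the partition: every sub-box must lie inside
 * the parent box, all pairs of sub-boxes must be disjoint, and the cell counts
 * must sum to exactly the parent volume; it also reports the min/max cells per
 * rank as a rough load-balance measure.
 */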
- pbox[i][0][0] ) * - ( pbox[i][1][1] - pbox[i][1][0] ) * - ( pbox[i][2][1] - pbox[i][2][0] ); - - if ( ! box_contain( box , (const int (*)[2]) pbox[i] ) ) { - fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); - box_print(stdout,(const int (*)[2]) pbox[i]); - fprintf(stdout,"\n"); - abort(); - } - - for ( j = i + 1 ; j < np ; ++j ) { - if ( ! box_disjoint( (const int (*)[2]) pbox[i] , - (const int (*)[2]) pbox[j] ) ) { - fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); - box_print(stdout, (const int (*)[2]) pbox[i]); - fprintf(stdout,"\n"); - fprintf(stdout," pbox[%d/%d] = ",j,np); - box_print(stdout, (const int (*)[2]) pbox[j]); - fprintf(stdout,"\n"); - abort(); - } - } - ncell_total += ncell ; - - if ( ncell_max < ncell ) { ncell_max = ncell ; } - if ( ncell < ncell_min ) { ncell_min = ncell ; } - } - - if ( ncell_total != ncell_box ) { - fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); - abort(); - } - fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", - np,ncell_box,ncell_box/np,ncell_min,ncell_max); - - free( pbox ); -} - -/*--------------------------------------------------------------------*/ - -static void test_maps( const int root_box[][2] , const int np ) -{ - const int ghost = 1 ; - const int nx_global = root_box[0][1] - root_box[0][0] ; - const int ny_global = root_box[1][1] - root_box[1][0] ; - int ieq , i , j ; - int (*pbox)[3][2] ; - int **local_values ; - int **map_local_id ; - int **map_recv_pc ; - int **map_send_pc ; - int **map_send_id ; - - pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); - - box_partition( 0 , np , 2 , root_box , pbox ); - - local_values = (int **) malloc( sizeof(int*) * np ); - map_local_id = (int **) malloc( sizeof(int*) * np ); - map_recv_pc = (int **) malloc( sizeof(int*) * np ); - map_send_pc = (int **) malloc( sizeof(int*) * np ); - map_send_id = (int **) malloc( sizeof(int*) * np ); - - /* Set each local value to the global equation number */ - - for ( ieq = i = 0 ; i < np ; ++i ) { - const int (*mybox)[2] = (const int (*)[2]) pbox[i] ; - const int nx = mybox[0][1] - mybox[0][0] ; - const int ny = mybox[1][1] - mybox[1][0] ; - const int nz = mybox[2][1] - mybox[2][0] ; - int ix , iy , iz ; - - /* Generate the partition maps for this rank */ - box_partition_maps( np , i , (const int (*)[3][2]) pbox , ghost , - & map_local_id[i] , & map_recv_pc[i] , - & map_send_pc[i] , & map_send_id[i] ); - - local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] ); - - for ( iz = -ghost ; iz < nz + ghost ; ++iz ) { - for ( iy = -ghost ; iy < ny + ghost ; ++iy ) { - for ( ix = -ghost ; ix < nx + ghost ; ++ix ) { - const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz); - - if ( 0 <= ieq ) { - const int ix_global = ix + mybox[0][0] ; - const int iy_global = iy + mybox[1][0] ; - const int iz_global = iz + mybox[2][0] ; - - if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] && - root_box[1][0] <= iy_global && iy_global < root_box[1][1] && - root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) { - - local_values[i][ ieq ] = ix_global + - iy_global * nx_global + - iz_global * nx_global * ny_global ; - } - else { - local_values[i][ ieq ] = -1 ; - } - } - } - } - } - } - - /* Pair-wise compare the local values */ - /* i == receiving processor rank */ - /* ip == sending processor rank */ - /* j == receiving processor data entry for message from 'ip' */ - /* jp == sending processor data entry for message to 'i' */ - - for ( i = 0 ; i < np ; ++i ) { - for ( j = 1 ; j < np ; ++j ) { - const 
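/* Pairwise consistency check of the communication maps: for receiving rank i
 * and message slot j, the sender is ip = (i + j) % np, and the sender's slot
 * toward i is jp = (i + np - ip) % np, the inverse of the (rank + slot) % np
 * ordering.  The test requires matching message lengths and that each value
 * landing in the receiver's recv span equals the sender's value at the
 * corresponding send_id ordinal, i.e. that recv_pc / send_pc / send_id
 * describe the same exchange from both ends.
 */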
int ip = ( i + j ) % np ; - const int jp = ( i + np - ip ) % np ; - const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; - const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; - int k ; - if ( nrecv != nsend ) { - fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip); - fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); - abort(); - } - for ( k = 0 ; k < nrecv ; ++k ) { - const int irecv = map_recv_pc[i][j] + k ; - const int isend = map_send_pc[ip][jp] + k ; - const int val_irecv = local_values[i][irecv] ; - const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ; - if ( val_irecv != val_isend ) { - fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); - fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); - abort(); - } - } - } - } - - for ( i = 0 ; i < np ; ++i ) { - free( map_local_id[i] ); - free( map_recv_pc[i] ); - free( map_send_pc[i] ); - free( map_send_id[i] ); - free( local_values[i] ); - } - free( map_send_id ); - free( map_send_pc ); - free( map_recv_pc ); - free( map_local_id ); - free( local_values ); - free( pbox ); -} - -/*--------------------------------------------------------------------*/ - -int main( int argc , char * argv[] ) -{ - int np_max = 256 ; - int box[3][2] = { { 0 , 64 } , { 0 , 64 } , { 0 , 64 } }; - int np = 0 ; - - switch( argc ) { - case 3: - sscanf(argv[1],"%d",&np); - sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); - if ( 0 < np ) { test_box( (const int (*)[2]) box , np ); } - if ( 0 < np ) { test_maps( (const int (*)[2]) box , np ); } - break ; - default: - for ( np = 1 ; np <= np_max ; ++np ) { - test_box( (const int (*)[2]) box , np ); - test_maps( (const int (*)[2]) box , np ); - } - break ; - } - return 0 ; -} - -#endif - - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/BoxPartition.h b/kokkos/basic/optional/ThreadPool/test/hpccg/BoxPartition.h deleted file mode 100644 index 3dfd839..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/BoxPartition.h +++ /dev/null @@ -1,64 +0,0 @@ - -/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. - * - * Use recursive coordinate bisection to partition a box - * into np disjoint sub-boxes. Allocate (via malloc) and - * populate the sub-boxes, mapping the local (x,y,z) to - * a local ordinal, and mappings for the send-recv messages - * to update the ghost cells. 
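 * On return, pbox[p] holds rank p's owned sub-box, map_local_id maps a ghosted
 * local (x,y,z) to a local ordinal (or -1 outside the used region),
 * map_recv_pc and map_send_pc are per-processor prefix counts, and map_send_id
 * lists the owned ordinals to pack for each neighbor.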
- * - * usage: - * - * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; - * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; - * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; - * - * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { - * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { - * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { - * const int x_global = x + pbox[my_p][0][0] ; - * const int y_global = y + pbox[my_p][1][0] ; - * const int z_global = z + pbox[my_p][2][0] ; - * - * const int local_ordinal = - * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); - * - * if ( 0 <= local_ordinal ) { - * } - * } - * - * for ( i = 1 ; i < np ; ++i ) { - * const int recv_processor = ( my_p + i ) % np ; - * const int recv_ordinal_begin = map_recv_pc[i]; - * const int recv_ordinal_end = map_recv_pc[i+1]; - * } - * - * for ( i = 1 ; i < np ; ++i ) { - * const int send_processor = ( my_p + i ) % np ; - * const int send_map_begin = map_send_pc[i]; - * const int send_map_end = map_send_pc[i+1]; - * for ( j = send_map_begin ; j < send_map_end ; ++j ) { - * send_ordinal = map_send_id[j] ; - * } - * } - */ -void box_partition_rcb( - const int np /**< [in] Number of partitions */ , - const int my_p /**< [in] My partition rank */ , - const int root_box[][2] /**< [in] 3D Box to partition */ , - const int ghost /**< [in] Ghost cell boundary */ , - int (**pbox)[3][2] /**< [out] Partition's 3D boxes */ , - int ** map_local_id /**< [out] Map local cells */ , - int ** map_recv_pc /**< [out] Receive spans per processor */ , - int ** map_send_pc /**< [out] Send prefix counts per processor */ , - int ** map_send_id /**< [out] Send message ordinals */ ); - -/* \brief Map a local (x,y,z) to a local ordinal. - */ -int box_map_local( const int box_local[][2] , - const int ghost , - const int map_local_id[] , - const int local_x , - const int local_y , - const int local_z ); - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/CGSolver.c b/kokkos/basic/optional/ThreadPool/test/hpccg/CGSolver.c deleted file mode 100644 index 2670bf7..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/CGSolver.c +++ /dev/null @@ -1,248 +0,0 @@ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -/*--------------------------------------------------------------------*/ - -#ifdef HAVE_MPI - -#define TIMER( DT , F ) \ - { double tb , te , tbg , teg , dt ; \ - tb = TPI_Walltime(); \ - F ; \ - te = TPI_Walltime(); \ - MPI_Allreduce(&tb, &tbg, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); \ - MPI_Allreduce(&te, &teg, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); \ - DT[0] += dt = teg - tbg ; \ - DT[1] += dt * dt ; } - -#else - -#define TIMER( DT , F ) \ - { const double tb = TPI_Walltime(); double dt ; \ - F ; \ - DT[0] += dt = TPI_Walltime() - tb ; \ - DT[1] += dt * dt ; } - -#endif - -/*--------------------------------------------------------------------*/ - -static -VECTOR_SCALAR comm_sum( VECTOR_SCALAR v ) -{ -#ifdef HAVE_MPI - VECTOR_SCALAR result = 0 ; - if ( sizeof(VECTOR_SCALAR) == sizeof(double) ) { - MPI_Allreduce( & v , & result , 1 , MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD ); - } - else { - MPI_Allreduce( & v , & result , 1 , MPI_FLOAT , MPI_SUM , MPI_COMM_WORLD ); - } - return result ; -#else - return v ; -#endif -} - -#ifdef HAVE_MPI -static -void comm_rhs_vector( const struct cgsolve_data * const data , - VECTOR_SCALAR * const vec ) -{ - const int np = data->np ; - const int my_p = data->ip ; - const int * const recv_pc = data->recv_pc ; - const int * const 
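/* Halo exchange: nonblocking receives are posted first, landing directly in
 * the ghost tail of 'vec' at the recv_pc spans; the values each neighbor needs
 * are then gathered through send_id into a contiguous send_buf.  The
 * MPI_Barrier guarantees every rank has posted its receives, which is what
 * makes the ready-send (MPI_Rsend) legal, and MPI_Waitall completes the
 * receives before the vector is used.  Messages travel as
 * length*sizeof(VECTOR_SCALAR) bytes of MPI_BYTE, so the same code serves
 * float or double scalars.
 */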
send_pc = data->send_pc ; - const int * const send_id = data->send_id ; - int i , irecv ; - - for ( irecv = 0 , i = 1 ; i < np ; ++i ) { - if ( recv_pc[i] < recv_pc[i+1] ) ++irecv ; - } - -#ifdef DEBUG_PRINT - fflush(stdout); - MPI_Barrier( MPI_COMM_WORLD ); - fflush(stdout); -#endif - - { - VECTOR_SCALAR * const send_buf = - (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * send_pc[np] ); - - MPI_Request * const recv_request = - (MPI_Request *) malloc( sizeof(MPI_Request) * irecv ); - - MPI_Status * const recv_status = - (MPI_Status *) malloc( sizeof(MPI_Status) * irecv ); - - for ( irecv = 0 , i = 1 ; i < np ; ++i ) { - const int ip = ( i + my_p ) % np ; - const int recv_beg = recv_pc[i]; - const int recv_length = recv_pc[i+1] - recv_beg ; - if ( recv_length ) { -#ifdef DEBUG_PRINT - fprintf(stdout," comm_rhs_vector P%d Irecv P%d : %d\n", - my_p, ip, recv_length ); - fflush(stdout); -#endif - MPI_Irecv( vec + recv_beg , - recv_length * sizeof(VECTOR_SCALAR), MPI_BYTE , - ip , 0 , MPI_COMM_WORLD , recv_request + irecv ); - ++irecv ; - } - } - - /* Gather components into send buffer */ - - for ( i = 0 ; i < send_pc[np] ; ++i ) { - send_buf[i] = vec[ send_id[i] ]; - } - - MPI_Barrier( MPI_COMM_WORLD ); - - for ( i = 1 ; i < np ; ++i ) { - const int ip = ( i + my_p ) % np ; - const int send_beg = send_pc[i]; - const int send_length = send_pc[i+1] - send_beg ; - if ( send_length ) { /* Send to 'i' */ -#ifdef DEBUG_PRINT - fprintf(stdout," comm_rhs_vector P%d Rsend P%d : %d\n", - my_p, ip, send_length ); - fflush(stdout); -#endif - MPI_Rsend( send_buf + send_beg , - send_length * sizeof(VECTOR_SCALAR), MPI_BYTE , - ip , 0 , MPI_COMM_WORLD ); - } - } - - MPI_Waitall( irecv , recv_request , recv_status ); - - free( recv_status ); - free( recv_request ); - free( send_buf ); - } -} -#else -#define comm_rhs_vector( D , V ) /* */ -#endif - -/*--------------------------------------------------------------------*/ - -void cgsolve_set_lhs( const struct cgsolve_data * const data , - const VECTOR_SCALAR * const x , - VECTOR_SCALAR * const b ) -{ - const int nRow = data->nRow ; - const int nVec = data->recv_pc[ data->np ] ; - const int * const A_pc = data->A_pc ; - const int * const A_ia = data->A_ia ; - const MATRIX_SCALAR * const A_a = data->A_a ; - - VECTOR_SCALAR * const p = (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); - - tpi_copy( nRow , x , p ); - - comm_rhs_vector( data , p ); - - tpi_crs_matrix_apply( nRow, A_pc, A_ia, A_a, p, b ); - - free( p ); -} - -/*--------------------------------------------------------------------*/ - -void cgsolve( const struct cgsolve_data * const data , - const VECTOR_SCALAR * const b , - VECTOR_SCALAR * const x , - int * const iter_count , - VECTOR_SCALAR * const norm_resid , - double * const dt_mxv , - double * const dt_axpby , - double * const dt_dot ) -{ - const int nRow = data->nRow ; - const int nVec = data->recv_pc[ data->np ] ; - const int max_iter = data->max_iter ; - const int print_iter = data->print_iter ; - const int * const A_pc = data->A_pc ; - const int * const A_ia = data->A_ia ; - const MATRIX_SCALAR * const A_a = data->A_a ; - const VECTOR_SCALAR tolerance = data->tolerance ; - - const VECTOR_SCALAR tol_2 = tolerance * tolerance ; - - VECTOR_SCALAR * const r = (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); - VECTOR_SCALAR * const p = (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); - VECTOR_SCALAR * const Ap = (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); - - VECTOR_SCALAR rtrans = 0.0 ; - - int k ; - - 
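/* The loop below is the textbook conjugate-gradient recurrence; rtrans is
 * |r|^2, so convergence is tested against tolerance^2:
 *
 *   r = b - A*x ; rtrans = dot(r,r)
 *   for k = 0,1,... while k < max_iter and rtrans > tol^2 :
 *     if k > 0 : rtrans_old = rtrans ; rtrans = dot(r,r)
 *                beta = rtrans / rtrans_old          (beta = 0 on first pass)
 *     p  = r + beta*p
 *     Ap = A*p                                       (halo exchange + CRS apply)
 *     alpha = rtrans / dot(p,Ap)
 *     x += alpha*p ;  r -= alpha*Ap
 *
 * Each step is wrapped in TIMER(...), so dt_mxv, dt_axpby, and dt_dot
 * accumulate both the total time and the sum of squared times per operation
 * class.
 */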
tpi_copy( nRow , b , r ); - tpi_copy( nRow , x , p ); - - comm_rhs_vector( data , p ); tpi_crs_matrix_apply( nRow, A_pc, A_ia, A_a, p, Ap ); - - tpi_axpby( nRow , -1.0, Ap, 1.0 , r ); - - /* Include timing dot product for 2 * #iter dot products */ - TIMER( dt_dot , rtrans = comm_sum( tpi_dot( nRow , r , r ) ) ); - - for ( k = 0 ; k < max_iter && tol_2 < rtrans ; ++k ) { - VECTOR_SCALAR alpha ; - VECTOR_SCALAR beta = 0.0 ; - VECTOR_SCALAR pAp = 0.0 ; - - if ( k ) { - const VECTOR_SCALAR oldrtrans = rtrans ; - TIMER( dt_dot , rtrans = comm_sum( tpi_dot( nRow , r , r ) ) ); - beta = rtrans / oldrtrans ; - } - - TIMER( dt_axpby , tpi_axpby( nRow, 1.0, r, beta, p ) ); - - TIMER( dt_mxv , comm_rhs_vector( data , p ); tpi_crs_matrix_apply( nRow, A_pc, A_ia, A_a, p, Ap ) ); - - TIMER( dt_dot , pAp = comm_sum( tpi_dot( nRow , p , Ap ) ) ); - - if ( 0 < fabs( pAp ) ) { - alpha = rtrans / pAp ; - } - else { - alpha = rtrans = 0.0 ; /* Orthogonal, cannot continue */ - } - - if ( ! ( ( k + 1 ) % print_iter ) ) { - fprintf(stdout," cgsolve | r(%d) | = %g\n",k,sqrt(rtrans)); - fflush(stdout); - } - - TIMER( dt_axpby , tpi_axpby( nRow , alpha, p, 1.0, x) ); - TIMER( dt_axpby , tpi_axpby( nRow , -alpha, Ap, 1.0, r) ); - } - - *norm_resid = sqrt( rtrans ); - *iter_count = k ; - - free( Ap ); - free( p ); - free( r ); -} - -/*--------------------------------------------------------------------*/ - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/CGSolver.h b/kokkos/basic/optional/ThreadPool/test/hpccg/CGSolver.h deleted file mode 100644 index 0660a01..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/CGSolver.h +++ /dev/null @@ -1,32 +0,0 @@ - -#include - -struct cgsolve_data { - int nRow ; - int * A_pc ; - int * A_ia ; - MATRIX_SCALAR * A_a ; - int max_iter ; - int print_iter ; - VECTOR_SCALAR tolerance ; - - int np ; - int ip ; - int * recv_pc ; - int * send_pc ; - int * send_id ; -}; - -void cgsolve_set_lhs( const struct cgsolve_data * data , - const VECTOR_SCALAR * const x , - VECTOR_SCALAR * const b ); - -void cgsolve( const struct cgsolve_data * data , - const VECTOR_SCALAR * const b , - VECTOR_SCALAR * const x , - int * const iter_count , - VECTOR_SCALAR * const norm_resid , - double * const dt_mxv , - double * const dt_axpby , - double * const dt_dot ); - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/CMakeLists.txt b/kokkos/basic/optional/ThreadPool/test/hpccg/CMakeLists.txt deleted file mode 100644 index bfba897..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/CMakeLists.txt +++ /dev/null @@ -1,83 +0,0 @@ - -INCLUDE(PackageAddExecutableAndTest) -INCLUDE(PackageLibraryMacros) - -#################### - -SET(HEADERS "") -SET(SOURCES "") - -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) - -SET(HEADERS ${HEADERS} - ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h - ) - -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) - -APPEND_SET(HEADERS - BoxPartition.h - CGSolver.h - tpi_vector.h - ) - -#################### - - -PACKAGE_ADD_EXECUTABLE( - test_tpi_hpccg - COMM serial mpi - SOURCES main.c CGSolver.c BoxPartition.c tpi_vector.c - DEPLIBS pthread m - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hpccg - NAME test_tpi_hpccg_serial_1 - COMM serial - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hpccg - NAME test_tpi_hpccg_serial_2 - COMM serial - ARGS "threads=2" - DIRECTORY . - XHOSTTYPE AIX - ) - -PACKAGE_ADD_TEST( - test_tpi_hpccg - NAME test_tpi_hpccg_serial_4 - COMM serial - ARGS "threads=4" - DIRECTORY . 
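# The serial variants above exercise 1, 2, and 4 TPI threads; the MPI variants
# below run the same driver on 1, 2, and 4 ranks.  With no extra ARGS, every
# test uses the driver defaults: a 16^3 global box and 50 CG iterations.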
- ) - -PACKAGE_ADD_TEST( - test_tpi_hpccg - NAME test_tpi_hpccg_mpi_1 - COMM mpi - NUM_MPI_PROCS 1 - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hpccg - NAME test_tpi_hpccg_mpi_2 - COMM mpi - NUM_MPI_PROCS 2 - DIRECTORY . - ) - -PACKAGE_ADD_TEST( - test_tpi_hpccg - NAME test_tpi_hpccg_mpi_4 - COMM mpi - NUM_MPI_PROCS 4 - DIRECTORY . - ) - - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/main.c b/kokkos/basic/optional/ThreadPool/test/hpccg/main.c deleted file mode 100644 index 676a02d..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/main.c +++ /dev/null @@ -1,340 +0,0 @@ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -/*--------------------------------------------------------------------*/ - -static -void hpccg_alloc_and_fill( const int np , - const int my_p , - const int gbox[][2] , - const int ghost , - struct cgsolve_data * const data ) -{ - int (*pbox)[3][2] = NULL ; - int * map_local_ord = NULL; - - data->nRow = 0 ; - data->A_pc = NULL ; - data->A_ia = NULL ; - data->A_a = NULL ; - - data->np = np ; - data->ip = my_p ; - data->recv_pc = NULL ; - data->send_pc = NULL ; - data->send_id = NULL ; - - box_partition_rcb( np, my_p, - (const int (*)[2]) gbox, ghost, - & pbox , - & map_local_ord , - & data->recv_pc , - & data->send_pc , - & data->send_id ); - - { - const int (* const my_box)[2] = (const int (*)[2]) pbox[my_p] ; - const int bx = my_box[0][0] ; - const int by = my_box[1][0] ; - const int bz = my_box[2][0] ; - const int nx = my_box[0][1] - bx ; - const int ny = my_box[1][1] - by ; - const int nz = my_box[2][1] - bz ; - const int n = nx * ny * nz ; - const int nnz = 27 * n ; /* Upper bound */ - int * const pc = (int *) malloc( sizeof(int) * ( n + 1 ) ); - int * const ia = (int *) malloc( sizeof(int) * nnz ); - MATRIX_SCALAR * const a = (MATRIX_SCALAR *) malloc( sizeof(MATRIX_SCALAR) * nnz ); - - int irow = 0 ; - int ipc = 0 ; - int ix , iy , iz ; - int sx , sy , sz ; - - for ( iz = 0 ; iz < nz ; ++iz ) { - for ( iy = 0 ; iy < ny ; ++iy ) { - for ( ix = 0 ; ix < nx ; ++ix , ++irow ) { - - if ( irow != box_map_local( my_box, ghost, map_local_ord,ix,iy,iz) ) { - fprintf(stderr,"P%d: irow[%d] != box_map_local(%d,%d,%d) = %d\n", - my_p,irow,ix,iy,iz, - box_map_local( my_box, ghost, map_local_ord, ix, iy, iz) ); - } - - pc[ irow ] = ipc ; /* Beginning of row coefficients */ - /* Diagonal term first */ - ia[ ipc ] = irow ; - a[ ipc ] = 27.0f ; - ++ipc ; - - /* Off-diagonal terms to follow */ - for ( sz = -1 ; sz <= 1 ; ++sz ) { - for ( sy = -1 ; sy <= 1 ; ++sy ) { - for ( sx = -1 ; sx <= 1 ; ++sx ) { - const int dx = ix + sx ; - const int dy = iy + sy ; - const int dz = iz + sz ; - const int global_x = dx + bx ; - const int global_y = dy + by ; - const int global_z = dz + bz ; - - if ( gbox[0][0] <= global_x && global_x < gbox[0][1] && - gbox[1][0] <= global_y && global_y < gbox[1][1] && - gbox[2][0] <= global_z && global_z < gbox[2][1] && - ! 
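/* box_map_local() may return an ordinal at or beyond nRow here: columns owned
 * by neighboring ranks are mapped into the ghost tail [nRow, recv_pc[np]).
 * That is why cgsolve() sizes its work vector p with nVec = recv_pc[np]
 * entries and calls comm_rhs_vector() before every matrix apply.
 */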
( sz == 0 && sy == 0 && sx == 0 ) ) { - /* 'icol' is mapped for communication */ - - const int icol = - box_map_local(my_box,ghost,map_local_ord,dx,dy,dz); - - if ( icol < 0 ) { - fprintf(stderr,"P%d : bad column at local (%d,%d,%d) global(%d,%d,%d)\n", - my_p, dx,dy,dz,global_x,global_y,global_z); - fflush(stderr); - abort(); - } - - ia[ ipc ] = icol ; - a[ ipc ] = -1.0f ; - ++ipc ; - } - } - } - } - } - } - } - - pc[irow] = ipc ; - - data->nRow = irow ; - data->A_pc = pc ; - data->A_ia = ia ; - data->A_a = a ; - } - - free( map_local_ord ); - free( pbox ); -} - -/*--------------------------------------------------------------------*/ - -int main( int argc , char ** argv ) -{ - const int ghost = 1 ; - const int max_cube = 20 ; - int ncube[20] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; - - FILE * print_file = stdout ; - int print_iter = 500 ; - int max_iter = 50 ; - - VECTOR_SCALAR tolerance = 0.0 ; /* Force max iterations */ - - int gbox[3][2] = { { 0 , 16 } , { 0 , 16 } , { 0 , 16 } }; - int nt = 0 ; - int trials = 5 ; - int ntest ; - int np = 1; - int my_p = 0 ; - -#ifdef HAVE_MPI - MPI_Init( & argc , & argv ); - MPI_Comm_size( MPI_COMM_WORLD , & np ); - MPI_Comm_rank( MPI_COMM_WORLD , & my_p ); -#endif - - if ( ! my_p ) { - const char arg_threads[] = "threads=" ; - const char arg_cube[] = "cube=" ; - const char arg_box[] = "box=" ; - const char arg_max[] = "max_iter=" ; - const char arg_trials[] = "trials=" ; - const char arg_print[] = "print_iter=" ; - const char arg_file[] = "print_file=" ; - int i ; - for ( i = 1 ; i < argc ; ++i ) { - if ( ! strncmp(argv[i],arg_threads,strlen(arg_threads)) ) { - sscanf(argv[i]+strlen(arg_threads),"%d",&nt); - } - else if ( ! strncmp(argv[i],arg_box,strlen(arg_box)) ) { - sscanf(argv[i]+strlen(arg_box),"%d%*[x]%d%*[x]%d", - & gbox[0][1] , & gbox[1][1] , & gbox[2][1] ); - } - else if ( ! strncmp(argv[i],arg_cube,strlen(arg_cube)) ) { - sscanf(argv[i]+strlen(arg_cube), - "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d", - ncube+0, ncube+1, ncube+2, ncube+3, ncube+4, - ncube+5, ncube+6, ncube+7, ncube+8, ncube+9, - ncube+10, ncube+11, ncube+12, ncube+13, ncube+14, - ncube+15, ncube+16, ncube+17, ncube+18, ncube+19); - } - else if ( ! strncmp(argv[i],arg_max,strlen(arg_max)) ) { - sscanf(argv[i]+strlen(arg_max),"%d",&max_iter); - } - else if ( ! strncmp(argv[i],arg_trials,strlen(arg_trials)) ) { - sscanf(argv[i]+strlen(arg_trials),"%d",&trials); - } - else if ( ! strncmp(argv[i],arg_print,strlen(arg_print)) ) { - sscanf(argv[i]+strlen(arg_print),"%d",&print_iter); - } - else if ( ! strncmp(argv[i],arg_file,strlen(arg_file)) ) { - char buffer[256] ; - sscanf(argv[i]+strlen(arg_file),"%s",buffer); - print_file = fopen(buffer,"a"); - } - } - } - -#ifdef HAVE_MPI - { - MPI_Bcast( & nt , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & gbox[0][0] , 6 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( ncube , max_cube , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & max_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & print_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - MPI_Bcast( & trials , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); - } -#endif - - if ( nt ) { - TPI_Init( nt ); - TPI_Block(); - TPI_Unblock(); - } - - if ( ! 
my_p ) { - fprintf(print_file,"\"PROC\" , \"THREAD\" , \"EQUATION\" , \"NON-ZERO\" , \"MXV\" , \"AXPBY\" , \"DOT\" , \"Xerror\" , \"Iter\"\n"); - fprintf(print_file,"\"COUNT\" , \"COUNT\" , \"COUNT\" , \"COUNT\" , \"Mflops\" , \"Mflops\" , \"Mflops\" , \"L2norm\" , \"COUNT\"\n"); - } - - for ( ntest = 0 ; ! ntest || ( ntest < max_cube && ncube[ntest] ) ; ++ntest ) { - struct cgsolve_data cgdata ; - - if ( ncube[ntest] ) { - gbox[0][1] = gbox[1][1] = gbox[2][1] = ncube[ntest] ; - } - - hpccg_alloc_and_fill( np, my_p, (const int (*)[2]) gbox, ghost, &cgdata); - - cgdata.max_iter = max_iter ; - cgdata.print_iter = print_iter ; - cgdata.tolerance = tolerance ; - - { - double dt_mxv[2] = { 0 , 0 }; - double dt_axpby[2] = { 0 , 0 }; - double dt_dot[2] = { 0 , 0 }; - VECTOR_SCALAR norm_resid = 0.0 ; - int iter_count = 0 ; - int iter_total = 0 ; - int k ; - - VECTOR_SCALAR * const b = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow ); - VECTOR_SCALAR * const x = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow ); - VECTOR_SCALAR * const xexact = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow ); - - { - const VECTOR_SCALAR value = 1.0 /* 1.0 / 3.0 */ ; - int i ; - for ( i = 0 ; i < cgdata.nRow ; ++i ) xexact[i] = value ; - } - - for ( k = 0 ; k < trials ; ++k ) { - int i ; - - for ( i = 0 ; i < cgdata.nRow ; ++i ) { x[i] = 0.0 ; } - - cgsolve_set_lhs( & cgdata , xexact , b ); - - cgsolve( & cgdata, b, x, - & iter_count, & norm_resid, - dt_mxv , dt_axpby , dt_dot ); - - iter_total += iter_count ; - } - - { - int nnzGlobal = cgdata.A_pc[ cgdata.nRow ]; - double error[2] = { 0 , 0 }; - - for ( k = 0 ; k < cgdata.nRow ; ++k ) { - error[0] += ( x[k] - xexact[k] ) * ( x[k] - xexact[k] ); - error[1] += xexact[k] * xexact[k] ; - } - -#ifdef HAVE_MPI - { - double error_global[2] = { 0.0 , 0.0 }; - int nnz = nnzGlobal ; - - MPI_Allreduce( & nnz , & nnzGlobal , 1 , MPI_INT , MPI_SUM , - MPI_COMM_WORLD ); - - MPI_Allreduce( error , error_global , 2 , MPI_DOUBLE , MPI_SUM , - MPI_COMM_WORLD ); - - error[0] = error_global[0]; - error[1] = error_global[1]; - } -#endif - - error[0] = sqrt( error[0] ); - error[1] = sqrt( error[1] ); - - if ( ! 
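/* Reported rates: per CG iteration the sparse apply costs about 2*nnz flops,
 * each of the three AXPBYs 3 flops per row, and each of the two dot products
 * 2 flops per row; the MXV/AXPBY/DOT columns divide those totals, summed over
 * iter_total iterations across all trials, by the corresponding accumulated
 * timers.  Xerror is the relative error ||x - xexact|| / ||xexact|| reduced
 * over all ranks.
 */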
my_p ) { - const int nRowGlobal = ( gbox[0][1] - gbox[0][0] ) * - ( gbox[1][1] - gbox[1][0] ) * - ( gbox[2][1] - gbox[2][0] ); - - const double mflop_mxv = - 1.0e-6 * ( iter_total ) * 2 * nnzGlobal / dt_mxv[0] ; - - const double mflop_axpby = - 1.0e-6 * ( iter_total * 3 ) * 3 * nRowGlobal / dt_axpby[0] ; - - const double mflop_dot = - 1.0e-6 * ( iter_total * 2 ) * 2 * nRowGlobal / dt_dot[0] ; - - fprintf(print_file,"%8d , %8d , %8d , %8d , %10g , %10g , %10g , %g , %d\n", - np , nt , nRowGlobal , nnzGlobal , - mflop_mxv , mflop_axpby , mflop_dot , - error[0] / error[1] , iter_total ); - fflush(print_file); - } - } - - free( xexact ); - free( x ); - free( b ); - } - free( cgdata.A_a ); - free( cgdata.A_ia ); - free( cgdata.A_pc ); - free( cgdata.recv_pc ); - free( cgdata.send_pc ); - free( cgdata.send_id ); - } - - if ( nt ) { TPI_Finalize(); } - -#ifdef HAVE_MPI - MPI_Finalize(); -#endif - - return 0 ; -} - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/tpi_vector.c b/kokkos/basic/optional/ThreadPool/test/hpccg/tpi_vector.c deleted file mode 100644 index 1b8a26c..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/tpi_vector.c +++ /dev/null @@ -1,273 +0,0 @@ -#include - -#include - -#include -#include - -/*--------------------------------------------------------------------*/ - -struct tpi_work_vector { - VECTOR_SCALAR alpha ; - VECTOR_SCALAR beta ; - const VECTOR_SCALAR * x ; - const VECTOR_SCALAR * y ; - VECTOR_SCALAR * w ; - int n ; -}; - -static void tpi_work_span( TPI_Work * const work , const int n , - int * const iBeg , int * const iEnd ) -{ - const int chunk = ( n + work->count - 1 ) / work->count ; - const int i_end = chunk + ( *iBeg = chunk * work->rank ); - - *iEnd = n < i_end ? n : i_end ; -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_fill( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR alpha = h->alpha ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] = alpha ; } -} - -void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.alpha = alpha ; - tmp.w = x ; - tmp.n = n ; - TPI_Run_threads( tpi_work_fill , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_scale( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR beta = h->beta ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] *= beta ; } -} - -void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.alpha = alpha ; - tmp.w = x ; - tmp.n = n ; - TPI_Run_threads( tpi_work_scale , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_copy( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR * const x = h->x ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] = x[i] ; } -} - -void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ) -{ - struct 
tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.x = x ; - tmp.w = y ; - tmp.n = n ; - TPI_Run_threads( tpi_work_copy , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_axpby( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - const VECTOR_SCALAR alpha = h->alpha ; - const VECTOR_SCALAR beta = h->beta ; - const VECTOR_SCALAR * const x = h->x ; - VECTOR_SCALAR * const w = h->w ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { w[i] = alpha * x[i] + beta * w[i] ; } -} - -void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , - VECTOR_SCALAR beta , VECTOR_SCALAR * y ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - tmp.alpha = alpha ; - tmp.beta = beta ; - tmp.x = x ; - tmp.w = y ; - tmp.n = n ; - - TPI_Run_threads( tpi_work_axpby , & tmp , 0 ); -} - -/*--------------------------------------------------------------------*/ - -static void tpi_work_dot_partial( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - VECTOR_SCALAR * const s = (VECTOR_SCALAR *) work->reduce ; - const VECTOR_SCALAR * const x = h->x ; - const VECTOR_SCALAR * const y = h->y ; - VECTOR_SCALAR tmp = *s ; - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { tmp += x[i] * y[i] ; } - - *s = tmp ; -} - -static void tpi_work_dot_partial_self( TPI_Work * work ) -{ - const struct tpi_work_vector * const h = - (struct tpi_work_vector *) work->info ; - - VECTOR_SCALAR * const s = (VECTOR_SCALAR *) work->reduce ; - const VECTOR_SCALAR * const x = h->x ; - VECTOR_SCALAR tmp = *s ; - - int i , iEnd ; - - tpi_work_span( work , h->n , & i , & iEnd ); - - for ( ; i < iEnd ; ++i ) { const VECTOR_SCALAR d = x[i] ; tmp += d * d ; } - - *s = tmp ; -} - -static void tpi_work_dot_join( TPI_Work * work , const void * src ) -{ - *((VECTOR_SCALAR *) ( work->reduce) ) += *((const VECTOR_SCALAR *) src); -} - -static void tpi_work_dot_init( TPI_Work * work ) -{ - *((VECTOR_SCALAR *) ( work->reduce) ) = 0 ; -} - -VECTOR_SCALAR tpi_dot( int n , const VECTOR_SCALAR * x , const VECTOR_SCALAR * y ) -{ - struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; - VECTOR_SCALAR result = 0.0 ; - tmp.x = x ; - tmp.y = y ; - tmp.n = n ; - if ( x != y ) { - TPI_Run_threads_reduce( tpi_work_dot_partial , & tmp , - tpi_work_dot_join , tpi_work_dot_init , - sizeof(result) , & result ); - } - else { - TPI_Run_threads_reduce( tpi_work_dot_partial_self , & tmp , - tpi_work_dot_join , tpi_work_dot_init , - sizeof(result) , & result ); - } - return result ; -} - -/*--------------------------------------------------------------------*/ - -struct tpi_crs_matrix { - int nRow ; - const int * A_pc ; - const int * A_ia ; - const MATRIX_SCALAR * A_a ; - const VECTOR_SCALAR * x ; - VECTOR_SCALAR * y ; -}; - -static void tpi_work_crs_matrix_apply( TPI_Work * work ) -{ - const struct tpi_crs_matrix * const h = - (struct tpi_crs_matrix *) work->info ; - - const int * const A_pc = h->A_pc ; - const int * const A_ia = h->A_ia ; - const MATRIX_SCALAR * const A_a = h->A_a ; - const VECTOR_SCALAR * const x = h->x ; - - const int nRow = h->nRow ; - const int chunk = ( nRow + work->count - 1 ) / work->count ; - - int row = chunk * work->rank ; - int rowEnd = chunk + row ; - - if ( nRow < rowEnd ) { rowEnd = nRow ; } - - { - const int * 
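/* Row-parallel CSR matrix-vector product: each thread owns the contiguous
 * row block [ chunk * rank , rowEnd ) given by the same ceil(nRow/count)
 * chunking as tpi_work_span, and for each of its rows r computes
 * y[r] = sum over k in [ A_pc[r] , A_pc[r+1] ) of A_a[k] * x[ A_ia[k] ].
 * Threads write disjoint ranges of y, so no locking is required.
 */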
const pc_end = A_pc + rowEnd ; - const int * pc = A_pc + row ; - VECTOR_SCALAR * y = h->y + row ; - - for ( ; pc != pc_end ; ++pc , ++y ) { - const int * ia = A_ia + *pc ; - const MATRIX_SCALAR * a = A_a + *pc ; - const MATRIX_SCALAR * const a_end = A_a + pc[1] ; - VECTOR_SCALAR tmp = 0 ; - for ( ; a != a_end ; ++a , ++ia ) { - tmp += *a * x[ *ia ]; - } - *y = tmp ; - } - } -} - -/*--------------------------------------------------------------------*/ - -void tpi_crs_matrix_apply( - const int nRow , - const int * A_pc , - const int * A_ia , - const MATRIX_SCALAR * A_a , - const VECTOR_SCALAR * x , - VECTOR_SCALAR * y ) -{ - struct tpi_crs_matrix h = { 0 , NULL , NULL , NULL , NULL , NULL }; - h.nRow = nRow ; - h.A_pc = A_pc ; - h.A_ia = A_ia ; - h.A_a = A_a ; - h.x = x ; - h.y = y ; - TPI_Run_threads( tpi_work_crs_matrix_apply , & h , 0 ); -} - - diff --git a/kokkos/basic/optional/ThreadPool/test/hpccg/tpi_vector.h b/kokkos/basic/optional/ThreadPool/test/hpccg/tpi_vector.h deleted file mode 100644 index bcd514e..0000000 --- a/kokkos/basic/optional/ThreadPool/test/hpccg/tpi_vector.h +++ /dev/null @@ -1,31 +0,0 @@ - -#include - -#ifndef tpi_vector_h -#define tpi_vector_h - -#define VECTOR_SCALAR float -#define MATRIX_SCALAR float - -void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); - -void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); - -void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ); - -void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , - VECTOR_SCALAR beta , VECTOR_SCALAR * y ); - -VECTOR_SCALAR tpi_dot( int n , const VECTOR_SCALAR * x , - const VECTOR_SCALAR * y ); - -void tpi_crs_matrix_apply( - const int nRow , - const int * A_pc , - const int * A_ia , - const MATRIX_SCALAR * A_a , - const VECTOR_SCALAR * x , - VECTOR_SCALAR * y ); - -#endif - diff --git a/kokkos/basic/optional/ThreadPool/test/test_c_dnax.c b/kokkos/basic/optional/ThreadPool/test/test_c_dnax.c deleted file mode 100644 index 4f6ab9b..0000000 --- a/kokkos/basic/optional/ThreadPool/test/test_c_dnax.c +++ /dev/null @@ -1,414 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. */ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. 
Carter Edwards - * - * Multi-array 'axpby' - */ - -#include -#include -#include -#include -#include - -#if defined( HAVE_MPI ) -#include -#endif - -int test_c_tpi_dnax( int , int ); - -int main( int argc , char ** argv ) -{ - int num_thread[] = { 1 , 2 , 4 , 6 , 8 , 12 , 16 }; - int num_test = sizeof(num_thread) / sizeof(int); - - const int ntrial = 1 < argc ? atoi( argv[1] ) : 2 ; - int i ; - -#if defined( HAVE_MPI ) - int rank ; - - MPI_Init( & argc , & argv ); - MPI_Comm_rank( MPI_COMM_WORLD , & rank ); - if ( 0 == rank ) { -#endif - - - fprintf( stdout , "\"TESTING Multiarray 'axpby' with: %s\"\n" , - TPI_Version() ); - - for ( i = 0 ; i < num_test ; ++i ) { - test_c_tpi_dnax( num_thread[i] , ntrial ); - } - -#if defined( HAVE_MPI ) - } - MPI_Finalize(); -#endif - - return 0 ; -} - -/*------------------------------------------------------------------------*/ - -typedef double SCALAR ; - -/*------------------------------------------------------------------------*/ - -struct TestTPI_DNAX { - SCALAR * coef ; - SCALAR * array ; - unsigned number ; - unsigned length ; - unsigned stride ; - unsigned chunk_length ; -}; - -/*------------------------------------------------------------------------*/ - -static -void test_dnax_column( const unsigned num_array , - const unsigned stride , - const unsigned length , - const SCALAR * const coef , - SCALAR * const array ) -{ - unsigned i = 0 ; - for ( ; i < length ; ++i ) { - SCALAR * const a = array + i ; - SCALAR tmp = 0 ; - unsigned j = 0 ; - for ( ; j < num_array ; ++j ) { tmp += coef[j] * a[ j * stride ] ; } - a[0] = tmp ; - } -} - -static -void test_dnax_row( const unsigned num_array , - const unsigned stride , - const unsigned length , - const SCALAR * const coef , - SCALAR * const array ) -{ - unsigned i = 0 ; - for ( ; i < length ; ++i ) { - SCALAR * const a = array + i * stride ; - SCALAR tmp = 0 ; - unsigned j = 0 ; - for ( ; j < num_array ; ++j ) { tmp += coef[j] * a[j] ; } - a[0] = tmp ; - } -} - -/*------------------------------------------------------------------------*/ -/* The multi-array storage is flat: every array is fully contiguous. - * Work corresponds to a span of the array. - */ -static -void test_dnax_flat_work( TPI_Work * work ) -{ - const struct TestTPI_DNAX * const info = - (struct TestTPI_DNAX *) work->info ; - - const unsigned which_chunk = work->rank ; - const unsigned beg_local = info->chunk_length * which_chunk ; - const unsigned max_local = info->length - beg_local ; - const unsigned len_local = info->chunk_length < max_local ? - info->chunk_length : max_local ; - - test_dnax_column( info->number , - info->stride , - len_local , - info->coef , - info->array + beg_local ); - - return ; -} - -/* The multi-array storage is chunked: each array has a contiguous chunk; - * but chunk-subarrays are contiguously grouped. - */ -static -void test_dnax_column_work( TPI_Work * work ) -{ - const struct TestTPI_DNAX * const info = - (struct TestTPI_DNAX *) work->info ; - - const unsigned which_chunk = work->rank ; - const unsigned beg_local = info->chunk_length * which_chunk ; - const unsigned max_local = info->length - beg_local ; - const unsigned len_local = info->chunk_length < max_local ? 
- info->chunk_length : max_local ; - - const unsigned chunk_size = info->chunk_length * info->number ; - - test_dnax_column( info->number , - info->chunk_length , - len_local , - info->coef , - info->array + which_chunk * chunk_size ); - - return ; -} - -static -void test_dnax_row_work( TPI_Work * work ) -{ - const struct TestTPI_DNAX * const info = - (struct TestTPI_DNAX *) work->info ; - - const unsigned which_chunk = work->rank ; - const unsigned beg_local = info->chunk_length * which_chunk ; - const unsigned max_local = info->length - beg_local ; - const unsigned len_local = info->chunk_length < max_local ? - info->chunk_length : max_local ; - - const unsigned chunk_size = info->chunk_length * info->number ; - - test_dnax_row( info->number , - info->number , - len_local , - info->coef , - info->array + which_chunk * chunk_size ); - - return ; -} - -/*------------------------------------------------------------------------*/ -/* Process identical block of allocated memory as a - * as a flat array, chunked-column, and chunked-row. - */ - -static -void test_tpi_dnax_driver( const int nthread , - const unsigned Mflop_target , - const unsigned num_trials , - const unsigned num_test , - const unsigned num_test_array[] , - const unsigned length_array , - const unsigned length_chunk ) -{ - const unsigned max_array = num_test_array[ num_test - 1 ]; - - const unsigned num_chunk = - ( length_array + length_chunk - 1 ) / length_chunk ; - - const unsigned stride_array = num_chunk * length_chunk ; - const unsigned size_alloc = max_array * stride_array ; - - SCALAR * const coef = (SCALAR *) malloc( max_array * sizeof(SCALAR) ); - SCALAR * const array = (SCALAR *) malloc( size_alloc * sizeof(SCALAR) ); - - struct TestTPI_DNAX data = { NULL , NULL , 0 , 0 , 0 , 0 }; - - unsigned i_test , i , j ; - - data.coef = coef ; - - if ( NULL == array ) { - fprintf(stderr,"allocation failure for %u\n",size_alloc); - abort(); - } - - for ( i = 0 ; i < max_array ; ++i ) { coef[i] = 0 ; } - - printf("\n\"test_tpi_dnax[%d]( length_array = %u , stride_array = %u )\"\n", - nthread , length_array , stride_array ); - printf("\"NUMBER OF THREADS\" , %d\n" , nthread ); - printf("\"NUMBER OF CHUNKS\" , %u\n" , num_chunk ); - printf("\"NUMBER OF TRIALS\" , %u \n", num_trials ); - - printf("\"TEST\" , \"#ARRAY\" \"DT-MEAN\" , \"DT-STDDEV\" , \"MFLOP-MEAN\" , \"MFLOP-STDDEV\"\n"); - - /*----------------------------------------------------------------------*/ - - for ( i_test = 0 ; i_test < num_test ; ++i_test ) { - const unsigned num_array = num_test_array[ i_test ]; - const unsigned num_sets = max_array / num_array ; - - const double mflop_cycle = - ((double)( 2 * num_array * length_array )) / 1.0e6 ; - - const unsigned ncycle = 1 + (unsigned)( Mflop_target / mflop_cycle ); - - double dt_sum = 0 ; - double dt_sum_2 = 0 ; - - data.length = length_array ; - data.number = num_array ; - data.stride = stride_array ; - data.chunk_length = length_chunk ; - - for ( i = 0 ; i < size_alloc ; ++i ) { array[i] = 0 ; } - - for ( j = 0 ; j < num_trials ; ++j ) { - - double dt_tmp = TPI_Walltime(); - for ( i = 0 ; i < ncycle ; ++i ) { - data.array = array + stride_array * num_array * ( i % num_sets ); - TPI_Run( & test_dnax_flat_work , & data , num_chunk , 0 ); - } - dt_tmp = TPI_Walltime() - dt_tmp ; - - dt_sum += dt_tmp ; - dt_sum_2 += dt_tmp * dt_tmp ; - } - - { - const double dt_mean = dt_sum / num_trials ; - const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / ( num_trials * ( num_trials - 1 ) ) ); - const double 
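/* Trial statistics: dt_mean is the sample mean of the per-trial times and
 * dt_sdev the sample standard deviation, obtained from the running sums as
 * sqrt( ( n * sum(dt^2) - sum(dt)^2 ) / ( n * ( n - 1 ) ) ) with
 * n = num_trials.  The Mflop rate below divides one trial's work
 * ( mflop_cycle * ncycle ) by dt_mean, and its spread is reported as
 * mflop_mean * dt_sdev / ( dt_mean + dt_sdev ).
 */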
mflop_mean = mflop_cycle * ncycle / dt_mean ; - const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); - - printf("\"FLAT ARRAY\" , %6u , %9.5g , %9.3g , %9.5g , %9.3g\n", - num_array, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); - } - } - - /*----------------------------------------------------------------------*/ - - for ( i_test = 0 ; i_test < num_test ; ++i_test ) { - - const unsigned num_array = num_test_array[ i_test ]; - const unsigned num_sets = max_array / num_array ; - - const double mflop_cycle = - ((double)( 2 * num_array * length_array )) / 1.0e6 ; - - const unsigned ncycle = 1 + (unsigned)( Mflop_target / mflop_cycle ); - - double dt_sum = 0 ; - double dt_sum_2 = 0 ; - - data.length = length_array ; - data.number = num_array ; - data.stride = stride_array ; - data.chunk_length = length_chunk ; - - for ( i = 0 ; i < size_alloc ; ++i ) { array[i] = 0 ; } - - for ( j = 0 ; j < num_trials ; ++j ) { - - double dt_tmp = TPI_Walltime(); - for ( i = 0 ; i < ncycle ; ++i ) { - data.array = array + stride_array * num_array * ( i % num_sets ); - TPI_Run( & test_dnax_column_work , & data , num_chunk , 0 ); - } - dt_tmp = TPI_Walltime() - dt_tmp ; - - dt_sum += dt_tmp ; - dt_sum_2 += dt_tmp * dt_tmp ; - } - - { - const double dt_mean = dt_sum / num_trials ; - const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / ( num_trials * ( num_trials - 1 ) ) ); - const double mflop_mean = mflop_cycle * ncycle / dt_mean ; - const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); - - printf("\"CHUNK COLUMN\" , %6u , %9.5g , %9.3g , %9.5g , %9.3g\n", - num_array, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); - } - } - - /*----------------------------------------------------------------------*/ - - for ( i_test = 0 ; i_test < num_test ; ++i_test ) { - - const unsigned num_array = num_test_array[ i_test ]; - const unsigned num_sets = max_array / num_array ; - - const double mflop_cycle = - ((double)( 2 * num_array * length_array )) / 1.0e6 ; - - const unsigned ncycle = 1 + (unsigned)( Mflop_target / mflop_cycle ); - - double dt_sum = 0 ; - double dt_sum_2 = 0 ; - - data.length = length_array ; - data.number = num_array ; - data.stride = stride_array ; - data.chunk_length = length_chunk ; - - for ( i = 0 ; i < size_alloc ; ++i ) { array[i] = 0 ; } - - for ( j = 0 ; j < num_trials ; ++j ) { - - double dt_tmp = TPI_Walltime(); - - for ( i = 0 ; i < ncycle ; ++i ) { - data.array = array + stride_array * num_array * ( i % num_sets ); - TPI_Run( & test_dnax_row_work , & data , num_chunk , 0 ); - } - dt_tmp = TPI_Walltime() - dt_tmp ; - - dt_sum += dt_tmp ; - dt_sum_2 += dt_tmp * dt_tmp ; - } - - { - const double dt_mean = dt_sum / num_trials ; - const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / ( num_trials * ( num_trials - 1 ) ) ); - const double mflop_mean = mflop_cycle * ncycle / dt_mean ; - const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); - - printf("\"CHUNK ROW\" , %6u , %9.5g , %9.3g , %9.5g , %9.3g\n", - num_array, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); - } - } - - /*----------------------------------------------------------------------*/ - - free( array ); - free( coef ); -} - -/*------------------------------------------------------------------------*/ - -int test_c_tpi_dnax( int nthread , int ntrial ) -{ - const unsigned Mflop_target = 10 ; - const unsigned num_array[6] = { 2 , 5 , 10 , 20 , 50 , 100 }; - const unsigned ntest = sizeof(num_array) / sizeof(unsigned); - - if ( ntrial <= 0 ) { ntrial = 
7 ; } - - TPI_Init( nthread ); - - test_tpi_dnax_driver( nthread , - Mflop_target * nthread , - ntrial /* number trials */ , - ntest /* number of tests */ , - num_array /* number of arrays for each test */ , - 1e6 /* array computation length */ , - 1000 /* chunk length */ ); - - TPI_Finalize(); - - return 0 ; -} - - - diff --git a/kokkos/basic/optional/ThreadPool/test/test_mpi_sum.c b/kokkos/basic/optional/ThreadPool/test/test_mpi_sum.c deleted file mode 100644 index 51d6b9e..0000000 --- a/kokkos/basic/optional/ThreadPool/test/test_mpi_sum.c +++ /dev/null @@ -1,764 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. */ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. Carter Edwards - */ - -#include -#include -#include -#include -#include - -int rand_r( unsigned int * ); - -/*--------------------------------------------------------------------*/ - -#if defined(HAVE_MPI) - -#include - -typedef MPI_Comm COMM ; - -#else - -typedef int COMM ; - -#endif - -static int comm_size( COMM ); -static int comm_rank( COMM ); -static void comm_reduce_dmax( COMM , double * ); -static void comm_reduce_dsum( COMM , double * ); -static void comm_reduce_d4_sum( COMM , double * ); - -/*--------------------------------------------------------------------*/ - -static void my_span( const unsigned count , const unsigned rank , - const unsigned size , - unsigned * begin , unsigned * length ) -{ - const unsigned int max = ( size + count - 1 ) / count ; - const unsigned int end = size - max * ( count - ( rank + 1 ) ); - if ( rank ) { - *begin = end - max ; - *length = max ; - } - else { - *begin = 0 ; - *length = end ; - } -} - -/*--------------------------------------------------------------------*/ - -#define LESS_ABS( X , Y ) ( ( X < 0 ? -X : X ) < ( Y < 0 ? -Y : Y ) ) - -static void d2_add_d( double v[] , const double a ) -{ - const int AltV = a < 0 ? ( - a < ( v[0] < 0 ? - v[0] : v[0] ) ) - : ( a < ( v[0] < 0 ? - v[0] : v[0] ) ); - - const double VpA = v[0] + a ; - - v[1] += AltV ? 
( a - ( VpA - v[0] ) ) : ( v[0] - ( VpA - a ) ); - v[0] = VpA + v[1] ; - v[1] += VpA - v[0] ; -} - -void d4_dot( double v[] , unsigned n , const double * x , const double * y ) -{ - double * pos = v ; - double * neg = v + 2 ; - const double * const x_end = x + n ; - for ( ; x < x_end ; ++x , ++y ) { - const double a = *x * *y ; - if ( a < 0 ) { d2_add_d( neg , a ); } - else { d2_add_d( pos , a ); } - } -} - -double ddot( unsigned n , const double * x , const double * y ) -{ - double val = 0 ; - const double * const x_end = x + n ; - for ( ; x < x_end ; ++x , ++y ) { val += *x * *y ; } - return val ; -} - -/*--------------------------------------------------------------------*/ - -struct TaskXY { - unsigned int nreduce ; - unsigned int n ; - const double * x ; - const double * y ; -}; - -static -void reduce_init( TPI_Work * work ) -{ - struct TaskXY * const info = (struct TaskXY *) work->info ; - double * const dst = (double *) work->reduce ; - - if ( info->nreduce == 4 ) { - dst[0] = 0 ; - dst[1] = 0 ; - dst[2] = 0 ; - dst[3] = 0 ; - } - else if ( info->nreduce == 1 ) { - dst[0] = 0 ; - } -} - -static -void reduce_join( TPI_Work * work , const void * arg_src ) -{ - struct TaskXY * const info = (struct TaskXY *) work->info ; - double * const dst = (double *) work->reduce ; - const double * const src = (const double *) arg_src ; - - if ( info->nreduce == 4 ) { - d2_add_d( dst , src[0] ); - d2_add_d( dst , src[1] ); - d2_add_d( dst + 2 , src[2] ); - d2_add_d( dst + 2 , src[3] ); - } - else if ( info->nreduce == 1 ) { - dst[0] += src[0] ; - } -} - -/*--------------------------------------------------------------------*/ - -static -void work_d4_dot_tp( TPI_Work * work ) -{ - struct TaskXY * const info = (struct TaskXY *) work->info ; - double * const dst = (double *) work->reduce ; - - unsigned int begin , length ; - - my_span( work->count , work->rank , info->n , & begin , & length ); - - d4_dot( dst , length , info->x + begin , info->y + begin ); -} - -double d4_dot_tp( COMM comm, unsigned nwork, unsigned n, - const double * x, const double * y ) -{ - struct TaskXY info = { 4 , 0 , NULL , NULL }; - double result[4] = { 0 , 0 , 0 , 0 }; - info.n = n ; - info.x = x ; - info.y = y ; - - if ( nwork ) { - TPI_Run_reduce( work_d4_dot_tp , & info , nwork , - reduce_join, reduce_init, sizeof(result) , result ); - } - else { - TPI_Run_threads_reduce( work_d4_dot_tp , & info , - reduce_join, reduce_init, sizeof(result), result); - } - - comm_reduce_d4_sum( comm , result ); - - d2_add_d( result , result[2] ); - d2_add_d( result , result[3] ); - - return result[0] ; -} - -static -void task_ddot_tp( TPI_Work * work ) -{ - struct TaskXY * const info = (struct TaskXY *) work->info ; - double * const dst = (double *) work->reduce ; - unsigned int begin , length ; - - my_span( work->count , work->rank , info->n , & begin , & length ); - - *dst += ddot( length , info->x + begin , info->y + begin ); - - return ; -} - -double ddot_tp( COMM comm, unsigned nwork, unsigned n, - const double * x, const double * y ) -{ - struct TaskXY info = { 1 , 0 , NULL , NULL }; - double result = 0 ; - info.n = n ; - info.x = x ; - info.y = y ; - - if ( nwork ) { - TPI_Run_reduce( task_ddot_tp , & info , nwork , - reduce_join, reduce_init, sizeof(result), & result); - } - else { - TPI_Run_threads_reduce( task_ddot_tp , & info , - reduce_join, reduce_init, sizeof(result), & result); - } - - comm_reduce_dsum( comm , & result ); - - return result ; -} - -/*--------------------------------------------------------------------*/ - 
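/* Illustrative sketch, not part of the original TPI sources (the helper
 * names dd_add and dd_value are invented here): d2_add_d and d4_dot above
 * implement a compensated, sign-split accumulation.  Each running sum is a
 * ( value , correction ) pair so the low-order bits lost by every addition
 * are recovered and re-added, and positive and negative products go into
 * separate pairs to limit cancellation; d4_dot_tp folds the two pairs
 * together after the global reduction.  The minimal helper below shows the
 * same value-plus-correction idea in isolation.
 */
static void dd_add( double acc[2] /* acc[0] = value , acc[1] = correction */ ,
                    const double a )
{
  const double s = acc[0] + a ;
  /* Recover the rounding error of the addition exactly, branching on which
   * operand has the larger magnitude (Fast2Sum).
   */
  const double err = ( ( acc[0] < 0 ? -acc[0] : acc[0] ) <
                       ( a      < 0 ? -a      : a      ) )
                   ? ( acc[0] - ( s - a ) )
                   : ( a      - ( s - acc[0] ) );
  acc[0] = s ;
  acc[1] += err ;
}

static double dd_value( const double acc[2] ) { return acc[0] + acc[1] ; }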
-void dfill_rand( unsigned seed , unsigned n , double * x , double mag ) -{ - const double scale = 2.0 * mag / (double) RAND_MAX ; - double * const xe = x + n ; - for ( ; xe != x ; ++x , ++seed ) { - unsigned s = seed ; - *x = scale * ((double) rand_r( & s )) - mag ; - } -} - -struct FillWork { - double mag ; - double * beg ; - unsigned length ; - unsigned seed ; -}; - -static void task_dfill_rand( TPI_Work * work ) -{ - struct FillWork * const w = (struct FillWork *) work->info ; - - unsigned int begin , length ; - - my_span( work->count, work->rank, w->length, & begin , & length ); - - dfill_rand( w->seed + begin , length , w->beg + begin , w->mag ); -} - -void dfill_rand_tp( unsigned nblock , unsigned seed , - unsigned n , double * x , double mag ) -{ - struct FillWork data ; - data.mag = mag ; - data.beg = x ; - data.length = n ; - data.seed = seed ; - if ( nblock ) { - const int nwork = ( n + nblock - 1 ) / nblock ; - TPI_Run( & task_dfill_rand , & data , nwork , 0 ); - } - else { - TPI_Run_threads( & task_dfill_rand , & data , 0 ); - } -} - -/*--------------------------------------------------------------------*/ - -static -void test_ddot_performance( - COMM comm , - const int nthreads , - const int nblock , - const unsigned int num_trials , - const unsigned int num_tests , - const unsigned int length_array[] /* Global array length for each test */ , - const double mag ) -{ - const unsigned int ddot_flop = 2 ; /* 1 mult, 1 sum */ - const unsigned int d4_dot_flop = 12 ; /* 1 mult, 7 sum, 4 compare */ - - const unsigned int p_rank = comm_rank( comm ); - const unsigned int p_size = comm_size( comm ); - - const unsigned int max_array = length_array[ num_tests - 1 ]; - - unsigned int local_max_size = 0 ; - unsigned int i_test ; - - TPI_Init( nthreads ); - - if ( 0 == p_rank ) { - fprintf(stdout,"\n\"DDOT and D4DOT Performance testing\"\n"); - fprintf(stdout,"\"MPI size = %u , TPI size = %d , BlockSize = %d , #Trials = %u\"\n",p_size,nthreads,nblock,num_trials); - fprintf(stdout,"\"TEST\" , \"LENGTH\" , \"#CYCLE\" , \"DT-MEAN\" , \"DT-STDDEV\" , \"MFLOP-MEAN\" , \"MFLOP-STDDEV\"\n"); - } - - for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { - const unsigned length = length_array[ i_test ]; /* Global */ - const unsigned ncycle = 2 * max_array / length ; - const unsigned local_max = ncycle * ( ( length + p_size - 1 ) / p_size ); - if ( local_max_size < local_max ) { local_max_size = local_max ; } - } - - { - double * const x = (double*) malloc(local_max_size * 2 * sizeof(double)); - double * const y = x + local_max_size ; - - unsigned int i , j ; - - dfill_rand_tp( nblock, 0, local_max_size, x, mag ); - dfill_rand_tp( nblock, local_max_size, local_max_size, y, mag ); - - for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { - const unsigned length = length_array[ i_test ]; /* Global */ - const unsigned ncycle = 2 * max_array / length ; - - unsigned int local_begin , local_length , local_nwork ; - - double dt_sum = 0.0 ; - double dt_sum_2 = 0.0 ; - - my_span( p_size, p_rank, length, & local_begin , & local_length ); - - local_nwork = nblock ? 
( local_length + nblock - 1 ) / nblock : 0 ; - - /*--------------------------------------------------------------*/ - - for ( i = 0 ; i < num_trials ; ++i ) { - double dt = TPI_Walltime(); - for ( j = 0 ; j < ncycle ; ++j ) { - ddot_tp( comm, local_nwork, local_length, - x + j * local_length , - y + j * local_length ); - } - dt = TPI_Walltime() - dt ; - comm_reduce_dmax( comm , & dt ); - dt_sum += dt ; - dt_sum_2 += dt * dt ; - } - - if ( 0 == p_rank ) { - const double mflop = ((double)( ddot_flop * length * ncycle ) ) / ((double) 1e6 ); - - const double dt_mean = dt_sum / num_trials ; - const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / - ( num_trials * ( num_trials - 1 ) ) ); - const double mflop_mean = mflop / dt_mean ; - const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); - - fprintf(stdout,"\"DDOT\" , %8u , %8u , %9.5g , %9.5g , %9.5g , %9.5g\n", - length, ncycle, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); - fflush(stdout); - } - } - - for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { - const unsigned length = length_array[ i_test ]; /* Global */ - const unsigned ncycle = 2 * max_array / length ; - - unsigned int local_begin , local_length , local_nwork ; - - double dt_sum = 0 ; - double dt_sum_2 = 0 ; - - my_span( p_size, p_rank, length, & local_begin , & local_length ); - - local_nwork = nblock ? ( local_length + nblock - 1 ) / nblock : 0 ; - - /*--------------------------------------------------------------*/ - - for ( i = 0 ; i < num_trials ; ++i ) { - double dt = TPI_Walltime(); - for ( j = 0 ; j < ncycle ; ++j ) { - d4_dot_tp( comm, local_nwork, local_length, - x + j * local_length , - y + j * local_length ); - } - dt = TPI_Walltime() - dt ; - comm_reduce_dmax( comm , & dt ); - dt_sum += dt ; - dt_sum_2 += dt * dt ; - } - - if ( 0 == p_rank ) { - const double mflop = ((double)( d4_dot_flop * length * ncycle ) ) / ((double) 1e6 ); - - const double dt_mean = dt_sum / num_trials ; - const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / - ( num_trials * ( num_trials - 1 ) ) ); - const double mflop_mean = mflop / dt_mean ; - const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); - - fprintf(stdout,"\"D4DOT\" , %8u , %8u , %9.5g , %9.5g , %9.5g , %9.5g\n", - length, ncycle, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); - fflush(stdout); - } - } - - /*--------------------------------------------------------------*/ - - free( x ); - } - - TPI_Finalize(); - - return ; -} - -/*--------------------------------------------------------------------*/ - -static -void test_ddot_accuracy( - COMM comm , - const int nthreads , - const int nblock , - const unsigned int num_tests , - const unsigned int length_array[] /* Global array length for each test */ , - const double mag ) -{ - const unsigned int p_rank = comm_rank( comm ); - const unsigned int p_size = comm_size( comm ); - - const unsigned int max_array = length_array[ num_tests - 1 ]; - const unsigned int local_max_size = ( max_array + p_size - 1 ) / p_size ; - - unsigned int i_test ; - - TPI_Init( nthreads ); - - if ( 0 == p_rank ) { - fprintf(stdout,"\n\"DDOT and D4DOT Accuracy testing\"\n"); - fprintf(stdout,"\"MPI size = %u , TPI size = %d , BlockSize = %d\"\n",p_size,nthreads,nblock); - fprintf(stdout,"\"TEST\" , \"LENGTH\" , \"VALUE\"\n"); - } - - { - double * const x = (double*) malloc(local_max_size * 2 * sizeof(double)); - double * const y = x + local_max_size ; - - for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { - const unsigned length = 
length_array[ i_test ]; /* Global */ - const unsigned length_half = length / 2 ; - - unsigned local_begin , local_length , local_nwork ; - - double val_ddot ; - - my_span( p_size, p_rank, length, & local_begin , & local_length ); - - local_nwork = nblock ? ( local_length + nblock - 1 ) / nblock : 0 ; - - /*--------------------------------------------------------------*/ - - if ( local_begin < length_half ) { - const unsigned len = local_length < length_half - local_begin - ? local_length : length_half - local_begin ; - - dfill_rand_tp( nblock, local_begin, len, x, mag ); - dfill_rand_tp( nblock, length + local_begin, len, y, mag ); - } - - if ( length_half < local_begin + local_length ) { - const unsigned beg = length_half > local_begin - ? length_half : local_begin ; - const unsigned off = beg - local_begin ; - const unsigned len = local_length - off ; - - dfill_rand_tp( nblock, beg - length_half, len, x + off, mag ); - dfill_rand_tp( nblock, length + beg - length_half, len, y + off, - mag ); - } - - /*--------------------------------------------------------------*/ - - val_ddot = ddot_tp( comm, local_nwork, local_length, x, y ); - - if ( 0 == p_rank ) { - fprintf(stdout,"\"DDOT\" , %8u , %9.3g\n", length , val_ddot ); - fflush(stdout); - } - } - - for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { - const unsigned length = length_array[ i_test ]; /* Global */ - const unsigned length_half = length / 2 ; - - unsigned local_begin , local_length , local_nwork ; - - double val_d4_dot ; - - my_span( p_size, p_rank, length, & local_begin , & local_length ); - - local_nwork = nblock ? ( local_length + nblock - 1 ) / nblock : 0 ; - - /*--------------------------------------------------------------*/ - - if ( local_begin < length_half ) { - const unsigned len = local_length < length_half - local_begin - ? local_length : length_half - local_begin ; - - dfill_rand_tp( nblock, local_begin, len, x, mag ); - dfill_rand_tp( nblock, length + local_begin, len, y, mag ); - } - - if ( length_half < local_begin + local_length ) { - const unsigned beg = length_half > local_begin - ? 
length_half : local_begin ; - const unsigned off = beg - local_begin ; - const unsigned len = local_length - off ; - - dfill_rand_tp( nblock, beg - length_half, len, x + off, mag ); - dfill_rand_tp( nblock, length + beg - length_half, len, y + off, - mag ); - } - - /*--------------------------------------------------------------*/ - - val_d4_dot = d4_dot_tp( comm, local_nwork, local_length, x , y ); - - if ( 0 == p_rank ) { - fprintf(stdout,"\"DDOT\" , %8u , %9.3g\n", length , val_d4_dot ); - fflush(stdout); - } - } - - /*--------------------------------------------------------------*/ - - free( x ); - } - - TPI_Finalize(); - - return ; -} - -/*--------------------------------------------------------------------*/ - -const unsigned test_lengths[] = - { 1e4 , 2e4 , 5e4 , - 1e5 , 2e5 , 5e5 , - 1e6 , 2e6 , 5e6 , 1e7 }; - -const unsigned test_count = sizeof(test_lengths) / sizeof(unsigned); -const unsigned nblock = 2500 ; - -const double test_mag = 1e4 ; - -static void test_performance( - COMM comm , const int test_thread_count , const int test_thread[] ) -{ - const unsigned num_trials = 11 ; - - int i ; - - for ( i = 0 ; i < test_thread_count ; ++i ) { - - test_ddot_performance( comm , test_thread[i] , nblock, - num_trials , test_count , test_lengths , test_mag ); - - test_ddot_performance( comm , test_thread[i] , 0, - num_trials , test_count , test_lengths , test_mag ); - } -} - -static void test_accuracy( - COMM comm , const int test_thread_count , const int test_thread[] , - unsigned test_do ) -{ - int i ; - - if ( test_count < test_do ) { test_do = test_count ; } - - for ( i = 0 ; i < test_thread_count ; ++i ) { - - test_ddot_accuracy( comm, test_thread[i], nblock, - test_do, test_lengths, test_mag ); - - test_ddot_accuracy( comm, test_thread[i], 0, - test_do, test_lengths, test_mag ); - } -} - -/*--------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -#define TEST_THREAD_MAX 128 - -#if defined(HAVE_MPI) - -int main( int argc , char **argv ) -{ - int nthread[ TEST_THREAD_MAX ]; - int i ; - - MPI_Init( & argc , & argv ); - - for ( i = 0 ; i < TEST_THREAD_MAX ; ++i ) { nthread[i] = 0 ; } - - if ( 0 == comm_rank( MPI_COMM_WORLD ) ) { - if ( 1 < argc && argc < TEST_THREAD_MAX ) { - nthread[0] = 1 ; - nthread[1] = argc - 1 ; - for ( i = 1 ; i < argc ; ++i ) { nthread[i+1] = atoi( argv[i] ); } - } - else { - nthread[0] = 0 ; - nthread[1] = 1 ; - nthread[2] = 1 ; - } - } - - MPI_Bcast( nthread , TEST_THREAD_MAX , MPI_INT , 0 , MPI_COMM_WORLD ); - - if ( nthread[0] ) { - test_accuracy( MPI_COMM_WORLD , nthread[1] , nthread + 2 , test_count ); - test_performance( MPI_COMM_WORLD , nthread[1] , nthread + 2 ); - } - else { - test_accuracy( MPI_COMM_WORLD , nthread[1] , nthread + 2 , 3 ); - } - - MPI_Finalize(); - - return 0 ; -} - -static int comm_size( COMM comm ) -{ - int size = 0 ; - MPI_Comm_size( comm , & size ); - return size ; -} - -static int comm_rank( COMM comm ) -{ - int rank = 0 ; - MPI_Comm_rank( comm , & rank ); - return rank ; -} - -static void comm_reduce_dmax( COMM comm , double * val ) -{ - double tmp ; - if ( MPI_SUCCESS == - MPI_Allreduce( val , & tmp , 1 , MPI_DOUBLE , MPI_MAX , comm ) ) { - *val = tmp ; - } - else { - *val = 0 ; - } -} - -static void comm_reduce_dsum( COMM comm , double * val ) -{ - double tmp ; - if ( MPI_SUCCESS == - MPI_Allreduce( val , & tmp , 1 , MPI_DOUBLE , MPI_SUM , comm ) ) { - *val = tmp ; - } - else { - *val = 0 ; - } -} - -static void comm_reduce_d4_op( void * 
argin , - void * argout , - int * n , - MPI_Datatype * d ) -{ - if ( d && n && *n == 4 ) { - double * const in = (double*) argin ; - double * const out = (double*) argout ; - d2_add_d( out , in[0] ); - d2_add_d( out , in[1] ); - d2_add_d( out + 2 , in[2] ); - d2_add_d( out + 2 , in[3] ); - } - return ; -} - -static void comm_reduce_d4_sum( COMM comm , double * val ) -{ - double tmp[4] ; - MPI_Op mpi_op = MPI_OP_NULL ; - - /* Use Reduce->Bcast instead of Allreduce due to a bug with the SUN MPI. */ - - MPI_Op_create( comm_reduce_d4_op , 0 , & mpi_op ); - MPI_Reduce( val , tmp , 4 , MPI_DOUBLE , mpi_op , 0 , comm ); - MPI_Bcast( tmp , 4 , MPI_DOUBLE , 0 , comm ); - MPI_Op_free( & mpi_op ); - - val[0] = tmp[0] ; - val[1] = tmp[1] ; - val[2] = tmp[2] ; - val[3] = tmp[3] ; -} - -#else - -int main( int argc , char **argv ) -{ - int nthread[ TEST_THREAD_MAX ]; - int i ; - - for ( i = 0 ; i < TEST_THREAD_MAX ; ++i ) { nthread[i] = 0 ; } - - if ( 1 < argc && argc < TEST_THREAD_MAX ) { - nthread[0] = 1 ; - nthread[1] = argc - 1 ; - for ( i = 1 ; i < argc ; ++i ) { nthread[i+1] = atoi( argv[i] ); } - } - else { - nthread[0] = 0 ; - nthread[1] = 4 ; - nthread[2] = 1 ; - nthread[3] = 2 ; - nthread[4] = 4 ; - nthread[5] = 8 ; - } - - if ( nthread[0] ) { - test_accuracy( 0 , nthread[1] , nthread + 2 , test_count ); - test_performance( 0 , nthread[1] , nthread + 2 ); - } - else { - test_accuracy( 0 , nthread[1] , nthread + 2 , 3 ); - } - - return 0 ; -} - -static int comm_size( COMM comm ) { return comm ? -1 : 1 ; } -static int comm_rank( COMM comm ) { return comm ? -1 : 0 ; } -static void comm_reduce_dmax( COMM comm , double * val ) -{ - if ( comm ) { *val = 0 ; } - return ; -} -static void comm_reduce_dsum( COMM comm , double * val ) -{ - if ( comm ) { *val = 0 ; } - return ; -} -static void comm_reduce_d4_sum( COMM comm , double * val ) -{ - if ( comm ) { val[0] = val[1] = val[2] = val[3] = 0 ; } - return ; -} - -#endif - -/*--------------------------------------------------------------------*/ - diff --git a/kokkos/basic/optional/ThreadPool/test/test_pthreads.c b/kokkos/basic/optional/ThreadPool/test/test_pthreads.c deleted file mode 100644 index 235eb41..0000000 --- a/kokkos/basic/optional/ThreadPool/test/test_pthreads.c +++ /dev/null @@ -1,279 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. */ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. 
Carter Edwards - */ - -#include -#include -#include -#include - -/*------------------------------------------------------------------------*/ -/* Test various ways of controling worker threads */ - -typedef struct TestPthreads_struct { - pthread_mutex_t m_lock ; - pthread_cond_t m_cond ; - int m_thread_rank ; - int m_thread_count ; -} TestPthreads ; - -/*------------------------------------------------------------------------*/ -/*------------------------------------------------------------------------*/ - -static void * test_driver( void * arg ) -{ - TestPthreads * const data = (TestPthreads*) arg ; - TestPthreads * const root = data - data->m_thread_rank ; - - /*------------------------------*/ - /* Initializing */ - - pthread_mutex_lock( & data->m_lock ); - - pthread_mutex_lock( & root->m_lock ); - pthread_cond_signal( & root->m_cond ); - pthread_mutex_unlock( & root->m_lock ); - - /*------------------------------*/ - - while ( data->m_thread_rank ) { - pthread_cond_wait( & data->m_cond , & data->m_lock ); - } - pthread_mutex_unlock( & data->m_lock ); - - /*------------------------------*/ - /* Terminating */ - - pthread_mutex_lock( & root->m_lock ); - if ( 0 == --( root->m_thread_count ) ) { - pthread_cond_signal( & root->m_cond ); - } - pthread_mutex_unlock( & root->m_lock ); - - return NULL ; -} - - -static void test_run( pthread_attr_t * const thread_attr , - const int number_threads , - const int number_trials , - const int number_loops , - double * const dt_start_stop , - double * const dt_loop ) -{ - TestPthreads data[ number_threads ]; - double dt_total ; - double dt_run = 0 ; - int j ; - - dt_total = TPI_Walltime(); - - for ( j = 0 ; j < number_trials ; ++j ) { - int i ; - - for ( i = 0 ; i < number_threads ; ++i ) { - pthread_cond_init( & data[i].m_cond , NULL ); - pthread_mutex_init( & data[i].m_lock , NULL ); - data[i].m_thread_rank = i ; - data[i].m_thread_count = number_threads ; - } - - pthread_mutex_lock( & data->m_lock ); - - for ( i = 1 ; i < number_threads ; ++i ) { - pthread_t pt ; - pthread_create( & pt, thread_attr, & test_driver , data + i ); - pthread_cond_wait( & data->m_cond , & data->m_lock ); - pthread_mutex_lock( & data[i].m_lock ); - } - - /* Running */ - - { - double dt = TPI_Walltime(); - int k ; - - for ( k = 1 ; k < number_loops ; ++k ) { - for ( i = 1 ; i < number_threads ; ++i ) { - pthread_cond_signal( & data[i].m_cond ); - pthread_mutex_unlock( & data[i].m_lock ); - } - - /* Work goes here */ - - for ( i = 1 ; i < number_threads ; ++i ) { - pthread_mutex_lock( & data[i].m_lock ); - } - } - - dt_run += TPI_Walltime() - dt ; - } - - /* Termination */ - - --( data->m_thread_count ); - - if ( data->m_thread_count ) { - for ( i = 1 ; i < number_threads ; ++i ) { - data[i].m_thread_rank = 0 ; - pthread_cond_signal( & data[i].m_cond ); - pthread_mutex_unlock( & data[i].m_lock ); - } - - pthread_cond_wait( & data->m_cond , & data->m_lock ); - } - - pthread_mutex_unlock( & data->m_lock ); - - for ( i = 0 ; i < number_threads ; ++i ) { - pthread_cond_destroy( & data[i].m_cond ); - pthread_mutex_destroy( & data[i].m_lock ); - } - } - - dt_total = TPI_Walltime() - dt_total ; - - *dt_loop = 1.0e6 * dt_run / (double) ( number_trials * number_loops ); - *dt_start_stop = 1.0e6 * ( dt_total - dt_run ) / (double) number_trials ; -} - -/*------------------------------------------------------------------------*/ -/*------------------------------------------------------------------------*/ - -static double test_mutex_init_destroy( const int number ) -{ - pthread_mutex_t 
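/* Microbenchmark: time 'number' init/destroy pairs on a single mutex and
 * return the cost per pair in seconds (the caller scales to microseconds);
 * test_mutex_lock_unlock below does the same for lock/unlock pairs.
 */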
mutex ; - double dt ; - int i ; - dt = TPI_Walltime(); - for ( i = 0 ; i < number ; ++i ) { - pthread_mutex_init( & mutex , NULL ); - pthread_mutex_destroy( & mutex ); - } - dt = ( TPI_Walltime() - dt ) / (double) number ; - return dt ; -} - -static double test_mutex_lock_unlock( const int number ) -{ - pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER ; - double dt ; - int i ; - - dt = TPI_Walltime(); - for ( i = 0 ; i < number ; ++i ) { - pthread_mutex_lock( & mutex ); - pthread_mutex_unlock( & mutex ); - } - dt = ( TPI_Walltime() - dt ) / (double) number ; - - pthread_mutex_destroy( & mutex ); - return dt ; -} - -/*------------------------------------------------------------------------*/ - -void test_pthreads_performance( int n_test , int * n_concurrent ) -{ - const int n_mutex = 1e4 /* 1e8 */ ; - const int n_trial = 1e2 /* 1e4 */ ; - const int n_loop = 1e3 /* 1e4 */ ; - - { - const double dt = 1e6 * test_mutex_init_destroy( n_mutex ); - fprintf(stdout,"\n\"test pthreads mutex init/destroy (microsec)\" , %g\n",dt); - } - - { - const double dt = 1e6 * test_mutex_lock_unlock( n_mutex ); - fprintf(stdout,"\n\"test pthreads mutex lock/unlock (microsec)\" , %g\n",dt); - } - - /*------------------------------------------------------------------*/ - - { - int i ; - - pthread_attr_t thread_attr ; - - fprintf(stdout,"\n\"test pthreads SCOPE_SYSTEM run-blocking\"\n"); - fprintf(stdout,"\"#Threads\" , \"#Spawned\" \"Spawn (microsec)\" , \"Loop (microsec)\"\n"); - - pthread_attr_init( & thread_attr ); - pthread_attr_setscope( & thread_attr, PTHREAD_SCOPE_SYSTEM ); - pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_DETACHED ); - - for ( i = 0 ; i < n_test ; ++i ) { - const int nthread = n_concurrent[i] ; - double dt_start_stop , dt_loop ; - - test_run( & thread_attr, nthread, n_trial, n_loop, - & dt_start_stop , & dt_loop ); - - fprintf( stdout, "%d , %d , %g , %g\n", - nthread , nthread - 1 , dt_start_stop , dt_loop ); - fflush( stdout ); - } - - pthread_attr_destroy( & thread_attr ); - } - - /*------------------------------------------------------------------*/ - - { - int i ; - - pthread_attr_t thread_attr ; - - fprintf(stdout,"\n\"test pthreads SCOPE_PROCESS run-blocking\"\n"); - fprintf(stdout,"\"#Threads\" , \"#Spawned\" \"Spawn (microsec)\" , \"Loop (microsec)\"\n"); - - pthread_attr_init( & thread_attr ); - pthread_attr_setscope( & thread_attr, PTHREAD_SCOPE_PROCESS ); - pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_DETACHED ); - - for ( i = 0 ; i < n_test ; ++i ) { - const int nthread = n_concurrent[i] ; - double dt_start_stop , dt_loop ; - - test_run( & thread_attr, nthread, n_trial, n_loop, - & dt_start_stop , & dt_loop ); - - fprintf( stdout, "%d , %d , %g , %g\n", - nthread , nthread - 1 , dt_start_stop , dt_loop ); - fflush( stdout ); - } - - pthread_attr_destroy( & thread_attr ); - } - - /*------------------------------------------------------------------*/ - - fflush( stdout ); -} - -/*------------------------------------------------------------------------*/ - - diff --git a/kokkos/basic/optional/ThreadPool/test/test_tpi.cpp b/kokkos/basic/optional/ThreadPool/test/test_tpi.cpp deleted file mode 100644 index cf5a649..0000000 --- a/kokkos/basic/optional/ThreadPool/test/test_tpi.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for 
use of this work by or on behalf of the U.S. Government. */ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. Carter Edwards - */ - -#include -#include -#include - -/*------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------*/ - -template class TEST ; - -template -class TEST { -public: - int m_flag[N] ; - ~TEST() {} - TEST(); - void flag( TPI::Work & ); - void verify(); -private: - TEST( const TEST & ); - TEST & operator = ( const TEST & ); -}; - -template -TEST::TEST() -{ - for ( unsigned i = 0 ; i < N ; ++i ) { m_flag[i] = 0 ; } -} - -template -void TEST::flag( TPI::Work & work ) -{ - static const char method[] = "TEST::flag" ; - if ( work.count != (int) N ) { - std::cerr << method - << "<" << N << "> count(" << work.count << ") failed" - << std::endl ; - throw std::exception(); - } - m_flag[ work.rank ] = 1 ; -} - -template -void TEST::verify() -{ - static const char method[] = "TEST::verify" ; - - for ( unsigned i = 0 ; i < N ; ++i ) { - if ( ! m_flag[i] ) { - std::cerr << method - << "<" << N << "> m_flag[" << i << "] failed" - << std::endl ; - throw std::exception(); - } - else { - m_flag[i] = 0 ; - } - } -} - -void test_tpi_cpp( int np ) -{ - TEST<1> test_1 ; - TEST<2> test_2 ; - TEST<4> test_4 ; - TEST<8> test_8 ; - TEST<16> test_16 ; - - TPI::Init( np ); - - TPI::Run( test_1 , & TEST<1>::flag , 1 ); - TPI::Run( test_2 , & TEST<2>::flag , 2 ); - TPI::Run( test_4 , & TEST<4>::flag , 4 ); - TPI::Run( test_8 , & TEST<8>::flag , 8 ); - TPI::Run( test_16 , & TEST<16>::flag , 16 ); - - test_1.verify(); - test_2.verify(); - test_4.verify(); - test_8.verify(); - test_16.verify(); - - TPI::Finalize(); -} - -int main( int argc , char ** argv ) -{ - if ( argc ) { std::cout << argv[0] ; } - else { std::cout << "test" ; } - test_tpi_cpp(1); std::cout << " 1 " ; - test_tpi_cpp(2); std::cout << " 2 " ; - test_tpi_cpp(4); std::cout << " 4 " ; - test_tpi_cpp(8); std::cout << " 8 " ; - test_tpi_cpp(16); std::cout << " 16 " ; - std::cout << " passed" << std::endl ; - return 0 ; -} - diff --git a/kokkos/basic/optional/ThreadPool/test/test_tpi_unit.c b/kokkos/basic/optional/ThreadPool/test/test_tpi_unit.c deleted file mode 100644 index 34faef8..0000000 --- a/kokkos/basic/optional/ThreadPool/test/test_tpi_unit.c +++ /dev/null @@ -1,505 +0,0 @@ -/*------------------------------------------------------------------------*/ -/* TPI: Thread Pool Interface */ -/* Copyright (2008) Sandia Corporation */ -/* */ -/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ -/* license for use of this work by or on behalf of the U.S. Government. 
*/ -/* */ -/* This library is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU Lesser General Public License as */ -/* published by the Free Software Foundation; either version 2.1 of the */ -/* License, or (at your option) any later version. */ -/* */ -/* This library is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* Lesser General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU Lesser General Public */ -/* License along with this library; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ -/* USA */ -/*------------------------------------------------------------------------*/ -/** - * @author H. Carter Edwards - */ - -#include -#include -#include -#include - -#if defined( HAVE_MPI ) -#include -#endif - -/*--------------------------------------------------------------------*/ - -static void test_work( TPI_Work * ); -static void test_reduce_work( TPI_Work * ); -static void test_reduce_init( TPI_Work * ); -static void test_reduce_join( TPI_Work * , const void * ); -static void test_reduce_via_lock( TPI_Work * ); -static void test_reduce_via_nolock( TPI_Work * ); - -void test_tpi_init( const int ntest, const int nthread[], const int ntrial); -void test_tpi_block( const int ntest, const int nthread[], const int ntrial); -void test_tpi_reduce( const int ntest, const int nthread[], const int ntrial); -void test_tpi_work( const int ntest, const int nthread[], - const int nwork , const int ntrial ); -void test_tpi_work_async( - const int ntest , const int nthread[] , const int nwork , const int ntrial ); - -int main( int argc , char ** argv ) -{ - int num_thread[] = { 1 , 2 , 4 , 6 , 8 , 12 , 16 }; - int num_test = sizeof(num_thread) / sizeof(int); - -#if defined( HAVE_MPI ) - int rank ; - - MPI_Init( & argc , & argv ); - MPI_Comm_rank( MPI_COMM_WORLD , & rank ); - if ( 0 == rank ) { -#endif - - const int ntrial = 1 < argc ? atoi( argv[1] ) : 5 ; - const int nwork = 2 < argc ? atoi( argv[2] ) : 100 ; - - /* Get the configuration print message out. 
*/ - fprintf( stdout , "\"%s\"\n" , TPI_Version() ); - fprintf( stdout , "\"Unit Testing: ntrial = %d , nwork = %d\"\n" , ntrial , nwork ); - - test_tpi_init( num_test , num_thread , ntrial ); - test_tpi_block( num_test , num_thread , ntrial ); - test_tpi_reduce( num_test , num_thread , ntrial ); - test_tpi_work( num_test , num_thread , nwork , ntrial ); - test_tpi_work_async( num_test , num_thread , nwork , ntrial ); - -#if defined( HAVE_MPI ) - } - MPI_Finalize(); -#endif - - return 0 ; -} - -/*--------------------------------------------------------------------*/ - -void test_tpi_init( const int ntest , const int nthread[] , const int ntrial ) -{ - int j ; - - fprintf( stdout , "\n\"TEST TPI_Init / TPI_Finalize\"\n" ); - fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Init(avg-msec)\" , \"TPI_Init(stddev-msec)\" , \"TPI_Finalize(avg-msec)\" , \"TPI_Finalize(stddev-msec)\"\n"); - - for ( j = 0 ; j < ntest ; ++j ) { - const int nth = nthread[j]; - double dt_init_total = 0.0 ; - double dt_init_total_2 = 0.0 ; - double dt_fin_total = 0.0 ; - double dt_fin_total_2 = 0.0 ; - int i ; - int result ; - - for ( i = 0 ; i < ntrial ; ++i ) { - double t , dt ; - - t = TPI_Walltime(); - result = TPI_Init( nth ); - dt = TPI_Walltime() - t ; - dt_init_total += dt ; - dt_init_total_2 += dt * dt ; - - if ( result != nth ) { - fprintf(stderr,"%d != TPI_Init(%d) : FAILED at trial %d\n", - result , nth , i ); - abort(); - } - - t = TPI_Walltime(); - TPI_Finalize(); - dt = TPI_Walltime() - t ; - dt_fin_total += dt ; - dt_fin_total_2 += dt * dt ; - } - - if ( 1 < ntrial ) { - const double init_mean = 1.0e6 * dt_init_total / ntrial ; - const double init_sdev = 1.0e6 * sqrt( ( ntrial * dt_init_total_2 - - dt_init_total * dt_init_total ) / - ( ntrial * ( ntrial - 1 ) ) ); - - const double fin_mean = 1.0e6 * dt_fin_total / ntrial ; - const double fin_sdev = 1.0e6 * sqrt( ( ntrial * dt_fin_total_2 - - dt_fin_total * dt_fin_total ) / - ( ntrial * ( ntrial - 1 ) ) ); - - fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n", - nth , ntrial , init_mean , init_sdev , fin_mean , fin_sdev ); - } - } -} - -/*--------------------------------------------------------------------*/ - -void test_tpi_block( const int ntest , const int nthread[] , const int ntrial ) -{ - int i, j ; - - fprintf( stdout , "\n\"TEST TPI_Block / TPI_Unblock\"\n" ); - fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Block(avg-msec)\" , \"TPI_Block(stddev-msec)\" , \"TPI_Unblock(avg-msec)\" , \"TPI_Unblock(stddev-msec)\"\n"); - - for ( j = 0 ; j < ntest ; ++j ) { - const int nth = nthread[j]; - - double dt_block_total = 0.0 ; - double dt_block_total_2 = 0.0 ; - double dt_unblock_total = 0.0 ; - double dt_unblock_total_2 = 0.0 ; - - int result = TPI_Init( nth ); - - if ( result != nth ) { - fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); - abort(); - } - - for ( i = 0 ; i < ntrial ; ++i ) { - double t , dt ; - - t = TPI_Walltime(); - TPI_Block(); - dt = TPI_Walltime() - t ; - dt_block_total += dt ; - dt_block_total_2 += dt * dt ; - - - t = TPI_Walltime(); - TPI_Unblock(); - dt = TPI_Walltime() - t ; - dt_unblock_total += dt ; - dt_unblock_total_2 += dt * dt ; - } - - TPI_Finalize(); - - if ( 1 < ntrial ) { - const double block_mean = 1.0e6 * dt_block_total / ntrial ; - const double block_sdev = 1.0e6 * sqrt( ( ntrial * dt_block_total_2 - - dt_block_total * dt_block_total ) / - ( ntrial * ( ntrial - 1 ) ) ); - - const double unblock_mean = 1.0e6 * dt_unblock_total / ntrial ; - const double unblock_sdev = 1.0e6 * sqrt( ( ntrial * 
dt_unblock_total_2 - - dt_unblock_total * dt_unblock_total) / - ( ntrial * ( ntrial - 1 ) ) ); - - fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n", - nth , ntrial , block_mean , block_sdev , unblock_mean , unblock_sdev ); - } - } -} - -/*--------------------------------------------------------------------*/ - -void test_tpi_reduce( const int ntest , const int nthread[] , const int ntrial ) -{ - int j ; - - fprintf( stdout , "\n\"TEST TPI_Run_threads(reduce) / TPI_Run_threads_reduce\"\n" ); - fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Run_threads(avg-msec)\" , \"TPI_Run_threads(stddev-msec)\" , \"TPI_Run_threads_reduce(avg-msec)\" , \"TPI_Run_threads_reduce(stddev-msec)\"\n"); - - for ( j = 0 ; j < ntest ; ++j ) { - const int nth = nthread[j]; - - double dt_lock_total = 0.0 ; - double dt_lock_total_2 = 0.0 ; - double dt_reduce_total = 0.0 ; - double dt_reduce_total_2 = 0.0 ; - int i ; - - int result = TPI_Init( nth ); - - if ( result != nth ) { - fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); - } - - for ( i = 0 ; i < ntrial ; ++i ) { - double t , dt ; - int value = 0 ; - int * const ptr = & value ; - - t = TPI_Walltime(); - TPI_Run_threads( test_reduce_via_lock , & ptr , 1 ); - dt = TPI_Walltime() - t ; - dt_lock_total += dt ; - dt_lock_total_2 += dt * dt ; - - if ( value != nth ) { - fprintf(stderr, - "TPI_Run_threads(reduce,...) : FAILED at trial %d\n", - i ); - abort(); - } - - value = 0 ; - - t = TPI_Walltime(); - TPI_Run_threads_reduce( test_reduce_via_nolock , NULL , - test_reduce_join , test_reduce_init , - sizeof(value) , & value ); - - dt = TPI_Walltime() - t ; - dt_reduce_total += dt ; - dt_reduce_total_2 += dt * dt ; - - if ( value != nth ) { - fprintf(stderr, - "TPI_Run_threads_reduce(...) : FAILED at trial %d\n", - i ); - abort(); - } - } - - TPI_Finalize(); - - if ( 1 < ntrial ) { - const double lock_mean = 1.0e6 * dt_lock_total / ntrial ; - const double lock_sdev = 1.0e6 * sqrt( ( ntrial * dt_lock_total_2 - - dt_lock_total * dt_lock_total ) / - ( ntrial * ( ntrial - 1 ) ) ); - - const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ; - const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 - - dt_reduce_total * dt_reduce_total) / - ( ntrial * ( ntrial - 1 ) ) ); - - fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n", - nth, ntrial, lock_mean, lock_sdev, reduce_mean, reduce_sdev); - } - } -} - -/*--------------------------------------------------------------------*/ - -void test_tpi_work( const int ntest , const int nthread[] , const int nwork , - const int ntrial ) -{ - int * const flags = (int *) malloc( sizeof(int) * nwork ); - int j ; - - fprintf( stdout , "\n\"TEST TPI_Run / TPI_Run_reduce\"\n" ); - fprintf( stdout , "\"#Thread\" , \"#Work\" , \"#Trial\" , \"TPI_Run(avg-msec)\" , \"TPI_Run(stddev-msec)\" , \"TPI_Run_reduce(avg-msec)\" , \"TPI_Run_reduce(stddev-msec)\"\n"); - - for ( j = 0 ; j < ntest ; ++j ) { - const int nth = nthread[j]; - - double dt_work_total = 0.0 ; - double dt_work_total_2 = 0.0 ; - double dt_reduce_total = 0.0 ; - double dt_reduce_total_2 = 0.0 ; - int i , k ; - - int result = TPI_Init( nth ); - - if ( result != nth ) { - fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); - } - - for ( i = 0 ; i < ntrial ; ++i ) { - double t , dt ; - int value = 0 ; - - for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; } - - t = TPI_Walltime(); - TPI_Run( test_work , & flags , nwork , 0 ); - dt = TPI_Walltime() - t ; - dt_work_total += dt ; - dt_work_total_2 += dt * dt ; - - for ( k = 0 ; k < 
nwork && flags[k] ; ++k ); - - if ( k < nwork ) { - fprintf(stderr, "TPI_Run(...) : FAILED at trial %d\n", i ); - abort(); - } - - for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; } - - t = TPI_Walltime(); - TPI_Run_reduce( test_reduce_work , & flags , nwork , - test_reduce_join , test_reduce_init , - sizeof(value) , & value ); - - dt = TPI_Walltime() - t ; - dt_reduce_total += dt ; - dt_reduce_total_2 += dt * dt ; - - for ( k = 0 ; k < nwork && flags[k] ; ++k ); - - if ( value != nwork || k < nwork ) { - fprintf(stderr, "TPI_Run_reduce(...) : FAILED at trial %d\n", i ); - abort(); - } - } - - TPI_Finalize(); - - if ( 1 < ntrial ) { - const double work_mean = 1.0e6 * dt_work_total / ntrial ; - const double work_sdev = 1.0e6 * sqrt( ( ntrial * dt_work_total_2 - - dt_work_total * dt_work_total ) / - ( ntrial * ( ntrial - 1 ) ) ); - - const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ; - const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 - - dt_reduce_total * dt_reduce_total) / - ( ntrial * ( ntrial - 1 ) ) ); - - fprintf(stdout,"%d , %d , %d , %10g , %10g , %10g , %10g\n", - nth, ntrial, nwork, work_mean, work_sdev, reduce_mean, reduce_sdev); - } - } - - free( flags ); -} - -/*--------------------------------------------------------------------*/ - -void test_tpi_work_async( - const int ntest , const int nthread[] , const int nwork , const int ntrial ) -{ - int * const flags = (int *) malloc( sizeof(int) * nwork ); - int j ; - - fprintf( stdout , "\n\"TEST TPI_Start / TPI_Start_reduce\"\n" ); - fprintf( stdout , "\"#Thread\" , \"#Work\" , \"#Trial\" , \"TPI_Start(avg-msec)\" , \"TPI_Start(stddev-msec)\" , \"TPI_Start_reduce(avg-msec)\" , \"TPI_Start_reduce(stddev-msec)\"\n"); - - for ( j = 0 ; j < ntest ; ++j ) { - const int nth = nthread[j]; - - double dt_work_total = 0.0 ; - double dt_work_total_2 = 0.0 ; - double dt_reduce_total = 0.0 ; - double dt_reduce_total_2 = 0.0 ; - int i , k ; - - int result = TPI_Init( nth ); - - if ( result != nth ) { - fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); - } - - for ( i = 0 ; i < ntrial ; ++i ) { - double t , dt ; - int value = 0 ; - - for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; } - - t = TPI_Walltime(); - TPI_Start( test_work , & flags , nwork , 0 ); - TPI_Wait(); - dt = TPI_Walltime() - t ; - dt_work_total += dt ; - dt_work_total_2 += dt * dt ; - - for ( k = 0 ; k < nwork && flags[k] ; ++k ); - - if ( k < nwork ) { - fprintf(stderr, "TPI_Run(...) : FAILED at trial %d\n", i ); - abort(); - } - - for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; } - - t = TPI_Walltime(); - - TPI_Start_reduce( test_reduce_work , & flags , nwork , - test_reduce_join , test_reduce_init , - sizeof(value) , & value ); - TPI_Wait(); - - dt = TPI_Walltime() - t ; - dt_reduce_total += dt ; - dt_reduce_total_2 += dt * dt ; - - for ( k = 0 ; k < nwork && flags[k] ; ++k ); - - if ( value != nwork || k < nwork ) { - fprintf(stderr, "TPI_Run_reduce(...) 
: FAILED at trial %d\n", i ); - abort(); - } - } - - TPI_Finalize(); - - if ( 1 < ntrial ) { - const double work_mean = 1.0e6 * dt_work_total / ntrial ; - const double work_sdev = 1.0e6 * sqrt( ( ntrial * dt_work_total_2 - - dt_work_total * dt_work_total ) / - ( ntrial * ( ntrial - 1 ) ) ); - - const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ; - const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 - - dt_reduce_total * dt_reduce_total) / - ( ntrial * ( ntrial - 1 ) ) ); - - fprintf(stdout,"%d , %d , %d , %10g , %10g , %10g , %10g\n", - nth, ntrial, nwork, work_mean, work_sdev, reduce_mean, reduce_sdev); - } - } - - free( flags ); -} - -/*--------------------------------------------------------------------*/ - -static void test_work( TPI_Work * work ) -{ - int * const flags = * (int *const*) work->info ; - flags[ work->rank ] = 1 ; -} - -static void test_reduce_work( TPI_Work * work ) -{ - int * const flags = * (int *const*) work->info ; - flags[ work->rank ] = 1 ; - - *((int *) work->reduce) += 1 ; -} - -static void test_reduce_init( TPI_Work * work ) -{ - *((int *) work->reduce) = 0 ; -} - -static void test_reduce_join( TPI_Work * work , const void * src ) -{ - *((int *) work->reduce) += *( (const int *) src ); -} - -static void test_reduce_via_lock( TPI_Work * work ) -{ - int * const value = * ((int *const*) work->info ); - int result ; - if ( ( result = TPI_Lock(0) ) ) { - fprintf(stderr,"TPI_Lock(0) = %d : FAILED\n", result); - abort(); - } - *value += 1 ; - if ( ( result = TPI_Unlock(0) ) ) { - fprintf(stderr,"TPI_Unlock(0) = %d : FAILED\n", result); - abort(); - } -} - -static void test_reduce_via_nolock( TPI_Work * work ) -{ - int * const value = (int *) work->reduce ; - *value += 1 ; -} - -/*--------------------------------------------------------------------*/ - diff --git a/kokkos/basic/optional/copy_from_trilinos b/kokkos/basic/optional/copy_from_trilinos deleted file mode 100755 index 042e4fb..0000000 --- a/kokkos/basic/optional/copy_from_trilinos +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -TRILINOS_SRC=$1 - -if [ -d "${TRILINOS_SRC}" -a -d "${TRILINOS_SRC}/packages" ] ; -then - -#----------------------------------------------------------------------- -cp -r ${TRILINOS_SRC}/packages/ThreadPool/* ThreadPool -rm -rf ThreadPool/doc - -cat << END_CAT > ThreadPool/ThreadPool_config.h -#ifndef HAVE_PTHREAD -#define HAVE_PTHREAD -#endif -END_CAT - -#----------------------------------------------------------------------- - -else - - echo 'usage: ' $0 '' - -fi - diff --git a/kokkos/basic/optional/cuda/CudaCall.hpp b/kokkos/basic/optional/cuda/CudaCall.hpp deleted file mode 100644 index f4b8c70..0000000 --- a/kokkos/basic/optional/cuda/CudaCall.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef stk_algsup_CudaCall_hpp -#define stk_algsup_CudaCall_hpp - -#include -#include - -//---------------------------------------------------------------- -inline -void stk_cuda_call(cudaError err , const char* name ) -{ - if ( err != cudaSuccess ) { - fprintf(stderr, "%s error: %s\n",name, cudaGetErrorString(err) ); - exit(-1); - } -} - -#define CUDA_CALL( cuda_fn ) stk_cuda_call( cuda_fn , #cuda_fn ) - - -#endif - diff --git a/kokkos/basic/optional/cuda/CudaMemoryModel.hpp b/kokkos/basic/optional/cuda/CudaMemoryModel.hpp deleted file mode 100644 index 54d189e..0000000 --- a/kokkos/basic/optional/cuda/CudaMemoryModel.hpp +++ /dev/null @@ -1,152 +0,0 @@ -#ifndef _CudaMemoryModel_hpp_ -#define _CudaMemoryModel_hpp_ - -#include -#ifdef MINIFE_HAVE_CUDA - -#include -#include 
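Editor's note on the reduction callbacks above: test_reduce_via_lock and test_reduce_via_nolock contrast the two accumulation strategies timed by test_tpi_reduce. The first serializes every increment of one shared counter behind TPI_Lock/TPI_Unlock; the second lets each thread bump its private work->reduce slot, which test_reduce_init zeroes and test_reduce_join combines. A stand-alone sketch of the same two strategies using standard C++ threads instead of TPI (thread count and names are illustrative only):

    #include <iostream>
    #include <mutex>
    #include <numeric>
    #include <thread>
    #include <vector>

    int main()
    {
      const int nthreads = 4;

      // Strategy 1: every thread bumps one shared counter under a lock
      // (the pattern of test_reduce_via_lock).
      int shared_count = 0;
      std::mutex m;
      {
        std::vector<std::thread> pool;
        for (int t = 0; t < nthreads; ++t)
          pool.emplace_back([&] { std::lock_guard<std::mutex> g(m); ++shared_count; });
        for (std::thread& th : pool) th.join();
      }

      // Strategy 2: every thread gets its own zero-initialized partial ("init")
      // and the partials are combined afterwards ("join"), the pattern of
      // TPI_Run_threads_reduce / test_reduce_via_nolock.
      std::vector<int> partials(nthreads, 0);
      {
        std::vector<std::thread> pool;
        for (int t = 0; t < nthreads; ++t)
          pool.emplace_back([&partials, t] { ++partials[t]; });
        for (std::thread& th : pool) th.join();
      }
      const int reduced = std::accumulate(partials.begin(), partials.end(), 0);

      std::cout << shared_count << " " << reduced << "\n";   // both print 4
      return 0;
    }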
-#include - -#include -#include -#include - -class CudaMemoryModel { - public: - CudaMemoryModel() - : host_to_device_map(), - device_to_host_map() - {} - - /** Destructor - * Upon destruction this class de-allocates all device-buffers that - * it was tracking. - */ - virtual ~CudaMemoryModel(); - - /** Return a device-pointer corresponding to the given host-ptr and size. - * The returned device-pointer points to a buffer which has been allocated - * on the CUDA device with length buf_size*sizeof(T), but not initialized. - * - * If a device-pointer has already been allocated for the given host-pointer - * (by a previous call to this method) then that (previously-allocated) device-pointer - * is returned. - */ - template - T* get_buffer(const T* host_ptr, size_t buf_size); - - /** Destroy (free) the specified device-pointer. - * - * De-allocates the cuda-device buffer. - */ - template - void destroy_buffer(T*& device_ptr); - - /** Copy the contents of the given host-ptr to the given device-ptr. - * If the given device-ptr is not known (was not created by a previous - * call to get_buffer), an exception is thrown. - */ - template - void copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr); - - /** Copy the contents of the given device-ptr to the given host-ptr. - * If the given device-ptr is not known (was not created by a previous - * call to get_buffer), an exception is thrown. - */ - template - void copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr); - - private: - std::map host_to_device_map; - std::map device_to_host_map; -}; - -//------------------------------------------------------------------------------ -template -inline -T* CudaMemoryModel::get_buffer(const T* host_ptr, size_t buf_size) -{ - T* device_ptr = NULL; - - std::map::iterator iter = host_to_device_map.find(host_ptr); - - if (iter == host_to_device_map.end()) { - CUDA_CALL( cudaMalloc( (void**)&device_ptr, sizeof(T)*buf_size) ); - - host_to_device_map.insert( std::make_pair(host_ptr, device_ptr) ); - device_to_host_map.insert( std::make_pair(device_ptr, host_ptr) ); - } - else { - device_ptr = reinterpret_cast(iter->second); - } - - return device_ptr; -} - -//------------------------------------------------------------------------------ -template -inline -void CudaMemoryModel::destroy_buffer(T*& device_ptr) -{ - std::map::iterator iter = device_to_host_map.find(device_ptr); - if (iter != device_to_host_map.end()) { - const void* host_ptr = iter->second; - if (host_ptr != NULL) { - std::map::iterator iter2 = host_to_device_map.find(host_ptr); - if (iter2 != host_to_device_map.end()) { - host_to_device_map.erase(iter2); - } - } - CUDA_CALL( cudaFree(device_ptr) ); - device_ptr = NULL; - device_to_host_map.erase(iter); - } -} - -//------------------------------------------------------------------------------ -template -inline -void CudaMemoryModel::copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr) -{ - std::map::iterator iter = device_to_host_map.find(device_ptr); - if (iter == device_to_host_map.end()) { - //failed to find device_ptr in device_to_host_map - throw std::runtime_error("CudaMemoryModel::copy_to_buffer ERROR, device_ptr not known."); - } - - CUDA_CALL( cudaMemcpy( device_ptr, host_ptr, sizeof(T)*buf_size, cudaMemcpyHostToDevice) ); -} - -//------------------------------------------------------------------------------ -template -inline -void CudaMemoryModel::copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr) -{ - std::map::iterator iter = 
device_to_host_map.find(device_ptr); - if (iter == device_to_host_map.end()) { - //failed to find device_ptr in device_to_host_map - throw std::runtime_error("CudaMemoryModel::copy_from_buffer ERROR, device_ptr not known."); - } - - CUDA_CALL( cudaMemcpy( host_ptr, device_ptr, sizeof(T)*buf_size, cudaMemcpyDeviceToHost) ); -} - -inline -CudaMemoryModel::~CudaMemoryModel() -{ - std::map::iterator - iter = device_to_host_map.begin(), - iter_end = device_to_host_map.end(); - - for(; iter!=iter_end; ++iter) { - //cast away const so we can free the pointer: - void* dev_ptr = const_cast(iter->first); - CUDA_CALL( cudaFree(dev_ptr) ); - } -} - -#endif - -#endif - diff --git a/kokkos/basic/optional/cuda/CudaNode.cpp b/kokkos/basic/optional/cuda/CudaNode.cpp deleted file mode 100644 index 5ddc580..0000000 --- a/kokkos/basic/optional/cuda/CudaNode.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include -#include -#include - -// some CUDA rules of thumb employed here (stolen from slides by Mike Bailey, Oregon State) -// -The number of Blocks should be at least twice the number of MPs -// -The number of Threads per Block should be a multiple of 64 -// - 192 or 256 are good numbers for Threads/Block -// We will enforce that numThreads is a power of two (to ease the reduction kernel) -// greater than 64 - -CUDANode::CUDANode(int device, int numBlocks, int numThreads, int verbose) -: numBlocks_(numBlocks) -, numThreads_(numThreads) -, h_blk_mem_(NULL) -, d_blk_mem_(NULL) -, blk_mem_size_(0) -{ - using std::cout; - using std::endl; - using std::runtime_error; - // enforce that numThreads_ is a multiple of 64 - if (numThreads_ != 64 && numThreads_ != 128 && numThreads_ != 256 && numThreads_ != 512 - && numThreads_ != 1 && numThreads_ != 2 && numThreads_ != 4 && numThreads_ != 8 && numThreads_ != 16 - && numThreads_ != 32) { -// throw runtime_error("CUDANode::CUDANode(): number of threads per block must be a power of two in [1,512]."); - } - int deviceCount; cudaGetDeviceCount(&deviceCount); - if (device >= deviceCount) { - if (deviceCount == 0) { -// throw runtime_error("CUDANode::CUDANode(): system has no CUDA devices."); - } - if (verbose) { - cout << "CUDANode::CUDANode(): specified device number not valid. Using device 0." << endl; - } - device = 0; - } - cudaDeviceProp deviceProp; - int deviceAlreadyBeingUsed = -1; - cudaGetDevice(&deviceAlreadyBeingUsed); - if (deviceAlreadyBeingUsed >= 0 && deviceAlreadyBeingUsed < deviceCount) { - device = deviceAlreadyBeingUsed; - } - else { - cudaSetDevice(device); - } - cudaGetDeviceProperties(&deviceProp, device); - // as of CUDA 2.1, device prop contains the following fields - // char name[256]; - // size_t totalGlobalMem, sharedMemPerBlock; - // int regsPerBlock, warpSize; - // size_t memPitch; - // int maxThreadsPerBlock, maxThreadsDim[3], maxGridSize[3]; - // size_t totalConstMem; - // int major, minor; - // int clockRate; - // size_t textureAlignment; - // int deviceOverlap; - // int multiProcessorCount; - // int kernelExecTimeoutEnabled; - if (verbose) { - cout << "CUDANode attached to device #" << device << " \"" << deviceProp.name - << "\", of compute capability " << deviceProp.major << "." 
<< deviceProp.minor - << endl; - } - totalMem_ = deviceProp.totalGlobalMem; - - expand_blk_mem(numBlocks_*8); -} - -void CUDANode::expand_blk_mem(size_t size_in_bytes) -{ - if (blk_mem_size_ >= size_in_bytes) return; - - if (d_blk_mem_ != NULL) { - cutilSafeCallNoSync( cudaFree(d_blk_mem_) ); - delete [] h_blk_mem_; - } - - cutilSafeCallNoSync( cudaMalloc(&d_blk_mem_, size_in_bytes) ); - h_blk_mem_ = new char[size_in_bytes]; - blk_mem_size_ = size_in_bytes; -} - -CUDANode::~CUDANode() -{ - if (d_blk_mem_ != NULL) { - cutilSafeCallNoSync( cudaFree(d_blk_mem_) ); - d_blk_mem_ = NULL; - delete [] h_blk_mem_; - h_blk_mem_ = NULL; - } - blk_mem_size_ = 0; -} - diff --git a/kokkos/basic/optional/cuda/CudaNode.cuh b/kokkos/basic/optional/cuda/CudaNode.cuh deleted file mode 100644 index 9b1b4fb..0000000 --- a/kokkos/basic/optional/cuda/CudaNode.cuh +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef CUDANODE_CUH_ -#define CUDANODE_CUH_ - -#include -#include -#include -#include -#include - -// must define this before including any kernels -#define KERNEL_PREFIX __device__ __host__ - -#include - -#include - -#ifdef CUDANODE_INCLUDE_PARALLEL_FOR -template -__global__ void -Tkern1D(int length, WDP wd, int stride) -{ - unsigned int i = blockIdx.x*blockDim.x + threadIdx.x; - while(i < length) { - wd(i); - i += stride; - } -} - -template -void CUDANode::parallel_for(int length, WDP wd) { - if (length == 0) return; - unsigned int stride = numThreads_ * numBlocks_; - Tkern1D <<< numBlocks_, numThreads_ >>>(length,wd,stride); -} -#endif // parallel_for - -#ifdef CUDANODE_INCLUDE_PARALLEL_REDUCE -template -void call_dot(DotOp& wd) -{ - printf("ERROR, unknown scalar-type, skipping cuda dot-product.\n"); -} -template<> -void call_dot(DotOp& wd) -{ - wd.result = cublasDdot(wd.n, wd.x, 1, wd.y, 1); -} -template<> -void call_dot(DotOp& wd) -{ - wd.result = cublasSdot(wd.n, wd.x, 1, wd.y, 1); -} - -template -void CUDANode::parallel_reduce(int length, WDP& wd) -{ - if (length == 1) { - wd.result = wd.generate(0); - return; - } - - call_dot(wd); -} -#endif // parallel_reduce - -#endif diff --git a/kokkos/basic/optional/cuda/CudaNode.hpp b/kokkos/basic/optional/cuda/CudaNode.hpp deleted file mode 100644 index de078ea..0000000 --- a/kokkos/basic/optional/cuda/CudaNode.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef CUDANODE_HPP_ -#define CUDANODE_HPP_ - -#include - -// forward declaration -class CUDANode; - -class CUDANode : public CudaMemoryModel { - public: - - CUDANode(int device = 0, int numBlocks = -1, int numThreads = 256, int verbose = 1); - - ~CUDANode(); - - //@{ Computational methods - - template - void parallel_for(int length, WDP wdp); - - template - void parallel_reduce(int length, WDP& wd); - - //@} - - static CUDANode& singleton(int device=0, int numBlocks=-1, int numThreads=256) - { - static CUDANode* cuda_node = NULL; - if (cuda_node == NULL) { - cuda_node = new CUDANode(device, numBlocks, numThreads); - } - return *cuda_node; - } - - private: - //template - //void call_reduce(int length, WDP wd, int threads, int blocks, void * d_blkpart); - // numBlocks_ is - // - the number of blocks launched in a call to parallel_for() - // - not used by parallel_reduce() - int numBlocks_; - // numThreads_ is required to be a power-of-two (our requirement) between 1 and 512 (CUDA's requirement). 
It is: - // - the maximum number of threads used by parallel_reduce() - // - the number of threads per block in a call to parallel_for() - int numThreads_; - // total global device memory, in bytes - int totalMem_; - - void expand_blk_mem(size_t size_in_bytes); - - char* h_blk_mem_; - void* d_blk_mem_; - size_t blk_mem_size_; - -}; - -#endif diff --git a/kokkos/basic/optional/cuda/CudaNodeImpl.hpp b/kokkos/basic/optional/cuda/CudaNodeImpl.hpp deleted file mode 100644 index 4b94562..0000000 --- a/kokkos/basic/optional/cuda/CudaNodeImpl.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CUDANODE_IMPL_HPP_ -#define CUDANODE_IMPL_HPP_ - -#include -#include -#include -#include -#include -#include - -// TODO: consider using cudaMallocHost to allocate page-locked host memory -// this speeds up transfer between device and host, and could be very -// useful in the case of Import/Export multivector operations - -#endif diff --git a/kokkos/basic/optional/cuda/Matrix.cu b/kokkos/basic/optional/cuda/Matrix.cu deleted file mode 100644 index 1487f1a..0000000 --- a/kokkos/basic/optional/cuda/Matrix.cu +++ /dev/null @@ -1,22 +0,0 @@ -#define CUDANODE_INCLUDE_PARALLEL_FOR - -// include for CudaNode method implementations -#include - -// includes for all operators for which Matrix needs support -#include -#include -#include - -#include -#include - -// explicit instantiations for Matrix class -#define EXPLICIT_MATRIX_SUPPORT(MATRIX,VECTOR) \ -template void CUDANode::parallel_for >(int , MatvecOp< MATRIX, VECTOR >); - -typedef miniFE::SparseMatrix Matrix_type; -typedef miniFE::Vector Vector_type; - -EXPLICIT_MATRIX_SUPPORT(Matrix_type,Vector_type) - diff --git a/kokkos/basic/optional/cuda/Vector.cu b/kokkos/basic/optional/cuda/Vector.cu deleted file mode 100644 index 9a79955..0000000 --- a/kokkos/basic/optional/cuda/Vector.cu +++ /dev/null @@ -1,19 +0,0 @@ -#define CUDANODE_INCLUDE_PARALLEL_REDUCE -#define CUDANODE_INCLUDE_PARALLEL_FOR - -// include for CudaNode method implementations -#include - -// includes for all operators for which Vector needs support -#include -#include -#include -#include - -// explicit instantiations for Vectors -#define EXPLICIT_VECTOR_SUPPORT(GLOBALORDINAL, SCALAR) \ -template void CUDANode::parallel_for >(int , WaxpbyOp< SCALAR >); \ -template void CUDANode::parallel_reduce< DotOp< SCALAR > >(int , DotOp< SCALAR >& ); \ -template void CUDANode::parallel_for >(int , FEComputeElem< GLOBALORDINAL, SCALAR > ); - -EXPLICIT_VECTOR_SUPPORT(MINIFE_GLOBAL_ORDINAL, MINIFE_SCALAR) diff --git a/kokkos/basic/optional/cuda/cutil_inline_runtime.h b/kokkos/basic/optional/cuda/cutil_inline_runtime.h deleted file mode 100644 index 1f49afb..0000000 --- a/kokkos/basic/optional/cuda/cutil_inline_runtime.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_ -#define _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_ - -#include -#include -#include - -#include - -// We define these calls here, so the user doesn't need to include __FILE__ and __LINE__ -// The advantage is the developers gets to use the inline function so they can debug -#define cutilSafeCallNoSync(err) __cudaSafeCallNoSync(err, __FILE__, __LINE__) -#define cutilSafeCall(err) __cudaSafeCall (err, __FILE__, __LINE__) -#define cutilSafeThreadSync() __cudaSafeThreadSync(__FILE__, __LINE__) -#define cutilCheckMsg(msg) __cutilCheckMsg (msg, __FILE__, __LINE__) - -inline void __cudaSafeCallNoSync( cudaError err, const char *file, const int line ) -{ - if( cudaSuccess != err) { - fprintf(stderr, "cudaSafeCallNoSync() Runtime API error in file 
<%s>, line %i : %s.\n", - file, line, cudaGetErrorString( err) ); - exit(-1); - } -} - -inline void __cudaSafeCall( cudaError err, const char *file, const int line ) -{ - if( cudaSuccess != err) { - fprintf(stderr, "cudaSafeCall() Runtime API error in file <%s>, line %i : %s.\n", - file, line, cudaGetErrorString( err) ); - exit(-1); - } -} - -inline void __cudaSafeThreadSync( const char *file, const int line ) -{ - cudaError err = cudaThreadSynchronize(); - if ( cudaSuccess != err) { - fprintf(stderr, "cudaThreadSynchronize() Driver API error in file '%s' in line %i : %s.\n", - file, line, cudaGetErrorString( err) ); - exit(-1); - } -} - -inline void __cutilCheckMsg( const char *errorMessage, const char *file, const int line ) -{ - cudaError_t err = cudaGetLastError(); - if( cudaSuccess != err) { - fprintf(stderr, "cutilCheckMsg() CUTIL CUDA error: %s in file <%s>, line %i : %s.\n", - errorMessage, file, line, cudaGetErrorString( err) ); - exit(-1); - } -#ifdef _DEBUG - err = cudaThreadSynchronize(); - if( cudaSuccess != err) { - fprintf(stderr, "cutilCheckMsg cudaThreadSynchronize error: %s in file <%s>, line %i : %s.\n", - errorMessage, file, line, cudaGetErrorString( err) ); - exit(-1); - } -#endif -} - -#endif // _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_ diff --git a/kokkos/basic/optional/make_targets b/kokkos/basic/optional/make_targets deleted file mode 100644 index 01ed2c8..0000000 --- a/kokkos/basic/optional/make_targets +++ /dev/null @@ -1,54 +0,0 @@ -#----------------------------------------------------------------------- - -TPI.o : ./optional/ThreadPool/src/TPI.c - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - -#----------------------------------------------------------------------- - -CudaNode.o : ./optional/cuda/CudaNode.cpp ./optional/cuda/*.hpp ./optional/cuda/*.h - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $< - -CudaVector.o : ./optional/cuda/Vector.cu ./optional/cuda/*.cuh - nvcc $(CUDAFLAGS) $(CPPFLAGS) -c -o $@ $< - -CudaMatrix.o : ./optional/cuda/Matrix.cu ./optional/cuda/*.cuh - nvcc $(CUDAFLAGS) $(CPPFLAGS) -c -o $@ $< - -#----------------------------------------------------------------------- -# Recursive make to create the object files in this directory, -# generate the archive, and then remove the object files. 
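Editor's note: cutilSafeCall, cutilSafeThreadSync and cutilCheckMsg above are the same check-and-abort idiom as CUDA_CALL in CudaCall.hpp: wrap every runtime call, and on failure print the call site together with cudaGetErrorString before exiting. A minimal stand-alone host-side sketch of that idiom (hypothetical macro and buffer names; assumes the CUDA runtime headers and a device are available):

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    static void check_cuda(cudaError_t err, const char* what, const char* file, int line)
    {
      if (err != cudaSuccess) {
        std::fprintf(stderr, "%s failed at %s:%d : %s\n",
                     what, file, line, cudaGetErrorString(err));
        std::exit(-1);
      }
    }
    #define CHECK_CUDA(call) check_cuda((call), #call, __FILE__, __LINE__)

    int main()
    {
      const size_t n = 1024;                       /* hypothetical buffer length */
      double* d_buf = NULL;
      CHECK_CUDA( cudaMalloc((void**)&d_buf, n * sizeof(double)) );
      CHECK_CUDA( cudaMemset(d_buf, 0, n * sizeof(double)) );
      CHECK_CUDA( cudaDeviceSynchronize() );       /* surfaces asynchronous errors */
      CHECK_CUDA( cudaFree(d_buf) );
      return 0;
    }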
- -libstk.a : - cd ./optional ; \ - $(MAKE) "CC=$(CC)" "CXX=$(CXX)" "CPPFLAGS=$(CPPFLAGS)" "CFLAGS=$(CFLAGS)" "CXXFLAGS=$(CXXFLAGS)" -f make_targets stk_library - -STK_SOURCE = \ - ./shards/src/*.cpp \ - ./stk_util/util/*.cpp \ - ./stk_util/environment/*.cpp \ - ./stk_util/parallel/*.cpp \ - ./stk_mesh/base/*.cpp \ - ./stk_mesh/baseImpl/*.cpp \ - ./stk_mesh/fem/*.cpp \ - stk_helpers.cpp - -STK_INCLUDES = \ - ./shards/src/*.hpp \ - ./shards/src/*.h \ - ./stk_util/util/*.hpp \ - ./stk_util/environment/*.hpp \ - ./stk_util/parallel/*.hpp \ - ./stk_mesh/base/*.hpp \ - ./stk_mesh/fem/*.hpp - -STK_INC = -I${PWD}/ThreadPool -I${PWD}/shards - -stk_library : $(STK_SOURCE) $(STK_INCLUDES) - $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(STK_INC) -c $(STK_SOURCE) - ar -qc ../libstk.a *.o - ranlib ../libstk.a - rm *.o - -#----------------------------------------------------------------------- - - diff --git a/kokkos/basic/perform_element_loop.hpp b/kokkos/basic/perform_element_loop.hpp deleted file mode 100644 index f65ad4f..0000000 --- a/kokkos/basic/perform_element_loop.hpp +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef _perform_element_loop_hpp_ -#define _perform_element_loop_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include -#include -#include -#include -#include - -namespace miniFE { - -template -void -perform_element_loop(const simple_mesh_description& mesh, - const Box& local_elem_box, - MatrixType& A, VectorType& b, - Parameters& /*params*/) -{ - typedef typename MatrixType::ScalarType Scalar; - - int global_elems_x = mesh.global_box[0][1]; - int global_elems_y = mesh.global_box[1][1]; - int global_elems_z = mesh.global_box[2][1]; - - //We will iterate the local-element-box (local portion of the mesh), and - //get element-IDs in preparation for later assembling the FE operators - //into the global sparse linear-system. 
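Editor's note: the comment above describes the first step of the serial element loop, which the code continuing below performs with BoxIterator and get_id: walk the local element box and turn each (x,y,z) triple into a global element ID. For orientation, an equivalent plain triple-loop sketch follows; it assumes the usual x-fastest (row-major) numbering of an nx-by-ny-by-nz element grid, whereas the real ordering is whatever miniFE's get_id defines:

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the BoxIterator/get_id pair: enumerate global
    // element ids of a local sub-box "box" inside an nx x ny x nz element grid,
    // assuming ids are laid out x-fastest, then y, then z.
    std::vector<std::int64_t> enumerate_ids(int nx, int ny, const int box[3][2])
    {
      std::vector<std::int64_t> ids;
      for (int z = box[2][0]; z < box[2][1]; ++z)
        for (int y = box[1][0]; y < box[1][1]; ++y)
          for (int x = box[0][0]; x < box[0][1]; ++x)
            ids.push_back((std::int64_t(z) * ny + y) * nx + x);
      return ids;
    }

    int main()
    {
      const int box[3][2] = { {0, 2}, {0, 2}, {0, 1} };   // 2x2x1 local patch
      std::vector<std::int64_t> ids = enumerate_ids(4, 4, box);
      return ids.size() == 4 ? 0 : 1;                     // 4 elements enumerated
    }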
- - GlobalOrdinal num_elems = get_num_ids(local_elem_box); - std::vector elemIDs(num_elems); - - BoxIterator iter = BoxIterator::begin(local_elem_box); - BoxIterator end = BoxIterator::end(local_elem_box); - - for(size_t i=0; iter != end; ++iter, ++i) { - elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, - iter.x, iter.y, iter.z); -//#ifdef MINIFE_DEBUG -//std::cout << "elem ID " << elemIDs[i] << " ("< elem_data; - - compute_gradient_values(elem_data.grad_vals); - - timer_type t_gn = 0, t_ce = 0, t_si = 0; - timer_type t0 = 0; - for(size_t i=0; i -#include -#include -#include -#include -#include -#include - -#include - -namespace miniFE { - -//--------------------------------------------------------------------- -//This file contains three 'filter' classes, and a 'perform_element_loop' -//function that uses those filter classes to run a TBB pipeline. -// -//The filter classes are as follows: -//1. GetElemNodesCoords -// For each element in the mesh, create an elem-data object with coords -// and node-ids. -//2. Compute_FE_Operators -// Given an elem-data object (with coords and node-ids), compute the -// diffusion-matrix and source-vector. -//3. LockingSumIntoLinearSystem -// Given an elem-data object (with diffusion-matrix and source-vector), -// assemble into global-sparse linear-system. Uses a lock on each -// matrix row to ensure that multiple threads don't update the same row -// at the same time. -//... or: -//3. SumIntoLinearSystem -// Given an elem-data object (with diffusion-matrix and source-vector), -// assemble into global-sparse linear-system. -// There are several of these filters, usually 1 per thread, and each -// will be responsible for a certain slice of equations. It will check -// the elem-data for equations that are in its slice, assemble those, and -// pass the elem-data on so that the next SumIntoLinearSystem filter can -// deal with equations in a different 'slice'. -// -//--------------------------------------------------------------------- - -//--------------------------------------------------------------------- - -/** Filter 1.: GetElemNodesCoords - */ -template -class GetElemNodesCoords : public tbb::filter { -public: - GetElemNodesCoords(const std::vector& elemIDs, - const simple_mesh_description& mesh, - size_t num_elems_at_a_time) - : tbb::filter(/*is_serial=*/true), - elemIDs_(elemIDs), - i_(0), - mesh_(mesh), - num_elems_(num_elems_at_a_time) - { - if (num_elems_ < 1) num_elems_ = 1; - } - - ~GetElemNodesCoords(){} - -private: - /** This operator launches an elem-data object for a 'group' (size num_elems_) - * of elements. When all elements have been launched, return NULL to signal - * that we're done issuing data. 
- */ - void* operator()(void* item) { - if (i_ >= elemIDs_.size()) return NULL; - - size_t num = num_elems_; - if (i_+num > elemIDs_.size()) num = elemIDs_.size() - i_; - - std::vector >* elemdata_vec = new std::vector >(num); - - size_t i=0; - while (i_ < elemIDs_.size() && i < num) { - get_elem_nodes_and_coords(mesh_, elemIDs_[i_], (*elemdata_vec)[i]); - ++i_; - ++i; - } - - return elemdata_vec; - } - - const std::vector& elemIDs_; - size_t i_; - const simple_mesh_description& mesh_; - size_t num_elems_; -}; - -//--------------------------------------------------------------------- - -/** Filter 2.: Compute_FE_Operators - */ -template -class Compute_FE_Operators : public tbb::filter { -public: - Compute_FE_Operators() : tbb::filter(/*is_serial=*/false) {} - ~Compute_FE_Operators() {} - -private: - /** This operator takes a vector of elem-data objects which are assumed - * to have nodal-coordinates already populated, and computes the - * element-diffusion-matrix and element-source-vector for each. - */ - void* operator()(void* item) { - if (item == NULL) return NULL; - std::vector >* elemdata = static_cast >*>(item); - - for(size_t i=0; isize(); ++i) { - compute_element_matrix_and_vector((*elemdata)[i]); - } - return elemdata; - } -}; - -//--------------------------------------------------------------------- - -/** Filter 3.: SumIntoLinearSystem - */ -template -class SumIntoLinearSystem : public tbb::filter { - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - -public: - SumIntoLinearSystem(GlobalOrdinal myFirstRow, - GlobalOrdinal myLastRow, - MatrixType& mat, VectorType& vec) - : tbb::filter(/*is_serial=*/true), - A_(mat), b_(vec), - myFirstRow_(myFirstRow), - myLastRow_(myLastRow) - { - } - - ~SumIntoLinearSystem() {} - -private: - /** This operator takes a vector of elem-data objects which have an - * element-diffusion-matrix and source-vector, looks through it for - * any rows in this filter's slice of the global matrix, assembles - * those rows into the linear-system, then passes the elem-data object - * on for use by the next assembly filter. - * If this assembly filter is responsible for the last slice of the - * row-space, then this is the last filter and so we delete the - * elem-data object. 
- */ - void* operator()(void* item) { - if (item == NULL) return NULL; - std::vector >* elemdata_vec = static_cast >*>(item); - - for(size_t e=0; esize(); ++e) { - ElemData& elemdata = (*elemdata_vec)[e]; - size_t nnodes = elemdata.nodes_per_elem; - for(size_t i=0; i myLastRow_) continue; - - sum_into_row(row, nnodes, elemdata.elem_node_ids, - &(elemdata.elem_diffusion_matrix[i*nnodes]), A_); - sum_into_vector(1, &row, &(elemdata.elem_source_vector[i]), b_); - } - } - - if (myLastRow_ >= A_.rows.size()) { - delete elemdata_vec; - return NULL; - } - - return elemdata_vec; - } - - MatrixType& A_; - VectorType& b_; - GlobalOrdinal myFirstRow_; - GlobalOrdinal myLastRow_; -}; - -//--------------------------------------------------------------------- - -static tbb::atomic matrix_suminto; - -/** Filter 3.: SumIntoLinearSystem with locking - */ -template -class LockingSumIntoLinearSystem : public tbb::filter { - typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; - typedef typename MatrixType::ScalarType Scalar; - -public: - LockingSumIntoLinearSystem(MatrixType& mat, VectorType& vec) - : tbb::filter(/*is_serial=*/false), - A_(mat), b_(vec) - { - } - - ~LockingSumIntoLinearSystem() {} - -private: - /** This operator takes a vector of elem-data objects which have an - * element-diffusion-matrix and source-vector, and assembles into - * the linear-system, using locking to make sure no other - * thread is assembling the same global row at the same time. - */ - void* operator()(void* item) { - if (item == NULL) return NULL; - std::vector >* elemdata_vec = static_cast >*>(item); - - for(size_t e=0; esize(); ++e) { - ElemData& elemdata = (*elemdata_vec)[e]; - size_t nnodes = elemdata.nodes_per_elem; - size_t offset = 0; - for(size_t i=0; i A_; - LockingVector b_; -}; - -//--------------------------------------------------------------------- - -template -void -perform_element_loop(const simple_mesh_description& mesh, - const Box& local_elem_box, - MatrixType& A, VectorType& b, - Parameters& params) -{ - typedef typename MatrixType::ScalarType Scalar; - - if (A.rows.size() == 0) return; - - int num_threads = params.numthreads; - - //We will iterate the local-element-box (local portion of the mesh), and - //assemble the FE operators into the global sparse linear-system. - - tbb::pipeline pipe; - - int global_elems_x = mesh.global_box[0][1]; - int global_elems_y = mesh.global_box[1][1]; - int global_elems_z = mesh.global_box[2][1]; - - GlobalOrdinal num_elems = get_num_ids(local_elem_box); - std::vector elemIDs(num_elems); - - BoxIterator iter = BoxIterator::begin(local_elem_box); - BoxIterator end = BoxIterator::end(local_elem_box); - - for(size_t i=0; iter != end; ++iter, ++i) { - elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, - iter.x, iter.y, -iter.z); - } - - //Create the first stage of the pipeline, the filter that will - //launch elem-data from the mesh, through the pipeline. - GetElemNodesCoords get_nodes_coords(elemIDs, mesh, params.elem_group_size); - - //Create the second stage of the pipeline, the parallel filter that will - //compute element-matrices and element-vectors. 
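Editor's note: the TBB variant of perform_element_loop assembles these filters with the classic tbb::pipeline interface from tbb/pipeline.h: a serial input filter, one or more parallel transform filters, then the assembly filter(s), driven by pipe.run(). For readers new to that interface, here is a minimal self-contained sketch of the same producer / parallel-transform / consumer shape (hypothetical filter names; newer oneTBB releases drop this class in favour of tbb::parallel_pipeline):

    #include <iostream>
    #include <tbb/pipeline.h>

    // Serial input filter: emits the integers 0..n-1, one heap-allocated token
    // per pass, and returns NULL when done (the pipeline's stop signal).
    class Producer : public tbb::filter {
    public:
      explicit Producer(int n) : tbb::filter(/*is_serial=*/true), i_(0), n_(n) {}
    private:
      void* operator()(void*) { return (i_ < n_) ? new int(i_++) : NULL; }
      int i_, n_;
    };

    // Parallel transform filter: squares each token in place.
    class Square : public tbb::filter {
    public:
      Square() : tbb::filter(/*is_serial=*/false) {}
    private:
      void* operator()(void* item) {
        int* p = static_cast<int*>(item);
        *p = (*p) * (*p);
        return p;
      }
    };

    // Serial output filter: consumes and frees each token.
    class Sink : public tbb::filter {
    public:
      Sink() : tbb::filter(/*is_serial=*/true), sum_(0) {}
      long sum() const { return sum_; }
    private:
      void* operator()(void* item) {
        int* p = static_cast<int*>(item);
        sum_ += *p;
        delete p;
        return NULL;
      }
      long sum_;
    };

    int main()
    {
      Producer in(10);
      Square sq;
      Sink out;
      tbb::pipeline pipe;
      pipe.add_filter(in);
      pipe.add_filter(sq);
      pipe.add_filter(out);
      pipe.run(/*max_live_tokens=*/4);
      pipe.clear();
      std::cout << "sum of squares 0..9 = " << out.sum() << "\n";   // 285
      return 0;
    }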
- Compute_FE_Operators fe_ops; - - //Add the filters to the pipeline: - pipe.add_filter(get_nodes_coords); - pipe.add_filter(fe_ops); - - LockingSumIntoLinearSystem* sum_into_linsys = NULL; - size_t num_assembly_filters = 0; - std::vector*> linsys; - - bool use_locking = params.use_locking==1; - if (use_locking) { - sum_into_linsys = new LockingSumIntoLinearSystem(A, b); - pipe.add_filter(*sum_into_linsys); - } - else { - //If not using locking, create several assembly filters, each of which - //will be responsible for assembling rows into a certain slice of the - //global matrix. - - num_assembly_filters = num_threads/3; - if (num_assembly_filters == 0) num_assembly_filters = 1; - num_assembly_filters = 2; - - size_t num_rows = A.rows.size(); - size_t rows_per_thread = num_rows/num_assembly_filters; - if (num_rows % num_assembly_filters > 0) ++rows_per_thread; - size_t first_row = A.rows[0]; - for(int i=0; i * sum_into = new SumIntoLinearSystem(first_row, last_row, A, b); - linsys.push_back(sum_into); - pipe.add_filter(*sum_into); - - first_row += rows_per_thread; - } - } - - //Running the pipeline carries out the element-loop and assembly. - pipe.run(num_threads); - - pipe.clear(); - - if (use_locking) { - std::cout << "\n{number of matrix conflicts: " << miniFE_num_matrix_conflicts << "}"< -#include -#include -#include -#include -#include -#include -#include - -namespace miniFE { - -//--------------------------------------------------------------------- - -template -struct FEAssembleSumInto { - const simple_mesh_description* mesh; - GlobalOrdinal* elemIDs; - LockingMatrix* A; - LockingVector* b; - -inline void operator()(int i) -{ - ElemData elem_data; - GlobalOrdinal elemID = elemIDs[i]; - get_elem_nodes_and_coords(*mesh, elemID, elem_data.elem_node_ids, - elem_data.elem_node_coords); - compute_element_matrix_and_vector(elem_data); - sum_into_global_linear_system(elem_data, *A, *b); -} -}; - -template -void -perform_element_loop(const simple_mesh_description& mesh, - const Box& local_elem_box, - MatrixType& A, VectorType& b, - Parameters& params) -{ - typedef typename MatrixType::ScalarType Scalar; - - if (A.rows.size() == 0) return; - - int num_threads = params.numthreads; - - timer_type t0 = mytimer(); - - //We will iterate the local-element-box (local portion of the mesh), and - //assemble the FE operators into the global sparse linear-system. 
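Editor's note: FEAssembleSumInto above shows the compute-node programming model used by this variant. The per-element work is packaged as a plain struct whose operator()(int i) does everything for element i, and the struct is handed to compute_node.parallel_for(...), which may run it serially, on TPI/TBB threads, or on a GPU. A minimal sketch of that pattern with a serial stand-in node (names are illustrative, not miniFE's):

    #include <iostream>
    #include <vector>

    // Hypothetical stand-in for a miniFE "compute node": anything that can apply
    // a functor's operator()(int i) over a range. The serial version is a loop;
    // threaded or CUDA nodes dispatch the same functor differently.
    struct MiniSerialNode {
      template <typename WDP>
      void parallel_for(int length, WDP wdp) const {
        for (int i = 0; i < length; ++i) wdp(i);
      }
    };

    // Work functor in the same shape as FEAssembleSumInto: plain data members
    // plus operator()(int i) doing the per-item work.
    struct ScaleOp {
      double* x;
      double alpha;
      void operator()(int i) const { x[i] *= alpha; }
    };

    int main()
    {
      std::vector<double> x(8, 1.0);
      ScaleOp op; op.x = &x[0]; op.alpha = 3.0;
      MiniSerialNode node;
      node.parallel_for((int)x.size(), op);
      std::cout << "x[0] = " << x[0] << "\n";   // 3
      return 0;
    }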
- - int global_elems_x = mesh.global_box[0][1]; - int global_elems_y = mesh.global_box[1][1]; - int global_elems_z = mesh.global_box[2][1]; - - GlobalOrdinal num_elems = get_num_ids(local_elem_box); - std::vector elemIDs(num_elems); - - BoxIterator iter = BoxIterator::begin(local_elem_box); - BoxIterator end = BoxIterator::end(local_elem_box); - - for(size_t i=0; iter != end; ++iter, ++i) { - elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, - iter.x, iter.y, iter.z); - } - - LockingMatrix lockingA(A); - LockingVector lockingb(b); - - FEAssembleSumInto fe_op; - fe_op.mesh = &mesh; - fe_op.elemIDs = &elemIDs[0]; - fe_op.A = &lockingA; - fe_op.b = &lockingb; - - typedef typename VectorType::ComputeNodeType ComputeNodeType; - - ComputeNodeType& compute_node = b.compute_node; - - compute_node.parallel_for(elemIDs.size(), fe_op); - - std::cout << "\n{number of matrix conflicts: " << miniFE_num_matrix_conflicts << "}"< -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace miniFE { - -//--------------------------------------------------------------------- - -template -void -perform_element_loop(const simple_mesh_description& mesh, - const Box& local_elem_box, - MatrixType& A, VectorType& b, - Parameters& params) -{ - typedef typename MatrixType::ScalarType Scalar; - - if (A.rows.size() == 0) return; - - int num_threads = params.numthreads; - - timer_type t0 = mytimer(); - - //We will iterate the local-element-box (local portion of the mesh), and - //assemble the FE operators into the global sparse linear-system. - - int global_elems_x = mesh.global_box[0][1]; - int global_elems_y = mesh.global_box[1][1]; - int global_elems_z = mesh.global_box[2][1]; - - GlobalOrdinal num_elems = get_num_ids(local_elem_box); - std::vector elemIDs(num_elems); - - BoxIterator iter = BoxIterator::begin(local_elem_box); - BoxIterator end = BoxIterator::end(local_elem_box); - - for(size_t i=0; iter != end; ++iter, ++i) { - elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, - iter.x, iter.y, iter.z); - } - - std::vector node_ordinals(num_elems*Hex8::numNodesPerElem); - std::vector node_coords(num_elems*Hex8::numNodesPerElem*Hex8::spatialDim); - std::vector elem_matrices(num_elems*Hex8::numNodesPerElem*Hex8::numNodesPerElem); - std::vector elem_vectors(num_elems*Hex8::numNodesPerElem); - - LockingMatrix lockingA(A); - LockingVector lockingb(b); - - GetNodesCoords get_nodes_coords; - get_nodes_coords.elemIDs = &elemIDs[0]; - get_nodes_coords.mesh = &mesh; - get_nodes_coords.node_ordinals = &node_ordinals[0]; - get_nodes_coords.elem_node_coords = &node_coords[0]; - - typedef typename VectorType::ComputeNodeType ComputeNodeType; - - ComputeNodeType& compute_node = b.compute_node; - - compute_node.parallel_for(elemIDs.size(), get_nodes_coords); - - timer_type t_gn = mytimer() - t0; - t0 = mytimer(); - -#ifdef MINIFE_HAVE_CUDA - CUDANode& elem_compute_node = CUDANode::singleton(); -#else - ComputeNodeType& elem_compute_node = compute_node; -#endif - timer_type t_ccn = mytimer() - t0; - t0 = mytimer(); - - Scalar* d_node_coords = elem_compute_node.get_buffer(&node_coords[0], node_coords.size()); - Scalar* d_elem_matrices = elem_compute_node.get_buffer(&elem_matrices[0], elem_matrices.size()); - Scalar* d_elem_vectors = elem_compute_node.get_buffer(&elem_vectors[0], elem_vectors.size()); - - elem_compute_node.copy_to_buffer(&node_coords[0], node_coords.size(), d_node_coords); - - FEComputeElem fe_compute_elem; - 
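Editor's note: this variant stages the whole local assembly through flat per-element arrays (Hex8 has 8 nodes per element): node ordinals, node coordinates, element diffusion matrices and element source vectors, so that a single parallel_for can fill them, possibly on the GPU, before SumInLinSys scatters them into the global system. A small indexing sketch for those flat buffers (illustrative names; the per-element matrix is assumed row-major, consistent with the i*nnodes offsets used by the assembly filters above):

    #include <cstddef>
    #include <vector>

    int main()
    {
      // Illustrative sizes: Hex8 elements have 8 nodes; 3 coordinates per node.
      const std::size_t nn = 8, dim = 3, num_elems = 10;

      std::vector<double> node_coords(num_elems * nn * dim);    // gathered coordinates
      std::vector<double> elem_matrices(num_elems * nn * nn);   // one nn x nn block per element
      std::vector<double> elem_vectors(num_elems * nn);         // one length-nn vector per element

      // Element e owns the contiguous slices below; the kernel filling them and
      // the filter summing them into the global system agree on these offsets.
      const std::size_t e = 3;
      double* coords_e = &node_coords[e * nn * dim];
      double* Ke       = &elem_matrices[e * nn * nn];   // Ke[i*nn + j] = entry (i,j)
      double* fe       = &elem_vectors[e * nn];

      coords_e[0] = 0.0;       // x-coordinate of the element's first node
      Ke[0 * nn + 1] = -1.0;   // coupling of local node 0 with local node 1
      fe[0] = 0.5;             // source contribution at local node 0
      return 0;
    }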
fe_compute_elem.elem_node_coords = &d_node_coords[0]; - fe_compute_elem.elem_diffusion_matrix = &d_elem_matrices[0]; - fe_compute_elem.elem_source_vector = &d_elem_vectors[0]; - - elem_compute_node.parallel_for(elemIDs.size(), fe_compute_elem); - - elem_compute_node.copy_from_buffer(&elem_matrices[0], elem_matrices.size(), d_elem_matrices); - elem_compute_node.copy_from_buffer(&elem_vectors[0], elem_vectors.size(), d_elem_vectors); - - timer_type t_ce = mytimer() - t0; - - t0 = mytimer(); - SumInLinSys sum_in; - sum_in.node_ordinals = &node_ordinals[0]; - sum_in.elem_diffusion_matrix = &elem_matrices[0]; - sum_in.elem_source_vector = &elem_vectors[0]; - sum_in.A = &lockingA; - sum_in.b = &lockingb; - - compute_node.parallel_for(elemIDs.size(), sum_in); - - timer_type t_si = mytimer() - t0; - std::cout << "time to get nodes/coords: " << t_gn << std::endl; - std::cout << "time to create compute-node: " << t_ccn << ", time to compute elements: " << t_ce << std::endl; - std::cout << "time to sum into linsys: " << t_si << std::endl; - std::cout << "\n{number of matrix conflicts: " << miniFE_num_matrix_conflicts << "}"< " -exit 1 -fi - -np=$1 -nx=$2 -ny=$3 -nz=$4 - -echo " " -echo "running miniFE test for np=${np}, nx=${nx} ny=${ny} nz=${nz}..." - -if [ ! -x miniFE.x ]; then -echo "miniFE.x doesn't exist or isn't executable. Aborting." -exit -1 -fi - -mpirun -np ${np} miniFE.x nx=${nx} ny=${ny} nz=${nz} >& miniFE_run.out -rm miniFE_run.out - -if [ ! -f A.mtx.${np}.0 ]; then -echo "matrix file A.mtx.${np}.0 doesn't exist. build miniFE with -DMINIFE_DEBUG." -fi - -p=0 -while [ $p -lt ${np} ]; do -diff A.mtx.${np}.$p gold_files/1x1x2_A.mtx.${np}.$p >& diff.A.$p.txt -diff b.vec.${np}.$p gold_files/1x1x2_b.vec.${np}.$p >& diff.b.$p.txt -diff x.vec.${np}.$p gold_files/1x1x2_x.vec.${np}.$p >& diff.x.$p.txt - -test_result="passed" -if [ -s diff.A.$p.txt ]; then -echo "TEST FAILED: see diff.A.${p}.txt" -test_result="failed" -fi - -if [ -s diff.b.$p.txt ]; then -echo "TEST FAILED: see diff.b.${p}.txt" -test_result="failed" -fi - -if [ -s diff.x.$p.txt ]; then -echo "TEST FAILED: see diff.x.${p}.txt" -test_result="failed" -fi - -if [ $test_result != "passed" ]; then -echo "test failed" -exit 1 -fi - -let p=p+1 -rm diff.*.txt -done - -echo "tests passed" - diff --git a/kokkos/basic/run_tests b/kokkos/basic/run_tests deleted file mode 100755 index 5e03399..0000000 --- a/kokkos/basic/run_tests +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -echo " " -echo "running miniFE tests..." - -if [ ! -x miniFE.x ]; then -echo "miniFE.x doesn't exist or isn't executable. Aborting." -exit -1 -fi - -./run_one_test 1 1 1 2 -if [ $? != 0 ]; then -echo "test failed" -exit $? -fi - -./run_one_test 2 1 1 2 -if [ $? != 0 ]; then -echo "test failed" -exit $? -fi - diff --git a/kokkos/basic/sharedmem.cuh b/kokkos/basic/sharedmem.cuh deleted file mode 100644 index b13c4f2..0000000 --- a/kokkos/basic/sharedmem.cuh +++ /dev/null @@ -1,153 +0,0 @@ -/* -* Copyright 1993-2006 NVIDIA Corporation. All rights reserved. -* -* NOTICE TO USER: -* -* This source code is subject to NVIDIA ownership rights under U.S. and -* international Copyright laws. -* -* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE -* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR -* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH -* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF -* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
-* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, -* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -* OR PERFORMANCE OF THIS SOURCE CODE. -* -* U.S. Government End Users. This source code is a "commercial item" as -* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of -* "commercial computer software" and "commercial computer software -* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) -* and is provided to the U.S. Government only as a commercial end item. -* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through -* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the -* source code with only those rights set forth herein. -*/ - -#ifndef _SHAREDMEM_H_ -#define _SHAREDMEM_H_ - -//**************************************************************************** -// Because dynamically sized shared memory arrays are declared "extern", -// we can't templatize them directly. To get around this, we declare a -// simple wrapper struct that will declare the extern array with a different -// name depending on the type. This avoids compiler errors about duplicate -// definitions. -// -// To use dynamically allocated shared memory in a templatized __global__ or -// __device__ function, just replace code like this: -// -// -// template -// __global__ void -// foo( T* g_idata, T* g_odata) -// { -// // Shared mem size is determined by the host app at run time -// extern __shared__ T sdata[]; -// ... -// doStuff(sdata); -// ... -// } -// -// With this -// template -// __global__ void -// foo( T* g_idata, T* g_odata) -// { -// // Shared mem size is determined by the host app at run time -// SharedMemory smem; -// T* sdata = smem.getPointer(); -// ... -// doStuff(sdata); -// ... -// } -//**************************************************************************** - -// This is the un-specialized struct. Note that we prevent instantiation of this -// struct by putting an undefined symbol in the function body so it won't compile. -template -struct SharedMemory -{ - // Ensure that we won't compile any un-specialized types - __device__ T* getPointer() { - extern __device__ void error(void); - error(); - return NULL; - } -}; - -// Following are the specializations for the following types. -// int, uint, char, uchar, short, ushort, long, ulong, bool, float, and double -// One could also specialize it for user-defined types. 
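Editor's note: the wrapper's own comment shows the intended substitution; below is a complete, hypothetical CUDA C++ example of it in use, a per-block sum reduction that obtains its dynamically sized shared-memory buffer through SharedMemory<T> (the double specialization appears just below). It assumes sharedmem.cuh is on the include path, a power-of-two block size, and a launch with blockDim.x*sizeof(T) bytes of dynamic shared memory.

    #include <cstdio>
    #include <vector>
    #include <cuda_runtime.h>
    #include "sharedmem.cuh"

    // Block-wise sum reduction using SharedMemory<T> instead of a raw
    // "extern __shared__" declaration, so one kernel works for any specialized T.
    template <typename T>
    __global__ void block_sum(const T* in, T* block_out, unsigned int n)
    {
      SharedMemory<T> smem;
      T* sdata = smem.getPointer();

      const unsigned int tid = threadIdx.x;
      const unsigned int i   = blockIdx.x * blockDim.x + threadIdx.x;
      sdata[tid] = (i < n) ? in[i] : T(0);
      __syncthreads();

      for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {   // tree reduction
        if (tid < s) sdata[tid] += sdata[tid + s];
        __syncthreads();
      }
      if (tid == 0) block_out[blockIdx.x] = sdata[0];
    }

    int main()
    {
      const unsigned int n = 256, threads = 128, blocks = (n + threads - 1) / threads;
      std::vector<double> h_in(n, 1.0), h_out(blocks, 0.0);

      double *d_in = NULL, *d_out = NULL;
      cudaMalloc((void**)&d_in,  n * sizeof(double));
      cudaMalloc((void**)&d_out, blocks * sizeof(double));
      cudaMemcpy(d_in, &h_in[0], n * sizeof(double), cudaMemcpyHostToDevice);

      block_sum<double><<<blocks, threads, threads * sizeof(double)>>>(d_in, d_out, n);
      cudaMemcpy(&h_out[0], d_out, blocks * sizeof(double), cudaMemcpyDeviceToHost);

      std::printf("block sums: %g %g\n", h_out[0], h_out[1]);   // 128 and 128
      cudaFree(d_in); cudaFree(d_out);
      return 0;
    }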
- -template <> -struct SharedMemory -{ - __device__ int* getPointer() { extern __shared__ int s_int[]; return s_int; } -}; - -template <> -struct SharedMemory -{ - __device__ unsigned int* getPointer() { extern __shared__ unsigned int s_uint[]; return s_uint; } -}; - -template <> -struct SharedMemory -{ - __device__ char* getPointer() { extern __shared__ char s_char[]; return s_char; } -}; - -template <> -struct SharedMemory -{ - __device__ unsigned char* getPointer() { extern __shared__ unsigned char s_uchar[]; return s_uchar; } -}; - -template <> -struct SharedMemory -{ - __device__ short* getPointer() { extern __shared__ short s_short[]; return s_short; } -}; - -template <> -struct SharedMemory -{ - __device__ unsigned short* getPointer() { extern __shared__ unsigned short s_ushort[]; return s_ushort; } -}; - -template <> -struct SharedMemory -{ - __device__ long* getPointer() { extern __shared__ long s_long[]; return s_long; } -}; - -template <> -struct SharedMemory -{ - __device__ unsigned long* getPointer() { extern __shared__ unsigned long s_ulong[]; return s_ulong; } -}; - -template <> -struct SharedMemory -{ - __device__ bool* getPointer() { extern __shared__ bool s_bool[]; return s_bool; } -}; - -template <> -struct SharedMemory -{ - __device__ float* getPointer() { extern __shared__ float s_float[]; return s_float; } -}; - -template <> -struct SharedMemory -{ - __device__ double* getPointer() { extern __shared__ double s_double[]; return s_double; } -}; - - -#endif //_SHAREDMEM_H_ diff --git a/kokkos/basic/simple_mesh_description.hpp b/kokkos/basic/simple_mesh_description.hpp deleted file mode 100644 index 717dc6c..0000000 --- a/kokkos/basic/simple_mesh_description.hpp +++ /dev/null @@ -1,239 +0,0 @@ - -#ifndef _simple_mesh_description_hpp_ -#define _simple_mesh_description_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include -#include - -namespace miniFE { - -template -class simple_mesh_description { -public: - simple_mesh_description(const Box& global_box_in, const Box& local_box_in) - { - Box local_node_box; - for(int i=0; i<3; ++i) { - global_box[i][0] = global_box_in[i][0]; - global_box[i][1] = global_box_in[i][1]; - local_box[i][0] = local_box_in[i][0]; - local_box[i][1] = local_box_in[i][1]; - local_node_box[i][0] = local_box_in[i][0]; - local_node_box[i][1] = local_box_in[i][1]; - //num-owned-nodes == num-elems+1 in this dimension if the elem box is not empty - //and we are at the high end of the global range in that dimension: - if (local_box_in[i][1] > local_box_in[i][0] && local_box_in[i][1] == global_box[i][1]) local_node_box[i][1] += 1; - } - - int max_node_x = global_box[0][1]+1; - int max_node_y = global_box[1][1]+1; - int max_node_z = global_box[2][1]+1; - create_map_id_to_row(max_node_x, max_node_y, max_node_z, local_node_box, - map_ids_to_rows); - - //As described in analytic_soln.hpp, - //we will impose a 0 boundary-condition on faces x=0, y=0, z=0, y=1, z=1 - //we will impose a 1 boundary-condition on face x=1 - -#ifdef MINIFE_DEBUG -std::cout< 0) --miny; - if (local_node_box[Z][0] > 0) --minz; - if (local_node_box[Y][1] < max_node_y) ++maxy; - if (local_node_box[Z][1] < max_node_z) ++maxz; - - for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, - 0, iy, iz); -#ifdef MINIFE_DEBUG -std::cout<<"x=0 BC, node "< 0) --minx; - if (local_node_box[Z][0] > 0) --minz; - if (local_node_box[X][1] < max_node_x) ++maxx; - if (local_node_box[Z][1] < max_node_z) ++maxz; - - for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, - ix, 0, iz); -#ifdef MINIFE_DEBUG -std::cout<<"y=0 BC, node "< 0) --minx; - if (local_node_box[Y][0] > 0) --miny; - if (local_node_box[X][1] < max_node_x) ++maxx; - if (local_node_box[Y][1] < max_node_y) ++maxy; - - for(int iy=miny; iy(max_node_x, max_node_y, max_node_z, - ix, iy, 0); -#ifdef MINIFE_DEBUG -std::cout<<"z=0 BC, node "< 0) --minz; - if (local_node_box[Y][0] > 0) --miny; - if (local_node_box[Z][1] < max_node_z) ++maxz; - if (local_node_box[Y][1] < max_node_y) ++maxy; - - for(int iy=miny; iy(max_node_x, max_node_y, max_node_z, - x1, iy, iz); - int row = map_id_to_row(nodeID); -#ifdef MINIFE_DEBUG -std::cout<<"x=1 BC, node "< 0) --minz; - if (local_node_box[X][0] > 0) --minx; - if (local_node_box[Z][1] < max_node_z) ++maxz; - if (local_node_box[X][1] < max_node_x) ++maxx; - - for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, - ix, y1, iz); -#ifdef MINIFE_DEBUG -std::cout<<"y=1 BC, node "< 0) --miny; - if (local_node_box[X][0] > 0) --minx; - if (local_node_box[Y][1] < max_node_y) ++maxy; - if (local_node_box[X][1] < max_node_x) ++maxx; - - for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, - ix, iy, z1); -#ifdef MINIFE_DEBUG -std::cout<<"z=1 BC, node "< bc_rows_0; - std::set bc_rows_1; - std::map map_ids_to_rows; - Box global_box; - Box local_box; -};//class simple_mesh_description - -}//namespace miniFE - -#endif diff --git a/kokkos/basic/time_kernels.hpp b/kokkos/basic/time_kernels.hpp deleted file mode 100644 index b14f743..0000000 --- a/kokkos/basic/time_kernels.hpp +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef _time_kernels_hpp_ -#define _time_kernels_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// 
Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include - -#include -#include - -#ifdef MINIFE_HAVE_CUDA -#include -#endif - -namespace miniFE { - -template -void -time_kernels(OperatorType& A, - const VectorType& b, - VectorType& x, - Matvec matvec, - typename OperatorType::LocalOrdinalType max_iter, - typename OperatorType::ScalarType& xdotp, - timer_type* my_kern_times) -{ - typedef typename OperatorType::ScalarType ScalarType; - typedef typename OperatorType::LocalOrdinalType OrdinalType; - typedef typename TypeTraits::magnitude_type magnitude_type; - - timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0; - - int myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (!A.has_local_indices) { - std::cerr << "miniFE::time_kernels ERROR, A.has_local_indices is false, needs to be true. This probably means " - << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::time_kernels." - << std::endl; - return; - } - - OrdinalType nrows = A.rows.size(); - OrdinalType ncols = A.num_cols; - - VectorType p(0, ncols, b.compute_node); - - ScalarType one = 1.0; - ScalarType zero = 0.0; - - typedef typename VectorType::ComputeNodeType ComputeNodeType; - ComputeNodeType& compute_node = x.compute_node; - - //The following lines that create and initialize buffers are no-ops in many - //cases, but perform actual allocations and copies if a off-cpu device such as - //a GPU is being used by compute_node. 
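Editor's note: the comment above is the key to reading the buffer code that follows. On a host-only compute node the get_buffer/copy_to_buffer/copy_from_buffer calls can collapse to pointer pass-through and no-ops, while a CUDA node performs real device allocations and transfers, so time_kernels is written once and runs unchanged in both cases. A hypothetical serial stand-in making that concrete (not miniFE's actual SerialComputeNode):

    #include <cstddef>

    // On a CPU-only node, "device" buffers are just the host buffers, so the
    // copy calls do nothing; the calling code stays identical to the GPU path.
    struct SerialBufferModel {
      template <typename T>
      T* get_buffer(const T* host_ptr, std::size_t /*len*/) {
        return const_cast<T*>(host_ptr);    // device buffer == host buffer
      }
      template <typename T>
      void copy_to_buffer(const T* /*host*/, std::size_t /*len*/, T* /*dev*/) {}
      template <typename T>
      void copy_from_buffer(T* /*host*/, std::size_t /*len*/, const T* /*dev*/) {}
    };

    int main()
    {
      double x[4] = {1.0, 2.0, 3.0, 4.0};
      SerialBufferModel node;
      double* d_x = node.get_buffer(x, 4);
      node.copy_to_buffer(x, 4, d_x);   // no-op here; a real H2D copy on a GPU node
      return d_x == x ? 0 : 1;
    }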
- - //Do any required allocations for buffers that will be needed during CG: - ScalarType* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - ScalarType* d_p = compute_node.get_buffer(&p.coefs[0], p.coefs.size()); - ScalarType* d_b = compute_node.get_buffer(&b.coefs[0], b.coefs.size()); - OrdinalType* d_Arowoff = compute_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); - OrdinalType* d_Acols = compute_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); - ScalarType* d_Acoefs = compute_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); - - //Copy data to buffers that need to be initialized from input data: - compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); - compute_node.copy_to_buffer(&b.coefs[0], b.coefs.size(), d_b); - compute_node.copy_to_buffer(&A.row_offsets[0], A.row_offsets.size(), d_Arowoff); - compute_node.copy_to_buffer(&A.packed_cols[0], A.packed_cols.size(), d_Acols); - compute_node.copy_to_buffer(&A.packed_coefs[0], A.packed_coefs.size(), d_Acoefs); - - TICK(); - for(OrdinalType i=0; i - -#ifdef HAVE_MPI -#include -#endif - -#include -#include - -int main(int argc, char** argv) { - -#ifdef HAVE_MPI - MPI_Init(&argc, &argv); -#endif - - //utest_case.hpp declares the 'get_utest_cases' function. - - std::vector& utest_cases = get_utest_cases(); - bool tests_passed = true; - - for(size_t i=0; irun(); - if (passed) std::cout << " pass: " << utest_cases[i]->name() << std::endl; - else { - std::cout << "!!!FAIL: " << utest_cases[i]->name() << std::endl; - tests_passed = false; - } - } - - if (!tests_passed) { - std::cout << "at least 1 test failed."< - -class utest_case; - -std::vector& get_utest_cases() -{ - static std::vector utest_cases; - return utest_cases; -} - -//When a class that inherits the utest_case class is constructed, -//it gets added to the vector of utest_cases returned by -//the above 'get_utest_cases' function. -class utest_case { -public: - utest_case(){ get_utest_cases().push_back(this); } - ~utest_case(){} - virtual const char* name() = 0; - virtual bool run() = 0; -}; - -//The following macro declares and instantiates a class that -//inherits the above utest_case interfaces. -// -//use the macro like this: -// UTEST_CASE(mytest) -// { -// ... test code here ... -// } -// -//See example usages in utest_cases.hpp -// -#define UTEST_CASE(TESTNAME) \ - class TESTNAME##_utest : public utest_case { \ - public: \ - TESTNAME##_utest(){} \ - const char* name() {return #TESTNAME;} \ - bool run(); \ - }; \ - \ - TESTNAME##_utest instance_##TESTNAME##_utest; \ - \ - bool TESTNAME##_utest::run() - -#define TEST_EQUAL(A,B) \ - if ((A) != (B)) return false; - -#define TEST_EQUAL_TOL(A,B,tol) \ - if (std::abs((A) - (B)) > tol) return false; - -#endif - diff --git a/kokkos/basic/utest_cases.hpp b/kokkos/basic/utest_cases.hpp deleted file mode 100644 index d15ef9d..0000000 --- a/kokkos/basic/utest_cases.hpp +++ /dev/null @@ -1,1232 +0,0 @@ -#ifndef _utest_cases_hpp_ -#define _utest_cases_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. 
-// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef MINIFE_HAVE_TPI -#include -#include -#endif - -#ifdef MINIFE_HAVE_TBB -#include -#include -#endif - -#ifdef MINIFE_HAVE_CUDA -#include -#endif - -#include - -typedef MINIFE_SCALAR Scalar; -typedef MINIFE_LOCAL_ORDINAL LocalOrdinal; -typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinal; - -template -inline -int check_get_id(int nx, int ny, int nz, int x, int y, int z, T expected, const char* testname) -{ - T val = miniFE::get_id(nx,ny,nz,x,y,z); - if (val != expected) { - std::cout << testname << " failed. val=" << val<<", expected " << expected << std::endl; - return -1; - } - return 0; -} - -UTEST_CASE(box_partition) -{ - int global_box[3][2] = { { 0, 2000 }, { 0, 2000}, { 0, 2000} }; - int numprocs = 4, myproc = 0; - - int (*local_boxes0)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); - int (*local_boxes1)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); - int (*local_boxes2)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); - int (*local_boxes3)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); - - box_partition(0, numprocs, 2, global_box, local_boxes0); - box_partition(0, numprocs, 2, global_box, local_boxes1); - box_partition(0, numprocs, 2, global_box, local_boxes2); - box_partition(0, numprocs, 2, global_box, local_boxes3); - - for(int i=1; i(local_boxes0[i]) != - miniFE::get_num_ids(local_boxes0[0])) { - return false; - } - if (miniFE::get_num_ids(local_boxes1[i]) != - miniFE::get_num_ids(local_boxes1[0])) { - return false; - } - if (miniFE::get_num_ids(local_boxes2[i]) != - miniFE::get_num_ids(local_boxes2[0])) { - return false; - } - if (miniFE::get_num_ids(local_boxes3[i]) != - miniFE::get_num_ids(local_boxes3[0])) { - return false; - } - - if (miniFE::get_num_ids(local_boxes0[i]) < 0 || - miniFE::get_num_ids(local_boxes0[i]) > 2000000000) { - return false; - } - } - - std::free(local_boxes0); - std::free(local_boxes1); - std::free(local_boxes2); - std::free(local_boxes3); - - return true; -} - -UTEST_CASE(generate_matrix_structure1) -{ - int global_box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; - int box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; - - miniFE::simple_mesh_description mesh(global_box, box); - - SerialComputeNode compute_node; - miniFE::CSRMatrix A(compute_node); - - miniFE::generate_matrix_structure(mesh, A); - - int nodes_x = global_box[0][1]+1; - int nodes_y = global_box[1][1]+1; - int nodes_z = global_box[2][1]+1; - int nrows = nodes_x*nodes_y*nodes_z; - - if 
(A.rows.size() != nrows) { - return false; - } - - if (A.row_offsets[nrows] != 64) { - return false; - } - - return true; -} - -UTEST_CASE(generate_matrix_structure2) -{ - int global_box[3][2] = {{ 0, 2 }, { 0, 2 }, { 0, 2 } }; - int box[3][2] = {{ 0, 2 }, { 0, 2 }, { 0, 2 } }; - - miniFE::simple_mesh_description mesh(global_box, box); - - SerialComputeNode compute_node; - miniFE::CSRMatrix A(compute_node); - - int nodes_x = global_box[0][1]+1; - int nodes_y = global_box[1][1]+1; - int nodes_z = global_box[2][1]+1; - int nrows = nodes_x*nodes_y*nodes_z; - - if (nrows != 27) { - return false; - } - - miniFE::generate_matrix_structure(mesh, A); - - if (A.row_offsets.size() != nrows+1) { - return false; - } - - if (A.row_offsets[nrows] != 343) { - return false; - } - - if (A.row_offsets[14]-A.row_offsets[13] != 27) { - return false; - } - - return true; -} - -UTEST_CASE(get_hex8_node_coords_3d) -{ - std::vector coords(24); - coords[0] = 0; - coords[1] = 0; - coords[2] = 0; - coords[3] = 1; - coords[4] = 0; - coords[5] = 0; - coords[6] = 1; - coords[7] = 0; - coords[8] = -1; - coords[9] = 0; - coords[10] = 0; - coords[11] = -1; - coords[12] = 0; - coords[13] = 1; - coords[14] = 0; - coords[15] = 1; - coords[16] = 1; - coords[17] = 0; - coords[18] = 1; - coords[19] = 1; - coords[20] = -1; - coords[21] = 0; - coords[22] = 1; - coords[23] = -1; - - std::vector testcoords(24); - - miniFE::get_hex8_node_coords_3d(0, 0, 0, 1.0, &testcoords[0]); - - if (coords != testcoords) { - return false; - } - - return true; -} - -inline -void get_test_elem_mat(std::vector& elem_mat) -{ -//after much careful debugging, I'm convinced that the following is a -//correct element-diffusion matrix for the element with local-node-0 at -//coordinates 0,0,0. So pasting this into a unit-test will guard against -//unintended changes as I continue working on the code for various reasons. 
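// The 36 values pasted below are the upper triangle (diagonal included) of the symmetric
// 8x8 Hex8 element-diffusion matrix: 8*9/2 = 36 entries, packed row by row. A small sketch
// of that packing convention follows; it matches the expansion loop used in the
// assemble_FE_data test further down. The helper name and 'n' are illustrative only.
#include <vector>

template<typename Scalar>
std::vector<Scalar> expand_symmetric_upper(const std::vector<Scalar>& packed, int n)
{
  std::vector<Scalar> full(n*n);
  int offset = 0;
  for(int i=0; i<n; ++i) {
    for(int j=i; j<n; ++j) {        // j >= i: upper triangle, row-major
      Scalar coef = packed[offset++];
      full[i*n+j] = coef;           // mirror into both halves
      full[j*n+i] = coef;
    }
  }
  return full;                      // packed.size() must equal n*(n+1)/2
}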
- - elem_mat.resize(36); -elem_mat[0] = 0.6666666664477059; -elem_mat[1] = 1.094804871759614e-10; -elem_mat[2] = -0.1666666666666667; -elem_mat[3] = 1.094805019211109e-10; -elem_mat[4] = 1.094804871759614e-10; -elem_mat[5] = -0.1666666666666667; -elem_mat[6] = -0.1666666667761472; -elem_mat[7] = -0.1666666666666667; -elem_mat[8] = 0.666666666447706; -elem_mat[9] = 1.094804941148553e-10; -elem_mat[10] = -0.1666666666666667; -elem_mat[11] = -0.1666666666666667; -elem_mat[12] = 1.094804732981736e-10; -elem_mat[13] = -0.1666666666666667; -elem_mat[14] = -0.1666666667761472; -elem_mat[15] = 0.666666666447706; -elem_mat[16] = 1.094804841401953e-10; -elem_mat[17] = -0.1666666667761472; -elem_mat[18] = -0.1666666666666667; -elem_mat[19] = 1.094804871759614e-10; -elem_mat[20] = -0.1666666666666667; -elem_mat[21] = 0.6666666664477059; -elem_mat[22] = -0.1666666666666668; -elem_mat[23] = -0.1666666667761472; -elem_mat[24] = -0.1666666666666667; -elem_mat[25] = 1.094804702624075e-10; -elem_mat[26] = 0.666666666447706; -elem_mat[27] = 1.094804802370675e-10; -elem_mat[28] = -0.1666666666666667; -elem_mat[29] = 1.094804698287266e-10; -elem_mat[30] = 0.666666666447706; -elem_mat[31] = 1.094805079926431e-10; -elem_mat[32] = -0.1666666666666667; -elem_mat[33] = 0.666666666447706; -elem_mat[34] = 1.094804663592797e-10; -elem_mat[35] = 0.666666666447706; -} - -UTEST_CASE(diffusionMatrix) -{ - std::vector elem_mat_correct(64); - get_test_elem_mat(elem_mat_correct); - - const size_t len = miniFE::Hex8::numNodesPerElem*miniFE::Hex8::numNodesPerElem; - Scalar elem_mat[len]; - Scalar testcoords[miniFE::Hex8::numNodesPerElem*miniFE::Hex8::spatialDim]; - - miniFE::get_hex8_node_coords_3d(0, 0, 0, 1.0, &testcoords[0]); - - miniFE::Hex8::diffusionMatrix_symm(testcoords, elem_mat); - - for(size_t i=0; i 1.e-6) { - return false; - } - } - - Scalar elem_vec_correct[miniFE::Hex8::numNodesPerElem]; - elem_vec_correct[0] = 0.125; - elem_vec_correct[1] = 0.125; - elem_vec_correct[2] = 0.125; - elem_vec_correct[3] = 0.125; - elem_vec_correct[4] = 0.125; - elem_vec_correct[5] = 0.125; - elem_vec_correct[6] = 0.125; - elem_vec_correct[7] = 0.125; - - Scalar elem_vec[miniFE::Hex8::numNodesPerElem]; - miniFE::Hex8::sourceVector(testcoords, elem_vec); - - const size_t nn = miniFE::Hex8::numNodesPerElem; - for(size_t i=0; i 1.e-13) { - return false; - } - } - - return true; -} - -UTEST_CASE(sum_into_row) -{ - SerialComputeNode compute_node; - miniFE::CSRMatrix A(compute_node); - A.rows.resize(1,0); - A.row_offsets.resize(2,0); - A.row_offsets[1] = 4; - A.packed_cols.resize(4); - A.packed_cols[0] = 0; - A.packed_cols[1] = 1; - A.packed_cols[2] = 2; - A.packed_cols[3] = 3; - A.packed_coefs.resize(4,0); - - std::vector indices(4); - indices[0] = 2; - indices[1] = 0; - indices[2] = 1; - indices[3] = 3; - std::vector coefs(4); - coefs[0] = 2.0; - coefs[1] = 0.0; - coefs[2] = 1.0; - coefs[3] = 3.0; - - miniFE::sum_into_row(0, 4, &indices[0], &coefs[0], A); - - coefs[0] = 0.0; - coefs[1] = 1.0; - coefs[2] = 2.0; - coefs[3] = 3.0; - - if (coefs != A.packed_coefs) { - return false; - } - - return true; -} - -UTEST_CASE(sum_in_elem_matrix) -{ - SerialComputeNode compute_node; - miniFE::CSRMatrix A(compute_node); - A.rows.resize(4,0); - A.rows[0] = 0; - A.rows[1] = 1; - A.rows[2] = 2; - A.rows[3] = 3; - A.row_offsets.resize(5,0); - A.row_offsets[1] = 4; - A.row_offsets[2] = 8; - A.row_offsets[3] = 12; - A.row_offsets[4] = 16; - A.packed_cols.resize(16); - A.packed_cols[0] = 0; - A.packed_cols[1] = 1; - A.packed_cols[2] = 2; - A.packed_cols[3] 
= 3; - A.packed_cols[4] = 0; - A.packed_cols[5] = 1; - A.packed_cols[6] = 2; - A.packed_cols[7] = 3; - A.packed_cols[8] = 0; - A.packed_cols[9] = 1; - A.packed_cols[10] = 2; - A.packed_cols[11] = 3; - A.packed_cols[12] = 0; - A.packed_cols[13] = 1; - A.packed_cols[14] = 2; - A.packed_cols[15] = 3; - - A.packed_coefs.resize(16,0); - - std::vector indices(4); - indices[0] = 2; - indices[1] = 0; - indices[2] = 1; - indices[3] = 3; - std::vector coefs(16); - coefs[0] = 2.0; - coefs[1] = 0.0; - coefs[2] = 1.0; - coefs[3] = 3.0; - coefs[4] = 2.0; - coefs[5] = 0.0; - coefs[6] = 1.0; - coefs[7] = 3.0; - coefs[8] = 2.0; - coefs[9] = 0.0; - coefs[10] = 1.0; - coefs[11] = 3.0; - coefs[12] = 2.0; - coefs[13] = 0.0; - coefs[14] = 1.0; - coefs[15] = 3.0; - - miniFE::sum_in_elem_matrix(4, &indices[0], &coefs[0], A); - - coefs[0] = 0.0; - coefs[1] = 1.0; - coefs[2] = 2.0; - coefs[3] = 3.0; - coefs[4] = 0.0; - coefs[5] = 1.0; - coefs[6] = 2.0; - coefs[7] = 3.0; - coefs[8] = 0.0; - coefs[9] = 1.0; - coefs[10] = 2.0; - coefs[11] = 3.0; - coefs[12] = 0.0; - coefs[13] = 1.0; - coefs[14] = 2.0; - coefs[15] = 3.0; - - if (coefs != A.packed_coefs) { - return false; - } - - return true; -} - -UTEST_CASE(assemble_FE_data) -{ - int global_box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; - int box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; - - miniFE::simple_mesh_description mesh(global_box, box); - - SerialComputeNode compute_node; - miniFE::CSRMatrix A(compute_node); - - miniFE::generate_matrix_structure(mesh, A); - - miniFE::Vector b(0, 8, compute_node); - - const int num_nodes = 8; - - std::vector symm_elem_mat_correct; - get_test_elem_mat(symm_elem_mat_correct); - std::vector full_elem_mat_correct(num_nodes*num_nodes); - - int offset = 0; - for(int i=0; i=i) { - Scalar coef = symm_elem_mat_correct[offset++]; - full_elem_mat_correct[i*num_nodes+j] = coef; - full_elem_mat_correct[j*num_nodes+i] = coef; - } - } - } - - std::vector elem_node_ids(num_nodes); - elem_node_ids[0] = 0; - elem_node_ids[1] = 1; - elem_node_ids[2] = 5; - elem_node_ids[3] = 4; - elem_node_ids[4] = 2; - elem_node_ids[5] = 3; - elem_node_ids[6] = 7; - elem_node_ids[7] = 6; - - //now for each row of of the 8x8 elem_mat_correct, reorder that - //row according to the order of elem_node_ids, rows and columns. 
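// Sketch of the reordering described above (an illustrative helper, not the test's actual
// loop): local node i of the element corresponds to assembled row/column elem_node_ids[i],
// so rows and columns are permuted together before comparing against the assembled matrix.
#include <vector>

template<typename Scalar>
std::vector<Scalar> reorder_by_node_ids(const std::vector<Scalar>& elem_mat_full,
                                        const std::vector<int>& node_ids, int n)
{
  std::vector<Scalar> reordered(n*n);
  for(int i=0; i<n; ++i)
    for(int j=0; j<n; ++j)
      reordered[node_ids[i]*n + node_ids[j]] = elem_mat_full[i*n + j];
  return reordered;
}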
- std::vector elem_mat_reordered(num_nodes*num_nodes); - offset = 0; - int row = 0; - for(int i=0; i& assembled_mat = A.packed_coefs; - - for(size_t i=0; i 1.e-13) { - return false; - } - } - - return true; -} - -UTEST_CASE(pll_matvec2) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (numprocs != 2) { - if (myproc == 0) std::cout <<"pll_matvec2_utest only runs when numprocs=2."< A(compute_node); - miniFE::Vector x(myproc, 4,compute_node) ,y(myproc, 4,compute_node); - - A.rows.resize(2, 0); - if (myproc == 0) { - A.rows[0] = 0; A.rows[1] = 1; - } - else { - A.rows[0] = 2; A.rows[1] = 3; - } - - A.row_offsets.resize(3, 0); - if (myproc == 0) { - A.row_offsets[1] = 2; A.row_offsets[2] = 6; - } - else { - A.row_offsets[1] = 2; A.row_offsets[2] = 4; - } - - if (myproc == 0) { - A.packed_cols.resize(6, 0); - A.packed_cols[1] = 1; - A.packed_cols[2] = 0; - A.packed_cols[3] = 1; - A.packed_cols[4] = 2; - A.packed_cols[5] = 3; - } - else { - A.packed_cols.resize(4, 0); - A.packed_cols[0] = 1; - A.packed_cols[1] = 2; - A.packed_cols[2] = 1; - A.packed_cols[3] = 3; - } - if (myproc == 0) { - A.packed_coefs.resize(6, 1); - A.packed_coefs[2] = 2; - A.packed_coefs[4] = -1; - } - else { - A.packed_coefs.resize(4, 1); - A.packed_coefs[0] = -2; - A.packed_coefs[2] = 2; - } - - if (myproc == 0) { - x.coefs[0] = 1; x.coefs[1] = 2; - } - else { - x.coefs[0] = 3; x.coefs[1] = 4; - } - - miniFE::make_local_matrix(A); - miniFE::exchange_externals(A, x); - miniFE::matvec(A, x, y); - - if (myproc == 0) { - if (y.coefs[0] != 3.0 || y.coefs[1] != 5.0) { - std::cout << "proc 0: pll_matvec2_utest failed" << std::endl; - return false; - } - } - else { - if (y.coefs[0] != -1.0 || y.coefs[1] != 8.0) { - std::cout << "proc 1: pll_matvec2_utest failed" << std::endl; - return false; - } - } - - return true; -} - -UTEST_CASE(pll_matvec3) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (numprocs != 3) { - if (myproc == 0) std::cout <<"pll_matvec3_utest only runs when numprocs=3."< A(compute_node); - miniFE::Vector x(myproc, 6, compute_node) ,y(myproc, 6, compute_node); - - A.rows.resize(2, 0); - A.rows[0] = myproc*2; A.rows[1] = myproc*2+1; - - A.row_offsets.resize(3, 0); - if (myproc == 0) { - A.row_offsets[1] = 2; A.row_offsets[2] = 4; - } - else if (myproc == 1) { - A.row_offsets[1] = 3; A.row_offsets[2] = 4; - } - else { - A.row_offsets[1] = 2; A.row_offsets[2] = 4; - } - - A.packed_cols.resize(4, 0); - if (myproc == 0) { - A.packed_cols[1] = 3; - A.packed_cols[2] = 1; - A.packed_cols[3] = 5; - } - else if (myproc == 1) { - A.packed_cols[1] = 2; - A.packed_cols[2] = 4; - A.packed_cols[3] = 3; - } - else { - A.packed_cols[0] = 1; - A.packed_cols[1] = 4; - A.packed_cols[2] = 3; - A.packed_cols[3] = 5; - } - - A.packed_coefs.resize(4, 1); - if (myproc == 0) { - A.packed_coefs[1] = -1; - A.packed_coefs[3] = -1; - } - else if (myproc == 1) { - A.packed_coefs[0] = 2; - A.packed_coefs[2] = -1; - } - else { - A.packed_coefs[0] = 2; - A.packed_coefs[2] = 2; - } - - if (myproc == 0) { - x.coefs[0] = 1; x.coefs[1] = 2; - } - else if (myproc == 1) { - x.coefs[0] = 3; x.coefs[1] = 4; - } - else { - x.coefs[0] = 5; x.coefs[1] = 6; - } - - miniFE::make_local_matrix(A); - miniFE::exchange_externals(A, x); - miniFE::matvec(A, x, y); - - if (myproc == 0) { - if (y.coefs[0] != -3.0 || y.coefs[1] != -4.0) { - std::cout << "proc 0: 
pll_matvec3 failed" << std::endl; - return false; - } - } - else if (myproc == 1) { - if (y.coefs[0] != 0.0 || y.coefs[1] != 4.0) { - std::cout << "proc 1: pll_matvec3 failed" << std::endl; - return false; - } - } - else { - if (y.coefs[0] != 9.0 || y.coefs[1] != 14.0) { - std::cout << "proc 2: pll_matvec3 failed" << std::endl; - return false; - } - } - - return true; -} - -UTEST_CASE(ComputeNode_waxpy1) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (numprocs != 1) { - if (myproc == 0) std::cout <<"ComputeNode_waxpy1 only runs when numprocs=1."< x(0, len, compute_node), y(0, len, compute_node), w(0, len, compute_node); - - std::vector inds(len, 0); - for(size_t i=0; i coefs(len, 1); - - miniFE::sum_into_vector(len, &inds[0], &coefs[0], x); - miniFE::sum_into_vector(len, &inds[0], &coefs[0], y); - miniFE::sum_into_vector(len, &inds[0], &coefs[0], w); - - Scalar* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - Scalar* d_y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); - - compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); - compute_node.copy_to_buffer(&y.coefs[0], y.coefs.size(), d_y); - - miniFE::waxpby(1.0, x, 1.0, y, w); - - Scalar* d_w = compute_node.get_buffer(&w.coefs[0], w.coefs.size()); - compute_node.copy_from_buffer(&w.coefs[0], w.coefs.size(), d_w); - - Scalar expected = 2; - Scalar tol = 1.e-7; - - for(size_t i=0; i tol) { - return false; - } - } - return true; -} - -UTEST_CASE(ComputeNode_dot1) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (numprocs != 1) { - if (myproc == 0) std::cout <<"ComputeNode_dot1 only runs when numprocs=1."< x(0, N, compute_node), y(0, N, compute_node); - - std::vector inds(N, 0); - for(size_t i=0; i coefs(N, 1); - - miniFE::sum_into_vector(N, &inds[0], &coefs[0], x); - miniFE::sum_into_vector(N, &inds[0], &coefs[0], y); - - Scalar* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - Scalar* d_y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); - - compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); - compute_node.copy_to_buffer(&y.coefs[0], y.coefs.size(), d_y); - - Scalar dot_prod = miniFE::dot(x,y); - - if (dot_prod != N) { - return false; - } - - return true; -} - -UTEST_CASE(ComputeNode_TBB_dot1) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (numprocs != 1) { - if (myproc == 0) std::cout <<"ComputeNode_TBB_dot1_utest only runs when numprocs=1."< x(0, N, compute_node), y(0, N, compute_node); - - std::vector inds(N, 0); - for(size_t i=0; i coefs(N, 1); - - miniFE::sum_into_vector(inds.size(), &inds[0], &coefs[0], x); - miniFE::sum_into_vector(inds.size(), &inds[0], &coefs[0], y); - - Scalar dot_prod = miniFE::dot(x,y); - - if (dot_prod != N) { - return false; - } - -#else - std::cout << "ComputeNode_TBB_dot1_utest only runs when MINIFE_HAVE_TBB is defined."< x(0, 10, compute_node), y(0, 10, compute_node); - - size_t len = 10; - std::vector inds(len, 0); - for(size_t i=0; i coefs(len, 1); - - miniFE::sum_into_vector(len, &inds[0], &coefs[0], x); - miniFE::sum_into_vector(len, &inds[0], &coefs[0], y); - - Scalar* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); - Scalar* d_y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); - - 
compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); - compute_node.copy_to_buffer(&y.coefs[0], y.coefs.size(), d_y); - - Scalar dot_prod = miniFE::dot(x, y); - - if (std::abs(dot_prod-10.0) > 1.e-12) { - return false; - } - return true; -} - -UTEST_CASE(ser_matvec1) -{ - int numprocs = 1, myproc = 0; -#ifdef HAVE_MPI - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#endif - - if (numprocs != 1) { - if (myproc == 0) std::cout <<"ser_matvec1_utest only runs when numprocs=1."< A(compute_node); - miniFE::Vector x(0, 4,compute_node) ,y(0, 4,compute_node); - - A.rows.resize(4, 0); - A.rows[0] = 0; A.rows[1] = 1; - A.rows[2] = 2; A.rows[3] = 3; - - A.row_offsets.resize(5, 0); - A.row_offsets[1] = 2; A.row_offsets[2] = 6; - A.row_offsets[3] = 8; A.row_offsets[4] = 10; - - A.packed_cols.resize(10, 0); - A.packed_cols[1] = 1; - A.packed_cols[2] = 0; - A.packed_cols[3] = 1; - A.packed_cols[4] = 2; - A.packed_cols[5] = 3; - A.packed_cols[6] = 1; - A.packed_cols[7] = 2; - A.packed_cols[8] = 1; - A.packed_cols[9] = 3; - - A.packed_coefs.resize(10, 1); - A.packed_coefs[2] = 2; - A.packed_coefs[4] = -1; - A.packed_coefs[6] = -2; - A.packed_coefs[8] = 2; - - x.coefs[0] = 1; x.coefs[1] = 2; x.coefs[2] = 3; x.coefs[3] = 4; - - for(size_t i=0; i 1.e-12) { - std::cout << "failed 0. y.coefs[0]=" < 1.e-12) { - std::cout << "failed 1. y.coefs[1]=" < 1.e-12) { - std::cout << "failed 2. y.coefs[2]=" < 1.e-12) { - std::cout << "failed 3. y.coefs[3]=" < x(0, len,compute_node) ,y(0, len,compute_node), w(0, len,compute_node); - - Scalar one = 1, zero = 0; - - for(size_t i=0; i1.e-2 ? 1.e-6 * (waxpy_flops/tWAXPY) : 0; - - std::cout << "waxpby_perf_utest: WAXPBY time: " << tWAXPY << ", len: " << len << ", num_iters: " << num_iters - << ", MFLOPS: " << waxpy_mflops << std::endl; - return true; -} - -UTEST_CASE(matmat3x3_1) -{ - Scalar A[] = {1, 4, 7, 2, 5, 8, 3, 6, 9}; - Scalar B[] = {1, 1, 1, 2, 2, 2, 3, 3, 3}; - Scalar C[9]; - - miniFE::matmat3x3(A, B, C); - - TEST_EQUAL(C[0], 6.0); - TEST_EQUAL(C[1], 15.0); - TEST_EQUAL(C[2], 24.0); - TEST_EQUAL(C[3], 12.0); - TEST_EQUAL(C[4], 30.0); - TEST_EQUAL(C[5], 48.0); - TEST_EQUAL(C[6], 18.0); - TEST_EQUAL(C[7], 45.0); - TEST_EQUAL(C[8], 72.0); - - return true; -} - -UTEST_CASE(matmat3x3_X_3xn_1) -{ - Scalar A[] = {1, 4, 7, 2, 5, 8, 3, 6, 9}; - Scalar B[] = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; - Scalar C[18]; - - miniFE::matmat3x3_X_3xn(A, 6, B, C); - - TEST_EQUAL(C[0], 6.0); - TEST_EQUAL(C[1], 15.0); - TEST_EQUAL(C[2], 24.0); - TEST_EQUAL(C[3], 12.0); - TEST_EQUAL(C[4], 30.0); - TEST_EQUAL(C[5], 48.0); - TEST_EQUAL(C[6], 18.0); - TEST_EQUAL(C[7], 45.0); - TEST_EQUAL(C[8], 72.0); - TEST_EQUAL(C[9], 24.0); - TEST_EQUAL(C[10], 60.0); - TEST_EQUAL(C[11], 96.0); - TEST_EQUAL(C[12], 30.0); - TEST_EQUAL(C[13], 75.0); - TEST_EQUAL(C[14], 120.0); - TEST_EQUAL(C[15], 36.0); - TEST_EQUAL(C[16], 90.0); - TEST_EQUAL(C[17], 144.0); - - return true; -} - -UTEST_CASE(matTransMat3x3_X_3xn_1) -{ - Scalar A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - Scalar B[] = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; - Scalar C[18]; - - miniFE::matTransMat3x3_X_3xn(A, 6, B, C); - - TEST_EQUAL(C[0], 6.0); - TEST_EQUAL(C[1], 15.0); - TEST_EQUAL(C[2], 24.0); - TEST_EQUAL(C[3], 12.0); - TEST_EQUAL(C[4], 30.0); - TEST_EQUAL(C[5], 48.0); - TEST_EQUAL(C[6], 18.0); - TEST_EQUAL(C[7], 45.0); - TEST_EQUAL(C[8], 72.0); - TEST_EQUAL(C[9], 24.0); - TEST_EQUAL(C[10], 60.0); - TEST_EQUAL(C[11], 96.0); - TEST_EQUAL(C[12], 30.0); - TEST_EQUAL(C[13], 
75.0); - TEST_EQUAL(C[14], 120.0); - TEST_EQUAL(C[15], 36.0); - TEST_EQUAL(C[16], 90.0); - TEST_EQUAL(C[17], 144.0); - - return true; -} - -UTEST_CASE(BoxIterator1) -{ - int box1[3][2] = {{ 0, 2 }, { 0, 2 }, { 0, 2 } }; - miniFE::BoxIterator iter = miniFE::BoxIterator::begin(box1); - miniFE::BoxIterator end = miniFE::BoxIterator::end(box1); - - for(int iz=box1[2][0]; iz(nx,ny,nz,iter.x,iter.y,-iter.z); - int x, y, z; - miniFE::get_coords(elemID, nx,ny,nz, x,y,z); - TEST_EQUAL(x,iter.x); - TEST_EQUAL(y,iter.y); - TEST_EQUAL(z,-iter.z); - } - - return true; -} - -#endif - diff --git a/kokkos/basic/verify_solution.hpp b/kokkos/basic/verify_solution.hpp deleted file mode 100644 index fb3bd3b..0000000 --- a/kokkos/basic/verify_solution.hpp +++ /dev/null @@ -1,170 +0,0 @@ -#ifndef _verify_solution_hpp_ -#define _verify_solution_hpp_ - -//@HEADER -// ************************************************************************ -// -// miniFE: simple finite-element assembly and linear-solve -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -namespace miniFE { - -template -struct err_info { - Scalar err; - Scalar computed; - Scalar analytic; - Scalar coords[3]; -}; - -template -void -verify_solution(const simple_mesh_description& mesh, - const VectorType& x) -{ - typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; - typedef typename VectorType::ScalarType Scalar; - - int global_nodes_x = mesh.global_box[0][1]+1; - int global_nodes_y = mesh.global_box[1][1]+1; - int global_nodes_z = mesh.global_box[2][1]+1; - Box box; - copy_box(mesh.local_box, box); - - //num-owned-nodes in each dimension is num-elems+1 - //only if num-elems > 0 in that dimension *and* - //we are at the high end of the global range in that dimension: - if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; - if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; - if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; - - GlobalOrdinal nrows = get_num_ids(box); - - std::vector rows(nrows); - std::vector row_coords(nrows*3); - - unsigned roffset = 0; - - for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, - ix, iy, iz); - Scalar x, y, z; - get_coords(row_id, global_nodes_x, global_nodes_y, global_nodes_z, x, y, z); - - rows[roffset] = mesh.map_id_to_row(row_id); - row_coords[roffset*3] = x; - row_coords[roffset*3+1] = y; - row_coords[roffset*3+2] = z; - ++roffset; - } - } - } - - if (x.local_size != rows.size() || x.local_size != nrows) { - throw std::runtime_error("verify_solution ERROR, size mismatch"); - } - - const int num_terms = 300; - - err_info max_error; - max_error.err = 0.0; - - for(size_t i=0; i max_error.err) { - max_error.err = err; - max_error.computed = computed_soln; - max_error.analytic = analytic_soln; - max_error.coords[0] = x; - max_error.coords[1] = y; - max_error.coords[2] = z; - } - } - - Scalar local_max_err = max_error.err; - Scalar global_max_err = 0; -#ifdef HAVE_MPI - MPI_Allreduce(&local_max_err, &global_max_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); -#else - global_max_err = local_max_err; -#endif - - if (local_max_err == global_max_err) { - if (max_error.err > 1.e-6) { - std::cout << "max absolute error is "< -#include - -#include -#include -#include "vectorTests.hpp" -#include "YAML_Element.hpp" -#include "YAML_Doc.hpp" -#ifdef HAVE_MPI -#include // If this routine is compiled with -DHAVE_MPI - // then include mpi.h -#endif -void addResults(YAML_Element * currentElement, const std::vector & times, double fnops); - - -#undef DEBUG -int main(int argc, char *argv[]) { -#ifdef HAVE_MPI - // Initialize MPI - MPI_Init(&argc, &argv); - int size, rank; // Number of MPI processes, My process ID - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - // I'm alive !!! 
- if (size < 100) std::cout << "Process "<> junk; -#endif - - if(argc != 2) { - std::cerr << "Usage:" << std::endl - << argv[0] << " n" << std::endl - << " where n is the problem size" << std::endl; - std::exit(1); - } - - size_t n = atoi(argv[1]); - int numTrials = 1000000/n; if (numTrials<1) numTrials = 1; - double fnops = 2.0 * ((double) size) *((double) n)*((double) numTrials); - std::vector times(4,0.0); - doc.add("Problem_size",n); - doc.add("Number_of_timing_trials",numTrials); - - std::vector dx(n, 1.0), dy(n, 1.0); - - int ierr = vectorTests(numTrials, dx, dy, times); - - if (ierr) std::cerr << "Error in call to vectorTests: " << ierr << ".\n" << std::endl; - - if (rank==0) { // Only PE 0 needs to compute and report timing results - - doc.add("Total_time_for_vector_tests_in_double",times[0]); - - - doc.add("Double_precision_results",""); - doc.get("Double_precision_results")->add("performance_summary",""); - YAML_Element * currentElement = doc.get("Double_precision_results"); - addResults(currentElement, times, fnops); - } - -#ifdef HAVE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - - std::vector fx(n, 1.0f), fy(n, 1.0f); - ierr = vectorTests(numTrials, fx, fy, times); - if (ierr) std::cerr << "Error in call to vectorTests: " << ierr << ".\n" << std::endl; - - if (rank==0) { // Only PE 0 needs to compute and report timing results - - doc.add("Total_time_for_vector_tests_in_float",times[0]); - - - doc.add("Float_precision_results",""); - doc.get("Float_precision_results")->add("performance_summary",""); - YAML_Element * currentElement = doc.get("Float_precision_results"); - addResults(currentElement, times, fnops); - } - - if (rank==0) { // Only PE 0 needs to compute and report timing results - - std::string yaml = doc.generateYAML(); - std::cout << yaml; - } - // Finish up -#ifdef HAVE_MPI - MPI_Finalize(); -#endif - return 0; -} - -void addResults(YAML_Element * currentElement, const std::vector & times, double fnops) { - - currentElement->get("performance_summary")->add("total",""); - currentElement->get("performance_summary")->get("total")->add("time",times[0]); - currentElement->get("performance_summary")->get("total")->add("flops",3.0*fnops); - currentElement->get("performance_summary")->get("total")->add("mflops",3.0*fnops/times[0]/1.0E6); - - currentElement->get("performance_summary")->add("std_vector_bracket_notation",""); - currentElement->get("performance_summary")->get("std_vector_bracket_notation")->add("time",times[1]); - currentElement->get("performance_summary")->get("std_vector_bracket_notation")->add("flops",fnops); - currentElement->get("performance_summary")->get("std_vector_bracket_notation")->add("mflops",fnops/times[1]/1.0E6); - - currentElement->get("performance_summary")->add("raw_pointer_bracket_notation",""); - currentElement->get("performance_summary")->get("raw_pointer_bracket_notation")->add("time",times[2]); - currentElement->get("performance_summary")->get("raw_pointer_bracket_notation")->add("flops",fnops); - currentElement->get("performance_summary")->get("raw_pointer_bracket_notation")->add("mflops",fnops/times[2]/1.0E6); - - currentElement->get("performance_summary")->add("raw_pointer_deref_plusplus_notation",""); - currentElement->get("performance_summary")->get("raw_pointer_deref_plusplus_notation")->add("time",times[3]); - currentElement->get("performance_summary")->get("raw_pointer_deref_plusplus_notation")->add("flops",fnops); - 
currentElement->get("performance_summary")->get("raw_pointer_deref_plusplus_notation")->add("mflops",fnops/times[3]/1.0E6); - - return; -} diff --git a/kokkos/common/main.cpp b/kokkos/common/main.cpp deleted file mode 100644 index 9724f69..0000000 --- a/kokkos/common/main.cpp +++ /dev/null @@ -1,292 +0,0 @@ - -//@HEADER -// ************************************************************************ -// -// HPCCG: Simple Conjugate Gradient Benchmark Code -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -// Changelog -// -// Version 0.3 -// - Added timing of setup time for sparse MV -// - Corrected percentages reported for sparse MV with overhead -// -///////////////////////////////////////////////////////////////////////// - -// Main routine of a program that reads a sparse matrix, right side -// vector, solution vector and initial guess from a file in HPC -// format. This program then calls the HPCCG conjugate gradient -// solver to solve the problem, and then prints results. - -// Calling sequence: - -// test_HPCCG linear_system_file - -// Routines called: - -// read_HPC_row - Reads in linear system - -// mytimer - Timing routine (compile with -DWALL to get wall clock -// times - -// HPCCG - CG Solver - -// compute_residual - Compares HPCCG solution to known solution. - -#include -using std::cout; -using std::cerr; -using std::endl; -#include -#include -#include -#include -#include -#include -#ifdef USING_MPI -#include // If this routine is compiled with -DUSING_MPI - // then include mpi.h -#include "make_local_matrix.hpp" // Also include this function -#endif -#include "generate_matrix.hpp" -#include "read_HPC_row.hpp" -#include "mytimer.hpp" -#include "HPC_sparsemv.hpp" -#include "compute_residual.hpp" -#include "HPCCG.hpp" -#include "HPC_Sparse_Matrix.hpp" -#include "YAML_generator.hpp" -//#include "YAML_generator.cpp" - -#undef DEBUG -int main(int argc, char *argv[]) -{ - HPC_Sparse_Matrix *A; - double *x, *b, *xexact; - double norm, d; - int ierr = 0; - int i, j; - int ione = 1; - double times[7]; - double t6 = 0.0; - YAML_Doc doc("HPCCG","1.0"); - -#ifdef USING_MPI - - // Initialize MPI - MPI_Init(&argc, &argv); - int size, rank; // Number of MPI processes, My process ID - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - // I'm alive !!! 
- - if (size < 100) cout << "Process "<> junk; - } - - MPI_Barrier(MPI_COMM_WORLD); -#endif - - - if(argc != 2 && argc!=4) { - if (rank==0) - cerr << "Usage:" << endl - << "Mode 1: " << argv[0] << " nx ny nz" << endl - << " where nx, ny and nz are the local sub-block dimensions, or" << endl - << "Mode 2: " << argv[0] << " HPC_data_file " << endl - << " where HPC_data_file is a globally accessible file containing matrix data." << endl; - exit(1); - } - - if (argc==4) { - int nx = atoi(argv[1]); - int ny = atoi(argv[2]); - int nz = atoi(argv[3]); - doc.add("nx",argv[1]); - doc.add("ny",argv[2]); - doc.add("nz",argv[3]); - generate_matrix(nx, ny, nz, &A, &x, &b, &xexact); - } - else - read_HPC_row(argv[1], &A, &x, &b, &xexact); - -#ifdef USING_MPI - - // Transform matrix indices from global to local values. - // Define number of columns for the local matrix. - - t6 = mytimer(); make_local_matrix(A); t6 = mytimer() - t6; - times[6] = t6; - -#endif - - double t1 = mytimer(); // Initialize it (if needed) - int niters = 0; - double normr = 0.0; - int max_iter = 300; - double tolerance = 0.0; // Set tolerance to zero to make all runs do max_iter iterations - ierr = HPCCG( A, b, x, max_iter, tolerance, niters, normr, times); - - if (ierr) cerr << "Error in call to CG: " << ierr << ".\n" << endl; - -#ifdef USING_MPI - double t4 = times[4]; - double t4min = 0.0; - double t4max = 0.0; - double t4avg = 0.0; - MPI_Allreduce(&t4, &t4min, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); - MPI_Allreduce(&t4, &t4max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - MPI_Allreduce(&t4, &t4avg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - t4avg = t4avg/((double) size); -#endif - - if (rank==0) // Only PE 0 needs to compute and report timing results - { - doc.add("time_spent_in_CG",times[0]); - - cout << "Time spent in CG = " << times[0] << ".\n" << endl; - double fniters = niters; - double fnrow = A->total_nrow; double fnnz = A->total_nnz; - double fnops_ddot = fniters*4*fnrow; - double fnops_waxpby = fniters*6*fnrow; - double fnops_sparsemv = fniters*2*fnnz; - double fnops = fnops_ddot+fnops_waxpby+fnops_sparsemv; - - doc.add("number_of_iterations",fniters); - doc.add("final_residual",normr); - doc.add("performance_summary",""); - doc.get("performance_summary")->add("total",""); - doc.get("performance_summary")->get("total")->add("time",times[0]); - doc.get("performance_summary")->get("total")->add("flops",fnops); - doc.get("performance_summary")->get("total")->add("mflops",fnops/times[0]/1.0E6); - - doc.get("performance_summary")->add("ddot",""); - doc.get("performance_summary")->get("ddot")->add("time",times[1]); - doc.get("performance_summary")->get("ddot")->add("flops",fnops_ddot); - doc.get("performance_summary")->get("ddot")->add("mflops",fnops_ddot/times[1]/1.0E6); - - doc.get("performance_summary")->add("waxpby",""); - doc.get("performance_summary")->get("waxpby")->add("time",times[2]); - doc.get("performance_summary")->get("waxpby")->add("flops",fnops_waxpby); - doc.get("performance_summary")->get("waxpby")->add("mflops",fnops_waxpby/times[2]/1.0E6); - - doc.get("performance_summary")->add("sparsemv",""); - doc.get("performance_summary")->get("sparsemv")->add("time",times[3]); - doc.get("performance_summary")->get("sparsemv")->add("flops",fnops_sparsemv); - doc.get("performance_summary")->get("sparsemv")->add("mflops",fnops_sparsemv/times[3]/1.0E6); - - cout << "Number of iterations = " << niters << ".\n" << endl; - cout << "Final residual = " << normr << ".\n" << endl; - cout << "********** Performance Summary 
(times in sec) ***********" << endl << endl; - cout << "Total Time/FLOPS/MFLOPS = " - << times[0] << "/" << fnops << "/" - << fnops/times[0]/1.0E6 << "." << endl; - cout << "DDOT Time/FLOPS/MFLOPS = " - << times[1] << "/" << fnops_ddot << "/" - << fnops_ddot/times[1]/1.0E6 << "." << endl; -#ifdef USING_MPI - doc.get("performance_summary")->get("ddot")->add("min_MPI_Allreduce_time",t4min); - doc.get("performance_summary")->get("ddot")->add("max_MPI_Allreduce_time",t4max); - doc.get("performance_summary")->get("ddot")->add("avg_MPI_Allreduce_time",t4avg); - - cout << " Minimum DDOT MPI_Allreduce time (over all processors) = " << t4min << endl; - cout << " Maximum DDOT MPI_Allreduce time (over all processors) = " << t4max << endl; - cout << " Average DDOT MPI_Allreduce time (over all processors) = " << t4avg << endl; -#endif - cout << "WAXPBY Time/FLOPS/MFLOPS = " - << times[2] << "/" << fnops_waxpby << "/" - << fnops_waxpby/times[2]/1.0E6 << "." << endl; - cout << "SPARSEMV Time/FLOPS/MFLOPS = " - << times[3] << "/" << fnops_sparsemv << "/" - << fnops_sparsemv/(times[3])/1.0E6 << "." << endl; -#ifdef USING_MPI - double totalSparseMVTime = times[3] + times[5]+ times[6]; - - double mflops_w_overhead = fnops_sparsemv/(totalSparseMVTime)/1.0E6; - double po_time = (times[5]+times[6]); - double po_perc = po_time/totalSparseMVTime*100.0; - double po_set_time = times[6]; - double po_set_perc = po_set_time/totalSparseMVTime*100.0; - double po_Bdry_exch_time = times[5]; - double po_Bdry_exch_perc = po_Bdry_exch_time/totalSparseMVTime*100.0; - - YAML_Element* currnet_elem = doc.get("performance_summary")->get("sparsemv"); - currnet_elem->add("mflops_w_overhead",mflops_w_overhead); - currnet_elem->add("parallel_overhead",""); - currnet_elem->get("parallel_overhead")->add("time",po_time); - currnet_elem->get("parallel_overhead")->add("percentage",po_perc); - currnet_elem->get("parallel_overhead")->add("setup",""); - currnet_elem->get("parallel_overhead")->get("setup")->add("time",po_set_time); - currnet_elem->get("parallel_overhead")->get("setup")->add("percentage",po_set_perc); - currnet_elem->get("parallel_overhead")->add("Bdry_exchange",""); - currnet_elem->get("parallel_overhead")->get("Bdry_exchange")->add("time",po_Bdry_exch_time); - currnet_elem->get("parallel_overhead")->get("Bdry_exchange")->add("percentage",po_Bdry_exch_perc); - - cout << "SPARSEMV MFLOPS W OVRHEAD = " - << mflops_w_overhead << "." << endl; - cout << "SPARSEMV PARALLEL OVERHEAD Time = " - << po_time << " ( " << po_perc << " % )." << endl; - cout << " SPARSEMV PARALLEL OVERHEAD (Setup) Time = " - << po_set_time << " ( " << po_set_perc << " % )." << endl; - cout << " SPARSEMV PARALLEL OVERHEAD (Bdry Exchange) Time = " - << po_Bdry_exch_time << " ( " << po_Bdry_exch_perc << " % )." << endl; -#endif - } - - // Compute difference between known exact solution and computed solution - // All processors are needed here. 
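// The residual reported here comes from compute_residual(n, x, xexact, &residual), whose
// implementation is not part of this diff. A plausible serial sketch, consistent with the
// call signature used below (the comment above notes that under MPI all processors take
// part, presumably via a max-reduction of the local value):
#include <cmath>

static int compute_residual_sketch(int n, const double* const x,
                                   const double* const xexact, double* residual)
{
  double local_max = 0.0;
  for (int i = 0; i < n; ++i) {     // max componentwise |x - xexact|
    double diff = std::fabs(x[i] - xexact[i]);
    if (diff > local_max) local_max = diff;
  }
  *residual = local_max;            // an MPI build would MPI_Allreduce(MPI_MAX) this value
  return 0;                         // 0 == success, matching the caller's error check
}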
- - double residual = 0; - if ((ierr = compute_residual(A->local_nrow, x, xexact, &residual))) - cerr << "Error in call to compute_residual: " << ierr << ".\n" << endl; - - if (rank==0){ - cout << "Difference between computed and exact = " - << residual << ".\n" << endl; - doc.add("diff_between_computed_and_exact",residual); - string yaml = doc.generateYAML(); - cout << yaml; - } - // Finish up -#ifdef USING_MPI - MPI_Finalize(); -#endif - return 0 ; -} diff --git a/kokkos/common/mytimer.cpp b/kokkos/common/mytimer.cpp deleted file mode 100644 index 71e19e6..0000000 --- a/kokkos/common/mytimer.cpp +++ /dev/null @@ -1,109 +0,0 @@ - -//@HEADER -// ************************************************************************ -// -// HPCCG: Simple Conjugate Gradient Benchmark Code -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER - -///////////////////////////////////////////////////////////////////////// - -// Function to return time in seconds. -// If compiled with no flags, return CPU time (user and system). -// If compiled with -DWALL, returns elapsed time. 
- -///////////////////////////////////////////////////////////////////////// -#ifdef HAVE_MPI -#include // If this routine is compiled with -DHAVE_MPI - // then include mpi.h -double mytimer(void) -{ - return(MPI_Wtime()); -} - - -#elif defined(UseClock) - -#include -double mytimer(void) -{ - clock_t t1; - static clock_t t0=0; - static double CPS = CLOCKS_PER_SEC; - double d; - - if (t0 == 0) t0 = clock(); - t1 = clock() - t0; - d = t1 / CPS; - return(d); -} - -#elif defined(WALL) - -#include -#include -#include -double mytimer(void) -{ - struct timeval tp; - static long start=0, startu; - if (!start) - { - gettimeofday(&tp, NULL); - start = tp.tv_sec; - startu = tp.tv_usec; - return(0.0); - } - gettimeofday(&tp, NULL); - return( ((double) (tp.tv_sec - start)) + (tp.tv_usec-startu)/1000000.0 ); -} - -#elif defined(UseTimes) - -#include -#include -#include -double mytimer(void) -{ - struct tms ts; - static double ClockTick=0.0; - - if (ClockTick == 0.0) ClockTick = (double) sysconf(_SC_CLK_TCK); - times(&ts); - return( (double) ts.tms_utime / ClockTick ); -} - -#else - -#include -#include -#include -double mytimer(void) -{ - struct rusage ruse; - getrusage(RUSAGE_SELF, &ruse); - return( (double)(ruse.ru_utime.tv_sec+ruse.ru_utime.tv_usec / 1000000.0) ); -} - -#endif diff --git a/kokkos/common/mytimer.hpp b/kokkos/common/mytimer.hpp deleted file mode 100644 index 94226a3..0000000 --- a/kokkos/common/mytimer.hpp +++ /dev/null @@ -1,32 +0,0 @@ - -//@HEADER -// ************************************************************************ -// -// HPCCG: Simple Conjugate Gradient Benchmark Code -// Copyright (2006) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER -#ifndef MYTIMER_H -#define MYTIMER_H -double mytimer(void); -#endif // MYTIMER_H diff --git a/kokkos/common/param_utils.cpp b/kokkos/common/param_utils.cpp deleted file mode 100644 index 0d9cbf3..0000000 --- a/kokkos/common/param_utils.cpp +++ /dev/null @@ -1,32 +0,0 @@ - -#include - -#include -#include - -namespace Mantevo { - -//------------------------------------------------------------- -void read_args_into_string(int argc, char** argv, std::string& arg_string) -{ - arg_string = argv[0]; - for(int i=1; i -#include - -//Parameter-parsing Utilities: -// -//The functions declared below are intended to assist with parsing -//input-parameters which may be command-line arguments and/or lines in a -//text file. 
-// -// Scenario: You want your program to accept parameters that are specified -// as command-line arguments and/or as lines in a text file (such -// as a YAML output file). i.e., your program can be run like this: -// % program.exe foo=3.14159 bar: 42 -// or -// % program.exe input_file=params.txt -// or -// % program.exe foo=3.14159 input_file = params.txt -// -//Example: -// Here is example code to obtain parameters using the 3 functions -// 'read_args_into_string', 'read_file_into_string' and 'parse_parameter': -// -// std::string arg_string; -// -// //put command-line-arguments into 'arg_string': -// read_args_into_string(argc, argv, arg_string); -// -// //do the command-line-arguments specify an 'input_file'? -// std::string filename = -// parse_parameter(arg_string,"input_file","none-specified"); -// -// if (filename != "none-specified") { -// std::string tmp; -// read_file_into_string(filename, tmp); -// arg_string += tmp; -// } -// -// //now parse the parameters: -// float foo = parse_parameter(arg_string, "foo", -9.9); -// int bar = parse_parameter(arg_string, "bar", -1); -// -//See the comments below for parse_parameter, for formatting requirements of -//named parameter-value pairs. -// - -namespace Mantevo { - -/** - * Concatenate command-line arguments into a single string. - * - * Note: this function is purely serial. If argc and argv have different - * values on different MPI processes, then you need to resolve that by - * broadcasting arg_string's contents. - */ -void read_args_into_string(int argc, char** argv, std::string& arg_string); - -/** - * Read the contents of a text-file into a single string. - * - * Note: this function is purely serial. If you want file_contents on multiple - * MPI processes, you need to broadcast it (or call this function on each - * MPI process...). - */ -void read_file_into_string(const std::string& filename, - std::string& file_contents); - -/** - * Parse a named parameter value from input 'arg_string'. - * - * Search 'arg_string' for an occurrence of param_name and attempt to parse - * a value into the return-type. If param_name is not found, then default_value - * is returned. - * - * Example: - * arg_string = "foo = 3.14159"; - * float foo = parse_parameter(arg_string, "foo", -999.9); - * //foo should now contain the value 3.14159; if 'foo' was not found in - * //arg_string, then -999.9 would have been returned. - * - * Other legal name-value separators are ':' and ' '. Extra spaces are also ok, - * e.g. "foo : 3.114159". - * - * Note that if a YAML file is read into a string, that would be a valid input - * string for this function. 
- */ -template -T parse_parameter(const std::string& arg_string, - const std::string& param_name, - const T& default_value) -{ - std::string::size_type pos = arg_string.find(param_name); - if (pos == std::string::npos) { - //if param_name is not found in arg_string, return default_value: - return default_value; - } - - pos += param_name.size(); - - if (arg_string.size() <= pos) return default_value; - - //skip past ' ', '=' or ':': - while(pos < arg_string.size() && - (arg_string[pos] == ' ' || - arg_string[pos] == '=' || - arg_string[pos] == ':')) - { - ++pos; - } - - if (arg_string[pos] == '=' || arg_string[pos] == ':') ++pos; - - std::string str = arg_string.substr(pos); - - std::istringstream isstr(str); - - T return_val = default_value; - - //parse value into return_val: - isstr >> return_val; - - //if parse failed, return default_value: - if (!isstr) return default_value; - - return return_val; -} - -}//namespace Mantevo - -#endif - diff --git a/kokkos/common/vectorTests.hpp b/kokkos/common/vectorTests.hpp deleted file mode 100644 index 53ce682..0000000 --- a/kokkos/common/vectorTests.hpp +++ /dev/null @@ -1,66 +0,0 @@ - -//@HEADER -// ************************************************************************ -// -// Mantevo: A collection of mini-applications for HPC -// Copyright (2008) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 -// USA -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// ************************************************************************ -//@HEADER -#include -#include "mytimer.hpp" -template -int vectorTests(int numTrials, const std::vector & x, std::vector & y, - std::vector & times) { - - Scalar alpha = 2.0; - double t0; - - size_t n = x.size(); - - double tstart = mytimer(); // Initial time - - t0 = mytimer(); - for (int j=0;jcollective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. - */ - -#pragma once - -#include "../util_type.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief The BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. ![](discont_logo.png) - * \ingroup BlockModule - * - * \par Overview - * A set of "head flags" (or "tail flags") is often used to indicate corresponding items - * that differ from their predecessors (or successors). For example, head flags are convenient - * for demarcating disjoint data segments as part of a segmented scan or reduction. 
- * - * \tparam T The data type to be flagged. - * \tparam BLOCK_THREADS The thread block size in threads. - * - * \par A Simple Example - * \blockcollective{BlockDiscontinuity} - * \par - * The code snippet below illustrates the head flagging of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockDiscontinuity for 128 threads on type int - * typedef cub::BlockDiscontinuity BlockDiscontinuity; - * - * // Allocate shared memory for BlockDiscontinuity - * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute head flags for discontinuities in the segment - * int head_flags[4]; - * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. - * The corresponding output \p head_flags in those threads will be - * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. - * - * \par Performance Considerations - * - Zero bank conflicts for most types. - * - */ -template < - typename T, - int BLOCK_THREADS> -class BlockDiscontinuity -{ -private: - - /****************************************************************************** - * Type definitions - ******************************************************************************/ - - /// Shared memory storage layout type (last element from each thread's input) - typedef T _TempStorage[BLOCK_THREADS]; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - - /// Specialization for when FlagOp has third index param - template ::HAS_PARAM> - struct ApplyOp - { - // Apply flag operator - static __device__ __forceinline__ bool Flag(FlagOp flag_op, const T &a, const T &b, int idx) - { - return flag_op(a, b, idx); - } - }; - - /// Specialization for when FlagOp does not have a third index param - template - struct ApplyOp - { - // Apply flag operator - static __device__ __forceinline__ bool Flag(FlagOp flag_op, const T &a, const T &b, int idx) - { - return flag_op(a, b); - } - }; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - -public: - - /// \smemstorage{BlockDiscontinuity} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. 
- */ - __device__ __forceinline__ BlockDiscontinuity() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockDiscontinuity( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockDiscontinuity( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. - */ - __device__ __forceinline__ BlockDiscontinuity( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - - //@} end member group - /******************************************************************//** - * \name Head flag operations - *********************************************************************/ - //@{ - - - /** - * \brief Sets head flags indicating discontinuities between items partitioned across the thread block, for which the first item has no reference and is always flagged. - * - * The flag head_flagsi is set for item - * inputi when - * flag_op(previous-item, inputi) - * returns \p true (where previous-item is either the preceding item - * in the same thread or the last item in the previous thread). - * Furthermore, head_flagsi is always set for - * input>0 in thread0. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates the head-flagging of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockDiscontinuity for 128 threads on type int - * typedef cub::BlockDiscontinuity BlockDiscontinuity; - * - * // Allocate shared memory for BlockDiscontinuity - * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute head flags for discontinuities in the segment - * int head_flags[4]; - * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. - * The corresponding output \p head_flags in those threads will be - * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
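Read sequentially, the head-flag rule documented above reduces to a short reference loop. A host-side sketch of what this FlagHeads overload computes over the concatenated tile (illustration of the semantics only, not the shared-memory implementation that follows):

    #include <vector>

    // Reference semantics for FlagHeads without a tile predecessor: the first
    // item is always flagged; every other item is flagged when
    // flag_op(previous_item, current_item) is true (here: cub::Inequality()).
    std::vector<int> flag_heads_reference(const std::vector<int> &tile)
    {
        std::vector<int> head_flags(tile.size(), 0);
        if (tile.empty()) return head_flags;
        head_flags[0] = 1;                                      // no predecessor: always flagged
        for (std::size_t i = 1; i < tile.size(); ++i)
            head_flags[i] = (tile[i - 1] != tile[i]) ? 1 : 0;
        return head_flags;
    }

For the documented input {0,0,1,1, 1,1,1,1, 2,3,3,3, ...} this produces {1,0,1,0, 0,0,0,0, 1,1,0,0, ...}, matching the snippet above.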
- * \tparam FlagT [inferred] The flag type (must be an integer type) - * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. - */ - template < - int ITEMS_PER_THREAD, - typename FlagT, - typename FlagOp> - __device__ __forceinline__ void FlagHeads( - FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - FlagOp flag_op) ///< [in] Binary boolean flag predicate - { - // Share last item - temp_storage[linear_tid] = input[ITEMS_PER_THREAD - 1]; - - __syncthreads(); - - // Set flag for first item - head_flags[0] = (linear_tid == 0) ? - 1 : // First thread - ApplyOp::Flag( - flag_op, - temp_storage[linear_tid - 1], - input[0], - linear_tid * ITEMS_PER_THREAD); - - // Set head_flags for remaining items - #pragma unroll - for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ITEM++) - { - head_flags[ITEM] = ApplyOp::Flag( - flag_op, - input[ITEM - 1], - input[ITEM], - (linear_tid * ITEMS_PER_THREAD) + ITEM); - } - } - - - /** - * \brief Sets head flags indicating discontinuities between items partitioned across the thread block. - * - * The flag head_flagsi is set for item - * inputi when - * flag_op(previous-item, inputi) - * returns \p true (where previous-item is either the preceding item - * in the same thread or the last item in the previous thread). - * For thread0, item input0 is compared - * against \p tile_predecessor_item. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates the head-flagging of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockDiscontinuity for 128 threads on type int - * typedef cub::BlockDiscontinuity BlockDiscontinuity; - * - * // Allocate shared memory for BlockDiscontinuity - * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Have thread0 obtain the predecessor item for the entire tile - * int tile_predecessor_item; - * if (threadIdx.x == 0) tile_predecessor_item == ... - * - * // Collectively compute head flags for discontinuities in the segment - * int head_flags[4]; - * BlockDiscontinuity(temp_storage).FlagHeads( - * head_flags, thread_data, cub::Inequality(), tile_predecessor_item); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }, - * and that \p tile_predecessor_item is \p 0. The corresponding output \p head_flags in those threads will be - * { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
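The tile_predecessor_item overload is what makes multi-tile processing seamless: thread0 supplies the last element of the previous tile, so segment heads are not re-flagged at tile boundaries. A hypothetical kernel sketch under assumed sizes (128 threads, 4 items per thread); the kernel name and the choice to fetch the predecessor from global memory are illustrative, not from the original source:

    #include <cub/cub.cuh>   // umbrella header for BlockDiscontinuity and cub::Inequality

    __global__ void MultiTileFlagHeads(const int *d_in, int *d_head_flags, int num_tiles)
    {
        typedef cub::BlockDiscontinuity<int, 128> BlockDiscontinuityT;
        __shared__ typename BlockDiscontinuityT::TempStorage temp_storage;

        const int TILE_ITEMS = 128 * 4;

        for (int tile = 0; tile < num_tiles; ++tile)
        {
            int tile_base = tile * TILE_ITEMS;

            // Blocked load of this thread's 4 consecutive items
            int thread_data[4];
            for (int i = 0; i < 4; ++i)
                thread_data[i] = d_in[tile_base + threadIdx.x * 4 + i];

            // Predecessor for thread0: the last item of the previous tile; for the
            // very first tile, use a value guaranteed to differ so item 0 is flagged.
            int tile_predecessor = (tile == 0) ? d_in[0] - 1 : d_in[tile_base - 1];

            int head_flags[4];
            BlockDiscontinuityT(temp_storage).FlagHeads(
                head_flags, thread_data, cub::Inequality(), tile_predecessor);

            for (int i = 0; i < 4; ++i)
                d_head_flags[tile_base + threadIdx.x * 4 + i] = head_flags[i];

            __syncthreads();   // temp_storage is reused by the next iteration
        }
    }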
- * \tparam FlagT [inferred] The flag type (must be an integer type) - * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. - */ - template < - int ITEMS_PER_THREAD, - typename FlagT, - typename FlagOp> - __device__ __forceinline__ void FlagHeads( - FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - FlagOp flag_op, ///< [in] Binary boolean flag predicate - T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). - { - // Share last item - temp_storage[linear_tid] = input[ITEMS_PER_THREAD - 1]; - - __syncthreads(); - - // Set flag for first item - int predecessor = (linear_tid == 0) ? - tile_predecessor_item : // First thread - temp_storage[linear_tid - 1]; - - head_flags[0] = ApplyOp::Flag( - flag_op, - predecessor, - input[0], - linear_tid * ITEMS_PER_THREAD); - - // Set flag for remaining items - #pragma unroll - for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ITEM++) - { - head_flags[ITEM] = ApplyOp::Flag( - flag_op, - input[ITEM - 1], - input[ITEM], - (linear_tid * ITEMS_PER_THREAD) + ITEM); - } - } - - - //@} end member group - /******************************************************************//** - * \name Tail flag operations - *********************************************************************/ - //@{ - - - /** - * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block, for which the last item has no reference and is always flagged. - * - * The flag tail_flagsi is set for item - * inputi when - * flag_op(inputi, next-item) - * returns \p true (where next-item is either the next item - * in the same thread or the first item in the next thread). - * Furthermore, tail_flagsITEMS_PER_THREAD-1 is always - * set for threadBLOCK_THREADS-1. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates the tail-flagging of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockDiscontinuity for 128 threads on type int - * typedef cub::BlockDiscontinuity BlockDiscontinuity; - * - * // Allocate shared memory for BlockDiscontinuity - * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute tail flags for discontinuities in the segment - * int tail_flags[4]; - * BlockDiscontinuity(temp_storage).FlagTails(tail_flags, thread_data, cub::Inequality()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }. - * The corresponding output \p tail_flags in those threads will be - * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
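Tail flags are the mirror image of head flags, and they directly yield run lengths, which is how discontinuity flagging feeds segmented reductions and the sort-based histogram later in this hunk. A host-side reference of the FlagTails rule documented above, plus the run-length extraction it enables (illustration only):

    #include <vector>

    // Reference semantics for FlagTails without a tile successor: the last item is
    // always flagged; every other item is flagged when flag_op(item, next_item) is true.
    std::vector<int> flag_tails_reference(const std::vector<int> &tile)
    {
        std::vector<int> tail_flags(tile.size(), 0);
        if (tile.empty()) return tail_flags;
        for (std::size_t i = 0; i + 1 < tile.size(); ++i)
            tail_flags[i] = (tile[i] != tile[i + 1]) ? 1 : 0;   // cub::Inequality()
        tail_flags[tile.size() - 1] = 1;                        // no successor: always flagged
        return tail_flags;
    }

    // A run ends wherever a tail flag is set, so run lengths fall out directly.
    std::vector<int> run_lengths_from_tails(const std::vector<int> &tail_flags)
    {
        std::vector<int> lengths;
        int current = 0;
        for (std::size_t i = 0; i < tail_flags.size(); ++i)
        {
            ++current;
            if (tail_flags[i]) { lengths.push_back(current); current = 0; }
        }
        return lengths;
    }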
- * \tparam FlagT [inferred] The flag type (must be an integer type) - * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. - */ - template < - int ITEMS_PER_THREAD, - typename FlagT, - typename FlagOp> - __device__ __forceinline__ void FlagTails( - FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - FlagOp flag_op) ///< [in] Binary boolean flag predicate - { - // Share first item - temp_storage[linear_tid] = input[0]; - - __syncthreads(); - - // Set flag for last item - tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? - 1 : // Last thread - ApplyOp::Flag( - flag_op, - input[ITEMS_PER_THREAD - 1], - temp_storage[linear_tid + 1], - (linear_tid * ITEMS_PER_THREAD) + (ITEMS_PER_THREAD - 1)); - - // Set flags for remaining items - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD - 1; ITEM++) - { - tail_flags[ITEM] = ApplyOp::Flag( - flag_op, - input[ITEM], - input[ITEM + 1], - (linear_tid * ITEMS_PER_THREAD) + ITEM); - } - } - - - /** - * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block. - * - * The flag tail_flagsi is set for item - * inputi when - * flag_op(inputi, next-item) - * returns \p true (where next-item is either the next item - * in the same thread or the first item in the next thread). - * For threadBLOCK_THREADS-1, item - * inputITEMS_PER_THREAD-1 is compared - * against \p tile_predecessor_item. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates the tail-flagging of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockDiscontinuity for 128 threads on type int - * typedef cub::BlockDiscontinuity BlockDiscontinuity; - * - * // Allocate shared memory for BlockDiscontinuity - * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Have thread127 obtain the successor item for the entire tile - * int tile_successor_item; - * if (threadIdx.x == 127) tile_successor_item == ... - * - * // Collectively compute tail flags for discontinuities in the segment - * int tail_flags[4]; - * BlockDiscontinuity(temp_storage).FlagTails( - * tail_flags, thread_data, cub::Inequality(), tile_successor_item); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } - * and that \p tile_successor_item is \p 125. The corresponding output \p tail_flags in those threads will be - * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
- * \tparam FlagT [inferred] The flag type (must be an integer type) - * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. - */ - template < - int ITEMS_PER_THREAD, - typename FlagT, - typename FlagOp> - __device__ __forceinline__ void FlagTails( - FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - FlagOp flag_op, ///< [in] Binary boolean flag predicate - T tile_successor_item) ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). - { - // Share first item - temp_storage[linear_tid] = input[0]; - - __syncthreads(); - - // Set flag for last item - int successor_item = (linear_tid == BLOCK_THREADS - 1) ? - tile_successor_item : // Last thread - temp_storage[linear_tid + 1]; - - tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::Flag( - flag_op, - input[ITEMS_PER_THREAD - 1], - successor_item, - (linear_tid * ITEMS_PER_THREAD) + (ITEMS_PER_THREAD - 1)); - - // Set flags for remaining items - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD - 1; ITEM++) - { - tail_flags[ITEM] = ApplyOp::Flag( - flag_op, - input[ITEM], - input[ITEM + 1], - (linear_tid * ITEMS_PER_THREAD) + ITEM); - } - } - - //@} end member group - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/block/block_exchange.cuh b/kokkos/kokkos/TPL/cub/block/block_exchange.cuh deleted file mode 100644 index b7b9534..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_exchange.cuh +++ /dev/null @@ -1,918 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * The cub::BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. - */ - -#pragma once - -#include "../util_arch.cuh" -#include "../util_macro.cuh" -#include "../util_type.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief The BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. ![](transpose_logo.png) - * \ingroup BlockModule - * - * \par Overview - * It is commonplace for blocks of threads to rearrange data items between - * threads. For example, the global memory subsystem prefers access patterns - * where data items are "striped" across threads (where consecutive threads access consecutive items), - * yet most block-wide operations prefer a "blocked" partitioning of items across threads - * (where consecutive items belong to a single thread). - * - * \par - * BlockExchange supports the following types of data exchanges: - * - Transposing between [blocked](index.html#sec5sec4) and [striped](index.html#sec5sec4) arrangements - * - Transposing between [blocked](index.html#sec5sec4) and [warp-striped](index.html#sec5sec4) arrangements - * - Scattering ranked items to a [blocked arrangement](index.html#sec5sec4) - * - Scattering ranked items to a [striped arrangement](index.html#sec5sec4) - * - * \tparam T The data type to be exchanged. - * \tparam BLOCK_THREADS The thread block size in threads. - * \tparam ITEMS_PER_THREAD The number of items partitioned onto each thread. - * \tparam WARP_TIME_SLICING [optional] When \p true, only use enough shared memory for a single warp's worth of tile data, time-slicing the block-wide exchange over multiple synchronized rounds. Yields a smaller memory footprint at the expense of decreased parallelism. (Default: false) - * - * \par A Simple Example - * \blockcollective{BlockExchange} - * \par - * The code snippet below illustrates the conversion from a "blocked" to a "striped" arrangement - * of 512 integer items partitioned across 128 threads where each thread owns 4 items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) 
- * { - * // Specialize BlockExchange for 128 threads owning 4 integer items each - * typedef cub::BlockExchange BlockExchange; - * - * // Allocate shared memory for BlockExchange - * __shared__ typename BlockExchange::TempStorage temp_storage; - * - * // Load a tile of data striped across threads - * int thread_data[4]; - * cub::LoadStriped(threadIdx.x, d_data, thread_data); - * - * // Collectively exchange data into a blocked arrangement across threads - * BlockExchange(temp_storage).StripedToBlocked(thread_data); - * - * \endcode - * \par - * Suppose the set of striped input \p thread_data across the block of threads is - * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] }. - * The corresponding output \p thread_data in those threads will be - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. - * - * \par Performance Considerations - * - Proper device-specific padding ensures zero bank conflicts for most types. - * - */ -template < - typename T, - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - bool WARP_TIME_SLICING = false> -class BlockExchange -{ -private: - - /****************************************************************************** - * Constants - ******************************************************************************/ - - enum - { - LOG_WARP_THREADS = PtxArchProps::LOG_WARP_THREADS, - WARP_THREADS = 1 << LOG_WARP_THREADS, - WARPS = (BLOCK_THREADS + PtxArchProps::WARP_THREADS - 1) / PtxArchProps::WARP_THREADS, - - LOG_SMEM_BANKS = PtxArchProps::LOG_SMEM_BANKS, - SMEM_BANKS = 1 << LOG_SMEM_BANKS, - - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - - TIME_SLICES = (WARP_TIME_SLICING) ? WARPS : 1, - - TIME_SLICED_THREADS = (WARP_TIME_SLICING) ? CUB_MIN(BLOCK_THREADS, WARP_THREADS) : BLOCK_THREADS, - TIME_SLICED_ITEMS = TIME_SLICED_THREADS * ITEMS_PER_THREAD, - - WARP_TIME_SLICED_THREADS = CUB_MIN(BLOCK_THREADS, WARP_THREADS), - WARP_TIME_SLICED_ITEMS = WARP_TIME_SLICED_THREADS * ITEMS_PER_THREAD, - - // Insert padding if the number of items per thread is a power of two - INSERT_PADDING = ((ITEMS_PER_THREAD & (ITEMS_PER_THREAD - 1)) == 0), - PADDING_ITEMS = (INSERT_PADDING) ? (TIME_SLICED_ITEMS >> LOG_SMEM_BANKS) : 0, - }; - - /****************************************************************************** - * Type definitions - ******************************************************************************/ - - /// Shared memory storage layout type - typedef T _TempStorage[TIME_SLICED_ITEMS + PADDING_ITEMS]; - -public: - - /// \smemstorage{BlockExchange} - struct TempStorage : Uninitialized<_TempStorage> {}; - -private: - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - int warp_lane; - int warp_id; - int warp_offset; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - - /** - * Transposes data items from blocked arrangement to striped arrangement. Specialized for no timeslicing. 
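The difference between the arrangements that BlockExchange converts between is purely an index mapping over the tile. A small host-side sketch that prints which tile positions a given thread owns in each arrangement, using the 128-thread / 4-item configuration from the snippet above (the printed values are only an illustration of the mapping):

    #include <cstdio>

    int main()
    {
        const int BLOCK_THREADS    = 128;
        const int ITEMS_PER_THREAD = 4;

        int tid = 5;   // pick any thread id to inspect

        // Blocked arrangement: consecutive items belong to a single thread.
        std::printf("blocked: ");
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
            std::printf("%d ", tid * ITEMS_PER_THREAD + i);   // 20 21 22 23

        // Striped arrangement: consecutive threads own consecutive items.
        std::printf("\nstriped: ");
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
            std::printf("%d ", i * BLOCK_THREADS + tid);      // 5 133 261 389

        std::printf("\n");
        return 0;
    }

This is exactly the { [0,128,256,384], [1,129,257,385], ... } versus { [0,1,2,3], [4,5,6,7], ... } correspondence shown in the documentation example above.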
- */ - __device__ __forceinline__ void BlockedToStriped( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between blocked and striped arrangements. - Int2Type time_slicing) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - - __syncthreads(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - items[ITEM] = temp_storage[item_offset]; - } - } - - - /** - * Transposes data items from blocked arrangement to striped arrangement. Specialized for warp-timeslicing. - */ - __device__ __forceinline__ void BlockedToStriped( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between blocked and striped arrangements. - Int2Type time_slicing) - { - T temp_items[ITEMS_PER_THREAD]; - - #pragma unroll - for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) - { - const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; - const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; - - __syncthreads(); - - if (warp_id == SLICE) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (warp_lane * ITEMS_PER_THREAD) + ITEM; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - } - - __syncthreads(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - // Read a strip of items - const int STRIP_OFFSET = ITEM * BLOCK_THREADS; - const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; - - if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) - { - int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; - if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) - { - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_items[ITEM] = temp_storage[item_offset]; - } - } - } - } - - // Copy - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = temp_items[ITEM]; - } - } - - - /** - * Transposes data items from blocked arrangement to warp-striped arrangement. Specialized for no timeslicing - */ - __device__ __forceinline__ void BlockedToWarpStriped( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between blocked and warp-striped arrangements. - Int2Type time_slicing) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = warp_offset + ITEM + (warp_lane * ITEMS_PER_THREAD); - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + warp_lane; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - items[ITEM] = temp_storage[item_offset]; - } - } - - /** - * Transposes data items from blocked arrangement to warp-striped arrangement. Specialized for warp-timeslicing - */ - __device__ __forceinline__ void BlockedToWarpStriped( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between blocked and warp-striped arrangements. 
- Int2Type time_slicing) - { - #pragma unroll - for (int SLICE = 0; SLICE < TIME_SLICES; ++SLICE) - { - __syncthreads(); - - if (warp_id == SLICE) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = ITEM + (warp_lane * ITEMS_PER_THREAD); - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + warp_lane; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - items[ITEM] = temp_storage[item_offset]; - } - } - } - } - - - /** - * Transposes data items from striped arrangement to blocked arrangement. Specialized for no timeslicing. - */ - __device__ __forceinline__ void StripedToBlocked( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. - Int2Type time_slicing) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - - __syncthreads(); - - // No timeslicing - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - items[ITEM] = temp_storage[item_offset]; - } - } - - - /** - * Transposes data items from striped arrangement to blocked arrangement. Specialized for warp-timeslicing. - */ - __device__ __forceinline__ void StripedToBlocked( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. - Int2Type time_slicing) - { - // Warp time-slicing - T temp_items[ITEMS_PER_THREAD]; - - #pragma unroll - for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) - { - const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; - const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; - - __syncthreads(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - // Write a strip of items - const int STRIP_OFFSET = ITEM * BLOCK_THREADS; - const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; - - if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) - { - int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; - if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) - { - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - } - } - - __syncthreads(); - - if (warp_id == SLICE) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (warp_lane * ITEMS_PER_THREAD) + ITEM; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_items[ITEM] = temp_storage[item_offset]; - } - } - } - - // Copy - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = temp_items[ITEM]; - } - } - - - /** - * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for no timeslicing - */ - __device__ __forceinline__ void WarpStripedToBlocked( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between warp-striped and blocked arrangements. 
- Int2Type time_slicing) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + warp_lane; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = warp_offset + ITEM + (warp_lane * ITEMS_PER_THREAD); - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - items[ITEM] = temp_storage[item_offset]; - } - } - - - /** - * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for warp-timeslicing - */ - __device__ __forceinline__ void WarpStripedToBlocked( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between warp-striped and blocked arrangements. - Int2Type time_slicing) - { - #pragma unroll - for (int SLICE = 0; SLICE < TIME_SLICES; ++SLICE) - { - __syncthreads(); - - if (warp_id == SLICE) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + warp_lane; - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_storage[item_offset] = items[ITEM]; - } - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = ITEM + (warp_lane * ITEMS_PER_THREAD); - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - items[ITEM] = temp_storage[item_offset]; - } - } - } - } - - - /** - * Exchanges data items annotated by rank into blocked arrangement. Specialized for no timeslicing. - */ - __device__ __forceinline__ void ScatterToBlocked( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange - int ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks - Int2Type time_slicing) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = ranks[ITEM]; - if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); - temp_storage[item_offset] = items[ITEM]; - } - - __syncthreads(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; - if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); - items[ITEM] = temp_storage[item_offset]; - } - } - - /** - * Exchanges data items annotated by rank into blocked arrangement. Specialized for warp-timeslicing. 
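Each transpose variant above optionally pads its shared-memory offsets (INSERT_PADDING) so that the power-of-two strides produced by ITEMS_PER_THREAD do not land repeatedly in the same shared-memory bank; the adjustment is always the same, shift the offset right by LOG_SMEM_BANKS and add it back in (SHR_ADD fuses this on the device). A host-side illustration, assuming 4-byte elements and 32 banks (PtxArchProps supplies the real values at compile time):

    #include <cstdio>

    // Mirrors: item_offset += item_offset >> LOG_SMEM_BANKS
    int padded_offset(int item_offset, int log_smem_banks)
    {
        return item_offset + (item_offset >> log_smem_banks);
    }

    int main()
    {
        const int LOG_SMEM_BANKS = 5;   // 32 banks assumed
        // Raw offsets 0,32,64,96 would all hit bank 0; padding spreads them out.
        for (int k = 0; k < 4; ++k)
        {
            int raw = k * 32;
            int pad = padded_offset(raw, LOG_SMEM_BANKS);
            std::printf("raw %3d -> padded %3d (bank %d)\n", raw, pad, pad % 32);
        }
        return 0;
    }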
- */ - __device__ __forceinline__ void ScatterToBlocked( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange - int ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks - Int2Type time_slicing) - { - T temp_items[ITEMS_PER_THREAD]; - - #pragma unroll - for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) - { - __syncthreads(); - - const int SLICE_OFFSET = TIME_SLICED_ITEMS * SLICE; - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = ranks[ITEM] - SLICE_OFFSET; - if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) - { - if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); - temp_storage[item_offset] = items[ITEM]; - } - } - - __syncthreads(); - - if (warp_id == SLICE) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = (warp_lane * ITEMS_PER_THREAD) + ITEM; - if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); - temp_items[ITEM] = temp_storage[item_offset]; - } - } - } - - // Copy - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = temp_items[ITEM]; - } - } - - - /** - * Exchanges data items annotated by rank into striped arrangement. Specialized for no timeslicing. - */ - __device__ __forceinline__ void ScatterToStriped( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange - int ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks - Int2Type time_slicing) - { - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = ranks[ITEM]; - if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); - temp_storage[item_offset] = items[ITEM]; - } - - __syncthreads(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; - if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); - items[ITEM] = temp_storage[item_offset]; - } - } - - - /** - * Exchanges data items annotated by rank into striped arrangement. Specialized for warp-timeslicing. 
- */ - __device__ __forceinline__ void ScatterToStriped( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange - int ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks - Int2Type time_slicing) - { - T temp_items[ITEMS_PER_THREAD]; - - #pragma unroll - for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) - { - const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; - const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; - - __syncthreads(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - int item_offset = ranks[ITEM] - SLICE_OFFSET; - if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) - { - if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); - temp_storage[item_offset] = items[ITEM]; - } - } - - __syncthreads(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - // Read a strip of items - const int STRIP_OFFSET = ITEM * BLOCK_THREADS; - const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; - - if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) - { - int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; - if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) - { - if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; - temp_items[ITEM] = temp_storage[item_offset]; - } - } - } - } - - // Copy - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = temp_items[ITEM]; - } - } - - -public: - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockExchange() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x), - warp_lane(linear_tid & (WARP_THREADS - 1)), - warp_id(linear_tid >> LOG_WARP_THREADS), - warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockExchange( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x), - warp_lane(linear_tid & (WARP_THREADS - 1)), - warp_id(linear_tid >> LOG_WARP_THREADS), - warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockExchange( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid), - warp_lane(linear_tid & (WARP_THREADS - 1)), - warp_id(linear_tid >> LOG_WARP_THREADS), - warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. 
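As with BlockDiscontinuity, the four constructors that follow differ only in where the temporary storage comes from and how the calling thread derives its linear id; for anything other than a 1D block the caller supplies the linearization itself. A hypothetical sketch for a 16x8 (2D) thread block, using the linearization formula suggested in the parameter comments; the kernel name and sizes are assumptions:

    #include <cub/cub.cuh>

    // Hypothetical: a 16x8 thread block still presents 128 linear threads to
    // BlockExchange by passing an explicit linear_tid.
    __global__ void TwoDimBlockKernel(int *d_data)
    {
        typedef cub::BlockExchange<int, 128, 4> BlockExchangeT;
        __shared__ typename BlockExchangeT::TempStorage temp_storage;

        int linear_tid = threadIdx.y * blockDim.x + threadIdx.x;   // 0..127 for blockDim = (16,8)

        // Striped load keyed on the linear id
        int thread_data[4];
        for (int i = 0; i < 4; ++i)
            thread_data[i] = d_data[i * 128 + linear_tid];

        // Pass both the user-provided storage and the explicit linear id
        BlockExchangeT(temp_storage, linear_tid).StripedToBlocked(thread_data);

        // thread_data now holds 4 consecutive tile elements per linear thread id.
    }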
- */ - __device__ __forceinline__ BlockExchange( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid), - warp_lane(linear_tid & (WARP_THREADS - 1)), - warp_id(linear_tid >> LOG_WARP_THREADS), - warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) - {} - - - //@} end member group - /******************************************************************//** - * \name Structured exchanges - *********************************************************************/ - //@{ - - /** - * \brief Transposes data items from striped arrangement to blocked arrangement. - * - * \smemreuse - * - * The code snippet below illustrates the conversion from a "striped" to a "blocked" arrangement - * of 512 integer items partitioned across 128 threads where each thread owns 4 items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) - * { - * // Specialize BlockExchange for 128 threads owning 4 integer items each - * typedef cub::BlockExchange BlockExchange; - * - * // Allocate shared memory for BlockExchange - * __shared__ typename BlockExchange::TempStorage temp_storage; - * - * // Load a tile of ordered data into a striped arrangement across block threads - * int thread_data[4]; - * cub::LoadStriped(threadIdx.x, d_data, thread_data); - * - * // Collectively exchange data into a blocked arrangement across threads - * BlockExchange(temp_storage).StripedToBlocked(thread_data); - * - * \endcode - * \par - * Suppose the set of striped input \p thread_data across the block of threads is - * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } after loading from global memory. - * The corresponding output \p thread_data in those threads will be - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. - * - */ - __device__ __forceinline__ void StripedToBlocked( - T items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. - { - StripedToBlocked(items, Int2Type()); - } - - /** - * \brief Transposes data items from blocked arrangement to striped arrangement. - * - * \smemreuse - * - * The code snippet below illustrates the conversion from a "blocked" to a "striped" arrangement - * of 512 integer items partitioned across 128 threads where each thread owns 4 items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) - * { - * // Specialize BlockExchange for 128 threads owning 4 integer items each - * typedef cub::BlockExchange BlockExchange; - * - * // Allocate shared memory for BlockExchange - * __shared__ typename BlockExchange::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively exchange data into a striped arrangement across threads - * BlockExchange(temp_storage).BlockedToStriped(thread_data); - * - * // Store data striped across block threads into an ordered tile - * cub::StoreStriped(threadIdx.x, d_data, thread_data); - * - * \endcode - * \par - * Suppose the set of blocked input \p thread_data across the block of threads is - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. 
- * The corresponding output \p thread_data in those threads will be - * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } in - * preparation for storing to global memory. - * - */ - __device__ __forceinline__ void BlockedToStriped( - T items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between blocked and striped arrangements. - { - BlockedToStriped(items, Int2Type()); - } - - - /** - * \brief Transposes data items from warp-striped arrangement to blocked arrangement. - * - * \smemreuse - * - * The code snippet below illustrates the conversion from a "warp-striped" to a "blocked" arrangement - * of 512 integer items partitioned across 128 threads where each thread owns 4 items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) - * { - * // Specialize BlockExchange for 128 threads owning 4 integer items each - * typedef cub::BlockExchange BlockExchange; - * - * // Allocate shared memory for BlockExchange - * __shared__ typename BlockExchange::TempStorage temp_storage; - * - * // Load a tile of ordered data into a warp-striped arrangement across warp threads - * int thread_data[4]; - * cub::LoadSWarptriped(threadIdx.x, d_data, thread_data); - * - * // Collectively exchange data into a blocked arrangement across threads - * BlockExchange(temp_storage).WarpStripedToBlocked(thread_data); - * - * \endcode - * \par - * Suppose the set of warp-striped input \p thread_data across the block of threads is - * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } - * after loading from global memory. (The first 128 items are striped across - * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) - * The corresponding output \p thread_data in those threads will be - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. - * - */ - __device__ __forceinline__ void WarpStripedToBlocked( - T items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between warp-striped and blocked arrangements. - { - WarpStripedToBlocked(items, Int2Type()); - } - - /** - * \brief Transposes data items from blocked arrangement to warp-striped arrangement. - * - * \smemreuse - * - * The code snippet below illustrates the conversion from a "blocked" to a "warp-striped" arrangement - * of 512 integer items partitioned across 128 threads where each thread owns 4 items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) - * { - * // Specialize BlockExchange for 128 threads owning 4 integer items each - * typedef cub::BlockExchange BlockExchange; - * - * // Allocate shared memory for BlockExchange - * __shared__ typename BlockExchange::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively exchange data into a warp-striped arrangement across threads - * BlockExchange(temp_storage).BlockedToWarpStriped(thread_data); - * - * // Store data striped across warp threads into an ordered tile - * cub::StoreStriped(threadIdx.x, d_data, thread_data); - * - * \endcode - * \par - * Suppose the set of blocked input \p thread_data across the block of threads is - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. - * The corresponding output \p thread_data in those threads will be - * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } - * in preparation for storing to global memory. 
(The first 128 items are striped across - * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) - * - */ - __device__ __forceinline__ void BlockedToWarpStriped( - T items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between blocked and warp-striped arrangements. - { - BlockedToWarpStriped(items, Int2Type()); - } - - - //@} end member group - /******************************************************************//** - * \name Scatter exchanges - *********************************************************************/ - //@{ - - - /** - * \brief Exchanges data items annotated by rank into blocked arrangement. - * - * \smemreuse - */ - __device__ __forceinline__ void ScatterToBlocked( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange - int ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks - { - ScatterToBlocked(items, ranks, Int2Type()); - } - - - /** - * \brief Exchanges data items annotated by rank into striped arrangement. - * - * \smemreuse - */ - __device__ __forceinline__ void ScatterToStriped( - T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange - int ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks - { - ScatterToStriped(items, ranks, Int2Type()); - } - - //@} end member group - - -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/block_histogram.cuh b/kokkos/kokkos/TPL/cub/block/block_histogram.cuh deleted file mode 100644 index dd346e3..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_histogram.cuh +++ /dev/null @@ -1,414 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
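The scatter exchanges at the end of BlockExchange take a per-item rank and are typically fed by a block-wide ranking step (for example, radix ranking during a sort). A self-contained hypothetical use: reversing a 512-item tile by treating each element's reversed position as its scatter rank. The kernel name and the reversal use-case are illustrative only:

    #include <cub/cub.cuh>

    __global__ void ReverseTileKernel(const int *d_in, int *d_out)
    {
        const int BLOCK_THREADS = 128, ITEMS_PER_THREAD = 4;
        const int TILE_ITEMS    = BLOCK_THREADS * ITEMS_PER_THREAD;

        typedef cub::BlockExchange<int, BLOCK_THREADS, ITEMS_PER_THREAD> BlockExchangeT;
        __shared__ typename BlockExchangeT::TempStorage temp_storage;

        int items[ITEMS_PER_THREAD];
        int ranks[ITEMS_PER_THREAD];

        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
        {
            int blocked_idx = threadIdx.x * ITEMS_PER_THREAD + i;   // this item's tile position
            items[i] = d_in[blocked_idx];
            ranks[i] = TILE_ITEMS - 1 - blocked_idx;                // where it should end up
        }

        // After the scatter, thread t holds positions [t*4, t*4+3] of the reversed tile.
        BlockExchangeT(temp_storage).ScatterToBlocked(items, ranks);

        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
            d_out[threadIdx.x * ITEMS_PER_THREAD + i] = items[i];
    }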
- * - ******************************************************************************/ - -/** - * \file - * The cub::BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. - */ - -#pragma once - -#include "specializations/block_histogram_sort.cuh" -#include "specializations/block_histogram_atomic.cuh" -#include "../util_arch.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Algorithmic variants - ******************************************************************************/ - -/** - * \brief BlockHistogramAlgorithm enumerates alternative algorithms for the parallel construction of block-wide histograms. - */ -enum BlockHistogramAlgorithm -{ - - /** - * \par Overview - * Sorting followed by differentiation. Execution is comprised of two phases: - * -# Sort the data using efficient radix sort - * -# Look for "runs" of same-valued keys by detecting discontinuities; the run-lengths are histogram bin counts. - * - * \par Performance Considerations - * Delivers consistent throughput regardless of sample bin distribution. - */ - BLOCK_HISTO_SORT, - - - /** - * \par Overview - * Use atomic addition to update byte counts directly - * - * \par Performance Considerations - * Performance is strongly tied to the hardware implementation of atomic - * addition, and may be significantly degraded for non uniformly-random - * input distributions where many concurrent updates are likely to be - * made to the same bin counter. - */ - BLOCK_HISTO_ATOMIC, -}; - - - -/****************************************************************************** - * Block histogram - ******************************************************************************/ - - -/** - * \brief The BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. ![](histogram_logo.png) - * \ingroup BlockModule - * - * \par Overview - * A histogram - * counts the number of observations that fall into each of the disjoint categories (known as bins). - * - * \par - * Optionally, BlockHistogram can be specialized to use different algorithms: - * -# cub::BLOCK_HISTO_SORT. Sorting followed by differentiation. [More...](\ref cub::BlockHistogramAlgorithm) - * -# cub::BLOCK_HISTO_ATOMIC. Use atomic addition to update byte counts directly. [More...](\ref cub::BlockHistogramAlgorithm) - * - * \tparam T The sample type being histogrammed (must be castable to an integer bin identifier) - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam ITEMS_PER_THREAD The number of items per thread - * \tparam BINS The number bins within the histogram - * \tparam ALGORITHM [optional] cub::BlockHistogramAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_HISTO_SORT) - * - * \par A Simple Example - * \blockcollective{BlockHistogram} - * \par - * The code snippet below illustrates a 256-bin histogram of 512 integer samples that - * are partitioned across 128 threads where each thread owns 4 samples. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * // Specialize a 256-bin BlockHistogram type for 128 threads having 4 character samples each - * typedef cub::BlockHistogram BlockHistogram; - * - * // Allocate shared memory for BlockHistogram - * __shared__ typename BlockHistogram::TempStorage temp_storage; - * - * // Allocate shared memory for block-wide histogram bin counts - * __shared__ unsigned int smem_histogram[256]; - * - * // Obtain input samples per thread - * unsigned char data[4]; - * ... - * - * // Compute the block-wide histogram - * BlockHistogram(temp_storage).Histogram(data, smem_histogram); - * - * \endcode - * - * \par Performance and Usage Considerations - * - The histogram output can be constructed in shared or global memory - * - See cub::BlockHistogramAlgorithm for performance details regarding algorithmic alternatives - * - */ -template < - typename T, - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - int BINS, - BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT> -class BlockHistogram -{ -private: - - /****************************************************************************** - * Constants and type definitions - ******************************************************************************/ - - /** - * Ensure the template parameterization meets the requirements of the - * targeted device architecture. BLOCK_HISTO_ATOMIC can only be used - * on version SM120 or later. Otherwise BLOCK_HISTO_SORT is used - * regardless. - */ - static const BlockHistogramAlgorithm SAFE_ALGORITHM = - ((ALGORITHM == BLOCK_HISTO_ATOMIC) && (CUB_PTX_ARCH < 120)) ? - BLOCK_HISTO_SORT : - ALGORITHM; - - /// Internal specialization. - typedef typename If<(SAFE_ALGORITHM == BLOCK_HISTO_SORT), - BlockHistogramSort, - BlockHistogramAtomic >::Type InternalBlockHistogram; - - /// Shared memory storage layout type for BlockHistogram - typedef typename InternalBlockHistogram::TempStorage _TempStorage; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - -public: - - /// \smemstorage{BlockHistogram} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockHistogram() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. 
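The ALGORITHM template parameter described above is only a request: as the SAFE_ALGORITHM logic shows, BLOCK_HISTO_ATOMIC silently degrades to BLOCK_HISTO_SORT when compiling for devices older than sm_12 (no shared-memory atomics). A hypothetical kernel that asks for the atomic variant explicitly, with sizes mirroring the documentation snippets; the kernel name and the final write-out loop are assumptions:

    #include <cub/cub.cuh>

    __global__ void AtomicHistogramKernel(const unsigned char *d_samples, unsigned int *d_histogram)
    {
        // Request BLOCK_HISTO_ATOMIC; pre-sm_12 targets fall back to BLOCK_HISTO_SORT.
        typedef cub::BlockHistogram<unsigned char, 128, 4, 256, cub::BLOCK_HISTO_ATOMIC> BlockHistogramT;

        __shared__ typename BlockHistogramT::TempStorage temp_storage;
        __shared__ unsigned int smem_histogram[256];

        // Blocked load of 4 samples per thread
        unsigned char samples[4];
        for (int i = 0; i < 4; ++i)
            samples[i] = d_samples[threadIdx.x * 4 + i];

        // Zero the bins, then composite this tile's samples into them
        BlockHistogramT(temp_storage).Histogram(samples, smem_histogram);

        __syncthreads();

        // Write the block-wide counts out (2 bins per thread for 256 bins / 128 threads)
        for (int bin = threadIdx.x; bin < 256; bin += 128)
            d_histogram[bin] = smem_histogram[bin];
    }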
- */ - __device__ __forceinline__ BlockHistogram( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockHistogram( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. - */ - __device__ __forceinline__ BlockHistogram( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - //@} end member group - /******************************************************************//** - * \name Histogram operations - *********************************************************************/ - //@{ - - - /** - * \brief Initialize the shared histogram counters to zero. - * - * The code snippet below illustrates a the initialization and update of a - * histogram of 512 integer samples that are partitioned across 128 threads - * where each thread owns 4 samples. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize a 256-bin BlockHistogram type for 128 threads having 4 character samples each - * typedef cub::BlockHistogram BlockHistogram; - * - * // Allocate shared memory for BlockHistogram - * __shared__ typename BlockHistogram::TempStorage temp_storage; - * - * // Allocate shared memory for block-wide histogram bin counts - * __shared__ unsigned int smem_histogram[256]; - * - * // Obtain input samples per thread - * unsigned char thread_samples[4]; - * ... - * - * // Initialize the block-wide histogram - * BlockHistogram(temp_storage).InitHistogram(smem_histogram); - * - * // Update the block-wide histogram - * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); - * - * \endcode - * - * \tparam HistoCounter [inferred] Histogram counter type - */ - template - __device__ __forceinline__ void InitHistogram(HistoCounter histogram[BINS]) - { - // Initialize histogram bin counts to zeros - int histo_offset = 0; - - #pragma unroll - for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) - { - histogram[histo_offset + linear_tid] = 0; - } - // Finish up with guarded initialization if necessary - if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) - { - histogram[histo_offset + linear_tid] = 0; - } - } - - - /** - * \brief Constructs a block-wide histogram in shared/global memory. Each thread contributes an array of input elements. - * - * \smemreuse - * - * The code snippet below illustrates a 256-bin histogram of 512 integer samples that - * are partitioned across 128 threads where each thread owns 4 samples. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * // Specialize a 256-bin BlockHistogram type for 128 threads having 4 character samples each - * typedef cub::BlockHistogram BlockHistogram; - * - * // Allocate shared memory for BlockHistogram - * __shared__ typename BlockHistogram::TempStorage temp_storage; - * - * // Allocate shared memory for block-wide histogram bin counts - * __shared__ unsigned int smem_histogram[256]; - * - * // Obtain input samples per thread - * unsigned char thread_samples[4]; - * ... - * - * // Compute the block-wide histogram - * BlockHistogram(temp_storage).Histogram(thread_samples, smem_histogram); - * - * \endcode - * - * \tparam HistoCounter [inferred] Histogram counter type - */ - template < - typename HistoCounter> - __device__ __forceinline__ void Histogram( - T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram - HistoCounter histogram[BINS]) ///< [out] Reference to shared/global memory histogram - { - // Initialize histogram bin counts to zeros - InitHistogram(histogram); - - // Composite the histogram - InternalBlockHistogram(temp_storage, linear_tid).Composite(items, histogram); - } - - - - /** - * \brief Updates an existing block-wide histogram in shared/global memory. Each thread composites an array of input elements. - * - * \smemreuse - * - * The code snippet below illustrates a the initialization and update of a - * histogram of 512 integer samples that are partitioned across 128 threads - * where each thread owns 4 samples. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize a 256-bin BlockHistogram type for 128 threads having 4 character samples each - * typedef cub::BlockHistogram BlockHistogram; - * - * // Allocate shared memory for BlockHistogram - * __shared__ typename BlockHistogram::TempStorage temp_storage; - * - * // Allocate shared memory for block-wide histogram bin counts - * __shared__ unsigned int smem_histogram[256]; - * - * // Obtain input samples per thread - * unsigned char thread_samples[4]; - * ... - * - * // Initialize the block-wide histogram - * BlockHistogram(temp_storage).InitHistogram(smem_histogram); - * - * // Update the block-wide histogram - * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); - * - * \endcode - * - * \tparam HistoCounter [inferred] Histogram counter type - */ - template < - typename HistoCounter> - __device__ __forceinline__ void Composite( - T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram - HistoCounter histogram[BINS]) ///< [out] Reference to shared/global memory histogram - { - InternalBlockHistogram(temp_storage, linear_tid).Composite(items, histogram); - } - -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/block_load.cuh b/kokkos/kokkos/TPL/cub/block/block_load.cuh deleted file mode 100644 index e645bcd..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_load.cuh +++ /dev/null @@ -1,1122 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Operations for reading linear tiles of data into the CUDA thread block. - */ - -#pragma once - -#include - -#include "../util_namespace.cuh" -#include "../util_macro.cuh" -#include "../util_type.cuh" -#include "../util_vector.cuh" -#include "../thread/thread_load.cuh" -#include "block_exchange.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \addtogroup IoModule - * @{ - */ - - -/******************************************************************//** - * \name Blocked I/O - *********************************************************************/ -//@{ - - -/** - * \brief Load a linear segment of items into a blocked arrangement across the thread block using the specified cache modifier. - * - * \blocked - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). - */ -template < - PtxLoadModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadBlocked( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load -{ - // Load directly in thread-blocked order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = ThreadLoad(block_itr + (linear_tid * ITEMS_PER_THREAD) + ITEM); - } -} - - -/** - * \brief Load a linear segment of items into a blocked arrangement across the thread block using the specified cache modifier, guarded by range. - * - * \blocked - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
- * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). - */ -template < - PtxLoadModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadBlocked( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load -{ - int bounds = valid_items - (linear_tid * ITEMS_PER_THREAD); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - if (ITEM < bounds) - { - items[ITEM] = ThreadLoad(block_itr + (linear_tid * ITEMS_PER_THREAD) + ITEM); - } - } -} - - -/** - * \brief Load a linear segment of items into a blocked arrangement across the thread block using the specified cache modifier, guarded by range, with a fall-back assignment of out-of-bound elements.. - * - * \blocked - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). - */ -template < - PtxLoadModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadBlocked( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items -{ - int bounds = valid_items - (linear_tid * ITEMS_PER_THREAD); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = (ITEM < bounds) ? - ThreadLoad(block_itr + (linear_tid * ITEMS_PER_THREAD) + ITEM) : - oob_default; - } -} - - - -//@} end member group -/******************************************************************//** - * \name Striped I/O - *********************************************************************/ -//@{ - - -/** - * \brief Load a linear segment of items into a striped arrangement across the thread block using the specified cache modifier. - * - * \striped - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). 
- */ -template < - PtxLoadModifier MODIFIER, - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load -{ - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = ThreadLoad(block_itr + (ITEM * BLOCK_THREADS) + linear_tid); - } -} - - -/** - * \brief Load a linear segment of items into a striped arrangement across the thread block using the specified cache modifier, guarded by range - * - * \striped - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). - */ -template < - PtxLoadModifier MODIFIER, - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load -{ - int bounds = valid_items - linear_tid; - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - if (ITEM * BLOCK_THREADS < bounds) - { - items[ITEM] = ThreadLoad(block_itr + linear_tid + (ITEM * BLOCK_THREADS)); - } - } -} - - -/** - * \brief Load a linear segment of items into a striped arrangement across the thread block using the specified cache modifier, guarded by range, with a fall-back assignment of out-of-bound elements. - * - * \striped - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). - */ -template < - PtxLoadModifier MODIFIER, - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items -{ - int bounds = valid_items - linear_tid; - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = (ITEM * BLOCK_THREADS < bounds) ? 
- ThreadLoad(block_itr + linear_tid + (ITEM * BLOCK_THREADS)) : - oob_default; - } -} - - - -//@} end member group -/******************************************************************//** - * \name Warp-striped I/O - *********************************************************************/ -//@{ - - -/** - * \brief Load a linear segment of items into a warp-striped arrangement across the thread block using the specified cache modifier. - * - * \warpstriped - * - * \par Usage Considerations - * The number of threads in the thread block must be a multiple of the architecture's warp size. - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). - */ -template < - PtxLoadModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadWarpStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load -{ - int tid = linear_tid & (PtxArchProps::WARP_THREADS - 1); - int wid = linear_tid >> PtxArchProps::LOG_WARP_THREADS; - int warp_offset = wid * PtxArchProps::WARP_THREADS * ITEMS_PER_THREAD; - - // Load directly in warp-striped order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = ThreadLoad(block_itr + warp_offset + tid + (ITEM * PtxArchProps::WARP_THREADS)); - } -} - - -/** - * \brief Load a linear segment of items into a warp-striped arrangement across the thread block using the specified cache modifier, guarded by range - * - * \warpstriped - * - * \par Usage Considerations - * The number of threads in the thread block must be a multiple of the architecture's warp size. - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). 
- */ -template < - PtxLoadModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadWarpStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load -{ - int tid = linear_tid & (PtxArchProps::WARP_THREADS - 1); - int wid = linear_tid >> PtxArchProps::LOG_WARP_THREADS; - int warp_offset = wid * PtxArchProps::WARP_THREADS * ITEMS_PER_THREAD; - int bounds = valid_items - warp_offset - tid; - - // Load directly in warp-striped order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - if ((ITEM * PtxArchProps::WARP_THREADS) < bounds) - { - items[ITEM] = ThreadLoad(block_itr + warp_offset + tid + (ITEM * PtxArchProps::WARP_THREADS)); - } - } -} - - -/** - * \brief Load a linear segment of items into a warp-striped arrangement across the thread block using the specified cache modifier, guarded by range, with a fall-back assignment of out-of-bound elements. - * - * \warpstriped - * - * \par Usage Considerations - * The number of threads in the thread block must be a multiple of the architecture's warp size. - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam InputIteratorRA [inferred] The random-access iterator type for input (may be a simple pointer type). - */ -template < - PtxLoadModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename InputIteratorRA> -__device__ __forceinline__ void LoadWarpStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items -{ - int tid = linear_tid & (PtxArchProps::WARP_THREADS - 1); - int wid = linear_tid >> PtxArchProps::LOG_WARP_THREADS; - int warp_offset = wid * PtxArchProps::WARP_THREADS * ITEMS_PER_THREAD; - int bounds = valid_items - warp_offset - tid; - - // Load directly in warp-striped order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = ((ITEM * PtxArchProps::WARP_THREADS) < bounds) ? - ThreadLoad(block_itr + warp_offset + tid + (ITEM * PtxArchProps::WARP_THREADS)) : - oob_default; - } -} - - - -//@} end member group -/******************************************************************//** - * \name Blocked, vectorized I/O - *********************************************************************/ -//@{ - -/** - * \brief Load a linear segment of items into a blocked arrangement across the thread block using the specified cache modifier. 
- * - * \blocked - * - * The input offset (\p block_ptr + \p block_offset) must be quad-item aligned - * - * The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: - * - \p ITEMS_PER_THREAD is odd - * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) - * - * \tparam MODIFIER cub::PtxLoadModifier cache modifier. - * \tparam T [inferred] The data type to load. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - */ -template < - PtxLoadModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD> -__device__ __forceinline__ void LoadBlockedVectorized( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - T *block_ptr, ///< [in] Input pointer for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load -{ - enum - { - // Maximum CUDA vector size is 4 elements - MAX_VEC_SIZE = CUB_MIN(4, ITEMS_PER_THREAD), - - // Vector size must be a power of two and an even divisor of the items per thread - VEC_SIZE = ((((MAX_VEC_SIZE - 1) & MAX_VEC_SIZE) == 0) && ((ITEMS_PER_THREAD % MAX_VEC_SIZE) == 0)) ? - MAX_VEC_SIZE : - 1, - - VECTORS_PER_THREAD = ITEMS_PER_THREAD / VEC_SIZE, - }; - - // Vector type - typedef typename VectorHelper::Type Vector; - - // Alias local data (use raw_items array here which should get optimized away to prevent conservative PTXAS lmem spilling) - T raw_items[ITEMS_PER_THREAD]; - - // Direct-load using vector types - LoadBlocked( - linear_tid, - reinterpret_cast(block_ptr), - reinterpret_cast(raw_items)); - - // Copy - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = raw_items[ITEM]; - } -} - - -//@} end member group - -/** @} */ // end group IoModule - - - -//----------------------------------------------------------------------------- -// Generic BlockLoad abstraction -//----------------------------------------------------------------------------- - -/** - * \brief cub::BlockLoadAlgorithm enumerates alternative algorithms for cub::BlockLoad to read a linear segment of data from memory into a blocked arrangement across a CUDA thread block. - */ -enum BlockLoadAlgorithm -{ - /** - * \par Overview - * - * A [blocked arrangement](index.html#sec5sec4) of data is read - * directly from memory. The thread block reads items in a parallel "raking" fashion: threadi - * reads the ith segment of consecutive elements. - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) decreases as the - * access stride between threads increases (i.e., the number items per thread). - */ - BLOCK_LOAD_DIRECT, - - /** - * \par Overview - * - * A [blocked arrangement](index.html#sec5sec4) of data is read directly - * from memory using CUDA's built-in vectorized loads as a coalescing optimization. - * The thread block reads items in a parallel "raking" fashion: threadi uses vector loads to - * read the ith segment of consecutive elements. - * - * For example, ld.global.v4.s32 instructions will be generated when \p T = \p int and \p ITEMS_PER_THREAD > 4. - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) remains high until the the - * access stride between threads (i.e., the number items per thread) exceeds the - * maximum vector load width (typically 4 items or 64B, whichever is lower). 
- * - The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: - * - \p ITEMS_PER_THREAD is odd - * - The \p InputIteratorRA is not a simple pointer type - * - The block input offset is not quadword-aligned - * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) - */ - BLOCK_LOAD_VECTORIZE, - - /** - * \par Overview - * - * A [striped arrangement](index.html#sec5sec4) of data is read - * directly from memory and then is locally transposed into a - * [blocked arrangement](index.html#sec5sec4). The thread block - * reads items in a parallel "strip-mining" fashion: - * threadi reads items having stride \p BLOCK_THREADS - * between them. cub::BlockExchange is then used to locally reorder the items - * into a [blocked arrangement](index.html#sec5sec4). - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) remains high regardless - * of items loaded per thread. - * - The local reordering incurs slightly longer latencies and throughput than the - * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. - */ - BLOCK_LOAD_TRANSPOSE, - - - /** - * \par Overview - * - * A [warp-striped arrangement](index.html#sec5sec4) of data is read - * directly from memory and then is locally transposed into a - * [blocked arrangement](index.html#sec5sec4). Each warp reads its own - * contiguous segment in a parallel "strip-mining" fashion: lanei - * reads items having stride \p WARP_THREADS between them. cub::BlockExchange - * is then used to locally reorder the items into a - * [blocked arrangement](index.html#sec5sec4). - * - * \par Usage Considerations - * - BLOCK_THREADS must be a multiple of WARP_THREADS - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) remains high regardless - * of items loaded per thread. - * - The local reordering incurs slightly longer latencies and throughput than the - * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. - */ - BLOCK_LOAD_WARP_TRANSPOSE, -}; - - -/** - * \brief The BlockLoad class provides [collective](index.html#sec0) data movement methods for loading a linear segment of items from memory into a [blocked arrangement](index.html#sec5sec4) across a CUDA thread block. ![](block_load_logo.png) - * \ingroup BlockModule - * - * \par Overview - * The BlockLoad class provides a single data movement abstraction that can be specialized - * to implement different cub::BlockLoadAlgorithm strategies. This facilitates different - * performance policies for different architectures, data types, granularity sizes, etc. - * - * \par - * Optionally, BlockLoad can be specialized by different data movement strategies: - * -# cub::BLOCK_LOAD_DIRECT. A [blocked arrangement](index.html#sec5sec4) - * of data is read directly from memory. [More...](\ref cub::BlockLoadAlgorithm) - * -# cub::BLOCK_LOAD_VECTORIZE. A [blocked arrangement](index.html#sec5sec4) - * of data is read directly from memory using CUDA's built-in vectorized loads as a - * coalescing optimization. [More...](\ref cub::BlockLoadAlgorithm) - * -# cub::BLOCK_LOAD_TRANSPOSE. A [striped arrangement](index.html#sec5sec4) - * of data is read directly from memory and is then locally transposed into a - * [blocked arrangement](index.html#sec5sec4). [More...](\ref cub::BlockLoadAlgorithm) - * -# cub::BLOCK_LOAD_WARP_TRANSPOSE. 
A [warp-striped arrangement](index.html#sec5sec4) - * of data is read directly from memory and is then locally transposed into a - * [blocked arrangement](index.html#sec5sec4). [More...](\ref cub::BlockLoadAlgorithm) - * - * \tparam InputIteratorRA The input iterator type (may be a simple pointer type). - * \tparam BLOCK_THREADS The thread block size in threads. - * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread. - * \tparam ALGORITHM [optional] cub::BlockLoadAlgorithm tuning policy. default: cub::BLOCK_LOAD_DIRECT. - * \tparam MODIFIER [optional] cub::PtxLoadModifier cache modifier. default: cub::LOAD_DEFAULT. - * \tparam WARP_TIME_SLICING [optional] For transposition-based cub::BlockLoadAlgorithm parameterizations that utilize shared memory: When \p true, only use enough shared memory for a single warp's worth of data, time-slicing the block-wide exchange over multiple synchronized rounds (default: false) - * - * \par A Simple Example - * \blockcollective{BlockLoad} - * \par - * The code snippet below illustrates the loading of a linear - * segment of 512 integers into a "blocked" arrangement across 128 threads where each - * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, - * meaning memory references are efficiently coalesced using a warp-striped access - * pattern (after which items are locally reordered among threads). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) - * { - * // Specialize BlockLoad for 128 threads owning 4 integer items each - * typedef cub::BlockLoad BlockLoad; - * - * // Allocate shared memory for BlockLoad - * __shared__ typename BlockLoad::TempStorage temp_storage; - * - * // Load a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * BlockLoad(temp_storage).Load(d_data, thread_data); - * - * \endcode - * \par - * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... - * The set of \p thread_data across the block of threads will be - * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }.
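For reference, the simple example above can be written as a fully self-contained kernel. This is an illustrative sketch: the include path is assumed, and the explicit specialization just fills in the <InputIteratorRA, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM> parameters of the class template that follows with the values the example describes (an int* iterator, 128 threads, 4 items per thread, warp-transpose loading).

#include <cub/cub.cuh>

// Launch with 128 threads per block.
__global__ void ExampleKernel(int *d_data)
{
    // Specialize BlockLoad for 128 threads owning 4 consecutive ints each,
    // loaded warp-striped and then locally transposed into a blocked arrangement
    typedef cub::BlockLoad<int*, 128, 4, cub::BLOCK_LOAD_WARP_TRANSPOSE> BlockLoad;

    // Shared memory for the collective's temporary storage
    __shared__ typename BlockLoad::TempStorage temp_storage;

    // Load a segment of consecutive items that are blocked across threads
    int thread_data[4];
    BlockLoad(temp_storage).Load(d_data, thread_data);

    // With d_data = 0, 1, 2, ..., thread 0 now holds [0,1,2,3], thread 1 holds [4,5,6,7], etc.
}

The guarded overloads described later follow the same pattern, taking an additional valid_items count and, optionally, an out-of-bounds default value.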
- * - */ -template < - typename InputIteratorRA, - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, - PtxLoadModifier MODIFIER = LOAD_DEFAULT, - bool WARP_TIME_SLICING = false> -class BlockLoad -{ -private: - - /****************************************************************************** - * Constants and typed definitions - ******************************************************************************/ - - // Data type of input iterator - typedef typename std::iterator_traits::value_type T; - - - /****************************************************************************** - * Algorithmic variants - ******************************************************************************/ - - /// Load helper - template - struct LoadInternal; - - - /** - * BLOCK_LOAD_DIRECT specialization of load helper - */ - template - struct LoadInternal - { - /// Shared memory storage layout type - typedef NullType TempStorage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ LoadInternal( - TempStorage &temp_storage, - int linear_tid) - : - linear_tid(linear_tid) - {} - - /// Load a linear segment of items from memory - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load - { - LoadBlocked(linear_tid, block_itr, items); - } - - /// Load a linear segment of items from memory, guarded by range - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load - { - LoadBlocked(linear_tid, block_itr, items, valid_items); - } - - /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items - { - LoadBlocked(linear_tid, block_itr, items, valid_items, oob_default); - } - - }; - - - /** - * BLOCK_LOAD_VECTORIZE specialization of load helper - */ - template - struct LoadInternal - { - /// Shared memory storage layout type - typedef NullType TempStorage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ LoadInternal( - TempStorage &temp_storage, - int linear_tid) - : - linear_tid(linear_tid) - {} - - /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) - __device__ __forceinline__ void Load( - T *block_ptr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load - { - LoadBlockedVectorized(linear_tid, block_ptr, items); - } - - /// Load a linear segment of items from memory, specialized for opaque input iterators (skips vectorization) - template < - typename T, - typename _InputIteratorRA> - __device__ __forceinline__ void Load( - _InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load - { - LoadBlocked(linear_tid, block_itr, items); - } - - /// Load a linear segment of items from memory, 
guarded by range (skips vectorization) - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load - { - LoadBlocked(linear_tid, block_itr, items, valid_items); - } - - /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements (skips vectorization) - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items - { - LoadBlocked(linear_tid, block_itr, items, valid_items, oob_default); - } - - }; - - - /** - * BLOCK_LOAD_TRANSPOSE specialization of load helper - */ - template - struct LoadInternal - { - // BlockExchange utility type for keys - typedef BlockExchange BlockExchange; - - /// Shared memory storage layout type - typedef typename BlockExchange::TempStorage _TempStorage; - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - /// Thread reference to shared storage - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ LoadInternal( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - /// Load a linear segment of items from memory - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ - { - LoadStriped(linear_tid, block_itr, items); - BlockExchange(temp_storage, linear_tid).StripedToBlocked(items); - } - - /// Load a linear segment of items from memory, guarded by range - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load - { - LoadStriped(linear_tid, block_itr, items, valid_items); - BlockExchange(temp_storage, linear_tid).StripedToBlocked(items); - } - - /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items - { - LoadStriped(linear_tid, block_itr, items, valid_items, oob_default); - BlockExchange(temp_storage, linear_tid).StripedToBlocked(items); - } - - }; - - - /** - * BLOCK_LOAD_WARP_TRANSPOSE specialization of load helper - */ - template - struct LoadInternal - { - enum - { - WARP_THREADS = PtxArchProps::WARP_THREADS - }; - - // Assert BLOCK_THREADS must be a multiple of WARP_THREADS - CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); - - // BlockExchange utility type for keys - typedef BlockExchange BlockExchange; - - /// Shared memory storage layout type - typedef typename BlockExchange::TempStorage _TempStorage; - - /// Alias 
wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - /// Thread reference to shared storage - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ LoadInternal( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - /// Load a linear segment of items from memory - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ - { - LoadWarpStriped(linear_tid, block_itr, items); - BlockExchange(temp_storage, linear_tid).WarpStripedToBlocked(items); - } - - /// Load a linear segment of items from memory, guarded by range - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load - { - LoadWarpStriped(linear_tid, block_itr, items, valid_items); - BlockExchange(temp_storage, linear_tid).WarpStripedToBlocked(items); - } - - - /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items - { - LoadWarpStriped(linear_tid, block_itr, items, valid_items, oob_default); - BlockExchange(temp_storage, linear_tid).WarpStripedToBlocked(items); - } - }; - - - /****************************************************************************** - * Type definitions - ******************************************************************************/ - - /// Internal load implementation to use - typedef LoadInternal InternalLoad; - - - /// Shared memory storage layout type - typedef typename InternalLoad::TempStorage _TempStorage; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Thread reference to shared storage - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - -public: - - /// \smemstorage{BlockLoad} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. 
- */ - __device__ __forceinline__ BlockLoad() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockLoad( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockLoad( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. - */ - __device__ __forceinline__ BlockLoad( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - - //@} end member group - /******************************************************************//** - * \name Data movement - *********************************************************************/ - //@{ - - - /** - * \brief Load a linear segment of items from memory. - * - * \blocked - * - * The code snippet below illustrates the loading of a linear - * segment of 512 integers into a "blocked" arrangement across 128 threads where each - * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, - * meaning memory references are efficiently coalesced using a warp-striped access - * pattern (after which items are locally reordered among threads). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) - * { - * // Specialize BlockLoad for 128 threads owning 4 integer items each - * typedef cub::BlockLoad BlockLoad; - * - * // Allocate shared memory for BlockLoad - * __shared__ typename BlockLoad::TempStorage temp_storage; - * - * // Load a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * BlockLoad(temp_storage).Load(d_data, thread_data); - * - * \endcode - * \par - * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... - * The set of \p thread_data across the block of threads in those threads will be - * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. - * - */ - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load - { - InternalLoad(temp_storage, linear_tid).Load(block_itr, items); - } - - - /** - * \brief Load a linear segment of items from memory, guarded by range. - * - * \blocked - * - * The code snippet below illustrates the guarded loading of a linear - * segment of 512 integers into a "blocked" arrangement across 128 threads where each - * thread owns 4 consecutive items. 
The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, - * meaning memory references are efficiently coalesced using a warp-striped access - * pattern (after which items are locally reordered among threads). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, int valid_items, ...) - * { - * // Specialize BlockLoad for 128 threads owning 4 integer items each - * typedef cub::BlockLoad BlockLoad; - * - * // Allocate shared memory for BlockLoad - * __shared__ typename BlockLoad::TempStorage temp_storage; - * - * // Load a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items); - * - * \endcode - * \par - * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6... and \p valid_items is \p 5. - * The set of \p thread_data across the block of threads in those threads will be - * { [0,1,2,3], [4,?,?,?], ..., [?,?,?,?] }, with only the first two threads - * being unmasked to load portions of valid data (and other items remaining unassigned). - * - */ - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items) ///< [in] Number of valid items to load - { - InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items); - } - - - /** - * \brief Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements - * - * \blocked - * - * The code snippet below illustrates the guarded loading of a linear - * segment of 512 integers into a "blocked" arrangement across 128 threads where each - * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, - * meaning memory references are efficiently coalesced using a warp-striped access - * pattern (after which items are locally reordered among threads). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, int valid_items, ...) - * { - * // Specialize BlockLoad for 128 threads owning 4 integer items each - * typedef cub::BlockLoad BlockLoad; - * - * // Allocate shared memory for BlockLoad - * __shared__ typename BlockLoad::TempStorage temp_storage; - * - * // Load a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items, -1); - * - * \endcode - * \par - * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6..., - * \p valid_items is \p 5, and the out-of-bounds default is \p -1. 
- * The set of \p thread_data across the block of threads in those threads will be - * { [0,1,2,3], [4,-1,-1,-1], ..., [-1,-1,-1,-1] }, with only the first two threads - * being unmasked to load portions of valid data (and other items are assigned \p -1) - * - */ - __device__ __forceinline__ void Load( - InputIteratorRA block_itr, ///< [in] The thread block's base input iterator for loading from - T (&items)[ITEMS_PER_THREAD], ///< [out] Data to load - int valid_items, ///< [in] Number of valid items to load - T oob_default) ///< [in] Default value to assign out-of-bound items - { - InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items, oob_default); - } - - - //@} end member group - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/block_radix_rank.cuh b/kokkos/kokkos/TPL/cub/block/block_radix_rank.cuh deleted file mode 100644 index 149a62c..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_radix_rank.cuh +++ /dev/null @@ -1,479 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockRadixRank provides operations for ranking unsigned integer types within a CUDA threadblock - */ - -#pragma once - -#include "../util_arch.cuh" -#include "../util_type.cuh" -#include "../thread/thread_reduce.cuh" -#include "../thread/thread_scan.cuh" -#include "../block/block_scan.cuh" -#include "../util_namespace.cuh" - - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief BlockRadixRank provides operations for ranking unsigned integer types within a CUDA threadblock. - * \ingroup BlockModule - * - * \par Overview - * Blah... 
- * - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam RADIX_BITS [optional] The number of radix bits per digit place (default: 5 bits) - * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details. - * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) - * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) - * - * \par Usage Considerations - * - Keys must be in a form suitable for radix ranking (i.e., unsigned bits). - * - Assumes a [blocked arrangement](index.html#sec5sec4) of elements across threads - * - \smemreuse{BlockRadixRank::TempStorage} - * - * \par Performance Considerations - * - * \par Algorithm - * These parallel radix ranking variants have O(n) work complexity and are implemented in XXX phases: - * -# blah - * -# blah - * - * \par Examples - * \par - * - Example 1: Simple radix rank of 32-bit integer keys - * \code - * #include - * - * template - * __global__ void ExampleKernel(...) - * { - * - * \endcode - */ -template < - int BLOCK_THREADS, - int RADIX_BITS, - bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? true : false, - BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, - cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte> -class BlockRadixRank -{ -private: - - /****************************************************************************** - * Type definitions and constants - ******************************************************************************/ - - // Integer type for digit counters (to be packed into words of type PackedCounters) - typedef unsigned short DigitCounter; - - // Integer type for packing DigitCounters into columns of shared memory banks - typedef typename If<(SMEM_CONFIG == cudaSharedMemBankSizeEightByte), - unsigned long long, - unsigned int>::Type PackedCounter; - - enum - { - RADIX_DIGITS = 1 << RADIX_BITS, - - LOG_WARP_THREADS = PtxArchProps::LOG_WARP_THREADS, - WARP_THREADS = 1 << LOG_WARP_THREADS, - WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, - - BYTES_PER_COUNTER = sizeof(DigitCounter), - LOG_BYTES_PER_COUNTER = Log2::VALUE, - - PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), - LOG_PACKING_RATIO = Log2::VALUE, - - LOG_COUNTER_LANES = CUB_MAX((RADIX_BITS - LOG_PACKING_RATIO), 0), // Always at least one lane - COUNTER_LANES = 1 << LOG_COUNTER_LANES, - - // The number of packed counters per thread (plus one for padding) - RAKING_SEGMENT = COUNTER_LANES + 1, - - LOG_SMEM_BANKS = PtxArchProps::LOG_SMEM_BANKS, - SMEM_BANKS = 1 << LOG_SMEM_BANKS, - }; - - - /// BlockScan type - typedef BlockScan BlockScan; - - - /// Shared memory storage layout type for BlockRadixRank - struct _TempStorage - { - // Storage for scanning local ranks - typename BlockScan::TempStorage block_scan; - - union - { - DigitCounter digit_counters[COUNTER_LANES + 1][BLOCK_THREADS][PACKING_RATIO]; - PackedCounter raking_grid[BLOCK_THREADS][RAKING_SEGMENT]; - }; - }; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Linear thread-id - int 
linear_tid; - - /// Copy of raking segment, promoted to registers - PackedCounter cached_segment[RAKING_SEGMENT]; - - - /****************************************************************************** - * Templated iteration - ******************************************************************************/ - - // General template iteration - template - struct Iterate - { - /** - * Decode keys. Decodes the radix digit from the current digit place - * and increments the thread's corresponding counter in shared - * memory for that digit. - * - * Saves both (1) the prior value of that counter (the key's - * thread-local exclusive prefix sum for that digit), and (2) the shared - * memory offset of the counter (for later use). - */ - template - static __device__ __forceinline__ void DecodeKeys( - BlockRadixRank &cta, // BlockRadixRank instance - UnsignedBits (&keys)[KEYS_PER_THREAD], // Key to decode - DigitCounter (&thread_prefixes)[KEYS_PER_THREAD], // Prefix counter value (out parameter) - DigitCounter* (&digit_counters)[KEYS_PER_THREAD], // Counter smem offset (out parameter) - int current_bit) // The least-significant bit position of the current digit to extract - { - // Add in sub-counter offset - UnsignedBits sub_counter = BFE(keys[COUNT], current_bit + LOG_COUNTER_LANES, LOG_PACKING_RATIO); - - // Add in row offset - UnsignedBits row_offset = BFE(keys[COUNT], current_bit, LOG_COUNTER_LANES); - - // Pointer to smem digit counter - digit_counters[COUNT] = &cta.temp_storage.digit_counters[row_offset][cta.linear_tid][sub_counter]; - - // Load thread-exclusive prefix - thread_prefixes[COUNT] = *digit_counters[COUNT]; - - // Store inclusive prefix - *digit_counters[COUNT] = thread_prefixes[COUNT] + 1; - - // Iterate next key - Iterate::DecodeKeys(cta, keys, thread_prefixes, digit_counters, current_bit); - } - - - // Termination - template - static __device__ __forceinline__ void UpdateRanks( - int (&ranks)[KEYS_PER_THREAD], // Local ranks (out parameter) - DigitCounter (&thread_prefixes)[KEYS_PER_THREAD], // Prefix counter value - DigitCounter* (&digit_counters)[KEYS_PER_THREAD]) // Counter smem offset - { - // Add in threadblock exclusive prefix - ranks[COUNT] = thread_prefixes[COUNT] + *digit_counters[COUNT]; - - // Iterate next key - Iterate::UpdateRanks(ranks, thread_prefixes, digit_counters); - } - }; - - - // Termination - template - struct Iterate - { - // DecodeKeys - template - static __device__ __forceinline__ void DecodeKeys( - BlockRadixRank &cta, - UnsignedBits (&keys)[KEYS_PER_THREAD], - DigitCounter (&thread_prefixes)[KEYS_PER_THREAD], - DigitCounter* (&digit_counters)[KEYS_PER_THREAD], - int current_bit) {} - - - // UpdateRanks - template - static __device__ __forceinline__ void UpdateRanks( - int (&ranks)[KEYS_PER_THREAD], - DigitCounter (&thread_prefixes)[KEYS_PER_THREAD], - DigitCounter *(&digit_counters)[KEYS_PER_THREAD]) {} - }; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /** - * Internal storage allocator - */ - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - - /** - * Performs upsweep raking reduction, returning the aggregate - */ - __device__ __forceinline__ PackedCounter Upsweep() - { - PackedCounter *smem_raking_ptr = temp_storage.raking_grid[linear_tid]; - PackedCounter *raking_ptr; - - if (MEMOIZE_OUTER_SCAN) - { - // Copy data into 
registers - #pragma unroll - for (int i = 0; i < RAKING_SEGMENT; i++) - { - cached_segment[i] = smem_raking_ptr[i]; - } - raking_ptr = cached_segment; - } - else - { - raking_ptr = smem_raking_ptr; - } - - return ThreadReduce(raking_ptr, Sum()); - } - - - /// Performs exclusive downsweep raking scan - __device__ __forceinline__ void ExclusiveDownsweep( - PackedCounter raking_partial) - { - PackedCounter *smem_raking_ptr = temp_storage.raking_grid[linear_tid]; - - PackedCounter *raking_ptr = (MEMOIZE_OUTER_SCAN) ? - cached_segment : - smem_raking_ptr; - - // Exclusive raking downsweep scan - ThreadScanExclusive(raking_ptr, raking_ptr, Sum(), raking_partial); - - if (MEMOIZE_OUTER_SCAN) - { - // Copy data back to smem - #pragma unroll - for (int i = 0; i < RAKING_SEGMENT; i++) - { - smem_raking_ptr[i] = cached_segment[i]; - } - } - } - - - /** - * Reset shared memory digit counters - */ - __device__ __forceinline__ void ResetCounters() - { - // Reset shared memory digit counters - #pragma unroll - for (int LANE = 0; LANE < COUNTER_LANES + 1; LANE++) - { - *((PackedCounter*) temp_storage.digit_counters[LANE][linear_tid]) = 0; - } - } - - - /** - * Scan shared memory digit counters. - */ - __device__ __forceinline__ void ScanCounters() - { - // Upsweep scan - PackedCounter raking_partial = Upsweep(); - - // Compute inclusive sum - PackedCounter inclusive_partial; - PackedCounter packed_aggregate; - BlockScan(temp_storage.block_scan, linear_tid).InclusiveSum(raking_partial, inclusive_partial, packed_aggregate); - - // Propagate totals in packed fields - #pragma unroll - for (int PACKED = 1; PACKED < PACKING_RATIO; PACKED++) - { - inclusive_partial += packed_aggregate << (sizeof(DigitCounter) * 8 * PACKED); - } - - // Downsweep scan with exclusive partial - PackedCounter exclusive_partial = inclusive_partial - raking_partial; - ExclusiveDownsweep(exclusive_partial); - } - -public: - - /// \smemstorage{BlockScan} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockRadixRank() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockRadixRank( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockRadixRank( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. 
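Because the usage example in the BlockRadixRank class documentation above is left as a stub, the following hedged sketch spells out one plausible call sequence. The kernel name, key indexing, and the <128, 4> specialization (128 threads, 4 radix bits per digit place) are illustrative assumptions; the RankKeys signature is the one declared just below.

#include <cub/cub.cuh>

// Launch with 128 threads per block.
__global__ void RankKernel(const unsigned int *d_keys)
{
    // Rank keys across 128 threads, 4 radix bits per digit place (illustrative)
    typedef cub::BlockRadixRank<128, 4> BlockRadixRank;

    __shared__ typename BlockRadixRank::TempStorage temp_storage;

    // Each thread's tile of 4 keys in a blocked arrangement
    unsigned int keys[4];
    for (int i = 0; i < 4; ++i)
        keys[i] = d_keys[(threadIdx.x * 4) + i];

    // Local rank of each key within the tile, for the digit starting at bit 0
    int ranks[4];
    BlockRadixRank(temp_storage).RankKeys(keys, ranks, 0);

    // ranks[] can then drive a block-wide scatter (e.g., via BlockExchange)
    // to reorder the tile by the current digit.
}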
- */ - __device__ __forceinline__ BlockRadixRank( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - - //@} end member group - /******************************************************************//** - * \name Raking - *********************************************************************/ - //@{ - - /** - * \brief Rank keys. - */ - template < - typename UnsignedBits, - int KEYS_PER_THREAD> - __device__ __forceinline__ void RankKeys( - UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile - int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile - int current_bit) ///< [in] The least-significant bit position of the current digit to extract - { - DigitCounter thread_prefixes[KEYS_PER_THREAD]; // For each key, the count of previous keys in this tile having the same digit - DigitCounter* digit_counters[KEYS_PER_THREAD]; // For each key, the byte-offset of its corresponding digit counter in smem - - // Reset shared memory digit counters - ResetCounters(); - - // Decode keys and update digit counters - Iterate<0, KEYS_PER_THREAD>::DecodeKeys(*this, keys, thread_prefixes, digit_counters, current_bit); - - __syncthreads(); - - // Scan shared memory counters - ScanCounters(); - - __syncthreads(); - - // Extract the local ranks of each key - Iterate<0, KEYS_PER_THREAD>::UpdateRanks(ranks, thread_prefixes, digit_counters); - } - - - /** - * \brief Rank keys. For the lower \p RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. - */ - template < - typename UnsignedBits, - int KEYS_PER_THREAD> - __device__ __forceinline__ void RankKeys( - UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile - int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile (out parameter) - int current_bit, ///< [in] The least-significant bit position of the current digit to extract - int &inclusive_digit_prefix) ///< [out] The incluisve prefix sum for the digit threadIdx.x - { - // Rank keys - RankKeys(keys, ranks, current_bit); - - // Get the inclusive and exclusive digit totals corresponding to the calling thread. - if ((BLOCK_THREADS == RADIX_DIGITS) || (linear_tid < RADIX_DIGITS)) - { - // Obtain ex/inclusive digit counts. (Unfortunately these all reside in the - // first counter column, resulting in unavoidable bank conflicts.) - int counter_lane = (linear_tid & (COUNTER_LANES - 1)); - int sub_counter = linear_tid >> (LOG_COUNTER_LANES); - inclusive_digit_prefix = temp_storage.digit_counters[counter_lane + 1][0][sub_counter]; - } - } -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/block/block_radix_sort.cuh b/kokkos/kokkos/TPL/cub/block/block_radix_sort.cuh deleted file mode 100644 index 873d401..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_radix_sort.cuh +++ /dev/null @@ -1,608 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * The cub::BlockRadixSort class provides [collective](index.html#sec0) methods for radix sorting of items partitioned across a CUDA thread block. - */ - - -#pragma once - -#include "../util_namespace.cuh" -#include "../util_arch.cuh" -#include "../util_type.cuh" -#include "block_exchange.cuh" -#include "block_radix_rank.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief The cub::BlockRadixSort class provides [collective](index.html#sec0) methods for sorting items partitioned across a CUDA thread block using a radix sorting method. ![](sorting_logo.png) - * \ingroup BlockModule - * - * \par Overview - * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges - * items into ascending order. It relies upon a positional representation for - * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, - * characters, etc.) specified from least-significant to most-significant. For a - * given input sequence of keys and a set of rules specifying a total ordering - * of the symbolic alphabet, the radix sorting method produces a lexicographic - * ordering of those keys. - * - * \par - * BlockRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: - * unsigned char, \p int, \p double, etc. Within each key, the implementation treats fixed-length - * bit-sequences of \p RADIX_BITS as radix digit places. Although the direct radix sorting - * method can only be applied to unsigned integral types, BlockRadixSort - * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. 
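// A minimal sketch of the order-preserving bit-wise transformations mentioned
// above, for 32-bit keys. These are illustrative stand-ins, not the library's
// exact KeyTraits code, but they capture the idea behind the TwiddleIn/TwiddleOut
// calls that appear in the sorting passes further down in this file.
__device__ __forceinline__ unsigned int TwiddleFloatKey(float key)
{
    unsigned int bits = __float_as_uint(key);
    // Negative floats: flip every bit so more-negative values order first.
    // Non-negative floats: flip only the sign bit so they follow the negatives.
    unsigned int mask = (bits & 0x80000000u) ? 0xFFFFFFFFu : 0x80000000u;
    return bits ^ mask;
}

__device__ __forceinline__ unsigned int TwiddleSignedKey(int key)
{
    // Flipping the sign bit maps INT_MIN..INT_MAX monotonically onto 0..UINT_MAX.
    return static_cast<unsigned int>(key) ^ 0x80000000u;
}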
- * - * \tparam Key Key type - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam ITEMS_PER_THREAD The number of items per thread - * \tparam Value [optional] Value type (default: cub::NullType) - * \tparam RADIX_BITS [optional] The number of radix bits per digit place (default: 4 bits) - * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). - * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) - * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) - * - * \par A Simple Example - * \blockcollective{BlockRadixSort} - * \par - * The code snippet below illustrates a sort of 512 integer keys that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockRadixSort for 128 threads owning 4 integer items each - * typedef cub::BlockRadixSort BlockRadixSort; - * - * // Allocate shared memory for BlockRadixSort - * __shared__ typename BlockRadixSort::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_keys[4]; - * ... - * - * // Collectively sort the keys - * BlockRadixSort(temp_storage).Sort(thread_keys); - * - * ... - * \endcode - * \par - * Suppose the set of input \p thread_keys across the block of threads is - * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The - * corresponding output \p thread_keys in those threads will be - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. - * - */ -template < - typename Key, - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - typename Value = NullType, - int RADIX_BITS = 4, - bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? 
true : false, - BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, - cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte> -class BlockRadixSort -{ -private: - - /****************************************************************************** - * Constants and type definitions - ******************************************************************************/ - - // Key traits and unsigned bits type - typedef NumericTraits KeyTraits; - typedef typename KeyTraits::UnsignedBits UnsignedBits; - - /// BlockRadixRank utility type - typedef BlockRadixRank BlockRadixRank; - - /// BlockExchange utility type for keys - typedef BlockExchange BlockExchangeKeys; - - /// BlockExchange utility type for values - typedef BlockExchange BlockExchangeValues; - - /// Shared memory storage layout type - struct _TempStorage - { - union - { - typename BlockRadixRank::TempStorage ranking_storage; - typename BlockExchangeKeys::TempStorage exchange_keys; - typename BlockExchangeValues::TempStorage exchange_values; - }; - }; - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - -public: - - /// \smemstorage{BlockScan} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockRadixSort() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockRadixSort( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockRadixSort( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. 
- */ - __device__ __forceinline__ BlockRadixSort( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - - //@} end member group - /******************************************************************//** - * \name Sorting (blocked arrangements) - *********************************************************************/ - //@{ - - /** - * \brief Performs a block-wide radix sort over a [blocked arrangement](index.html#sec5sec4) of keys. - * - * \smemreuse - * - * The code snippet below illustrates a sort of 512 integer keys that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive keys. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockRadixSort for 128 threads owning 4 integer keys each - * typedef cub::BlockRadixSort BlockRadixSort; - * - * // Allocate shared memory for BlockRadixSort - * __shared__ typename BlockRadixSort::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_keys[4]; - * ... - * - * // Collectively sort the keys - * BlockRadixSort(temp_storage).Sort(thread_keys); - * - * \endcode - * \par - * Suppose the set of input \p thread_keys across the block of threads is - * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. - * The corresponding output \p thread_keys in those threads will be - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. - */ - __device__ __forceinline__ void Sort( - Key (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort - int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison - int end_bit = sizeof(Key) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison - { - UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = - reinterpret_cast(keys); - - // Twiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); - } - - // Radix sorting passes - while (true) - { - // Rank the blocked keys - int ranks[ITEMS_PER_THREAD]; - BlockRadixRank(temp_storage.ranking_storage, linear_tid).RankKeys(unsigned_keys, ranks, begin_bit); - begin_bit += RADIX_BITS; - - __syncthreads(); - - // Exchange keys through shared memory in blocked arrangement - BlockExchangeKeys(temp_storage.exchange_keys, linear_tid).ScatterToBlocked(keys, ranks); - - // Quit if done - if (begin_bit >= end_bit) break; - - __syncthreads(); - } - - // Untwiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); - } - } - - - /** - * \brief Performs a block-wide radix sort across a [blocked arrangement](index.html#sec5sec4) of keys and values. - * - * BlockRadixSort can only accommodate one associated tile of values. To "truck along" - * more than one tile of values, simply perform a key-value sort of the keys paired - * with a temporary value array that enumerates the key indices. 
The reordered indices - * can then be used as a gather-vector for exchanging other associated tile data through - * shared memory. - * - * \smemreuse - * - * The code snippet below illustrates a sort of 512 integer keys and values that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive pairs. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockRadixSort for 128 threads owning 4 integer keys and values each - * typedef cub::BlockRadixSort BlockRadixSort; - * - * // Allocate shared memory for BlockRadixSort - * __shared__ typename BlockRadixSort::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_keys[4]; - * int thread_values[4]; - * ... - * - * // Collectively sort the keys and values among block threads - * BlockRadixSort(temp_storage).Sort(thread_keys, thread_values); - * - * \endcode - * \par - * Suppose the set of input \p thread_keys across the block of threads is - * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The - * corresponding output \p thread_keys in those threads will be - * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. - * - */ - __device__ __forceinline__ void Sort( - Key (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort - Value (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort - int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison - int end_bit = sizeof(Key) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison - { - UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = - reinterpret_cast(keys); - - // Twiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); - } - - // Radix sorting passes - while (true) - { - // Rank the blocked keys - int ranks[ITEMS_PER_THREAD]; - BlockRadixRank(temp_storage.ranking_storage, linear_tid).RankKeys(unsigned_keys, ranks, begin_bit); - begin_bit += RADIX_BITS; - - __syncthreads(); - - // Exchange keys through shared memory in blocked arrangement - BlockExchangeKeys(temp_storage.exchange_keys, linear_tid).ScatterToBlocked(keys, ranks); - - __syncthreads(); - - // Exchange values through shared memory in blocked arrangement - BlockExchangeValues(temp_storage.exchange_values, linear_tid).ScatterToBlocked(values, ranks); - - // Quit if done - if (begin_bit >= end_bit) break; - - __syncthreads(); - } - - // Untwiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); - } - } - - - //@} end member group - /******************************************************************//** - * \name Sorting (blocked arrangement -> striped arrangement) - *********************************************************************/ - //@{ - - - /** - * \brief Performs a radix sort across a [blocked arrangement](index.html#sec5sec4) of keys, leaving them in a [striped arrangement](index.html#sec5sec4). - * - * \smemreuse - * - * The code snippet below illustrates a sort of 512 integer keys that - * are initially partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive keys. The final partitioning is striped. 
- * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockRadixSort for 128 threads owning 4 integer keys each - * typedef cub::BlockRadixSort BlockRadixSort; - * - * // Allocate shared memory for BlockRadixSort - * __shared__ typename BlockRadixSort::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_keys[4]; - * ... - * - * // Collectively sort the keys - * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys); - * - * \endcode - * \par - * Suppose the set of input \p thread_keys across the block of threads is - * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The - * corresponding output \p thread_keys in those threads will be - * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. - * - */ - __device__ __forceinline__ void SortBlockedToStriped( - Key (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort - int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison - int end_bit = sizeof(Key) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison - { - UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = - reinterpret_cast(keys); - - // Twiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); - } - - // Radix sorting passes - while (true) - { - // Rank the blocked keys - int ranks[ITEMS_PER_THREAD]; - BlockRadixRank(temp_storage.ranking_storage, linear_tid).RankKeys(unsigned_keys, ranks, begin_bit); - begin_bit += RADIX_BITS; - - __syncthreads(); - - // Check if this is the last pass - if (begin_bit >= end_bit) - { - // Last pass exchanges keys through shared memory in striped arrangement - BlockExchangeKeys(temp_storage.exchange_keys, linear_tid).ScatterToStriped(keys, ranks); - - // Quit - break; - } - - // Exchange keys through shared memory in blocked arrangement - BlockExchangeKeys(temp_storage.exchange_keys, linear_tid).ScatterToBlocked(keys, ranks); - - __syncthreads(); - } - - // Untwiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); - } - } - - - /** - * \brief Performs a radix sort across a [blocked arrangement](index.html#sec5sec4) of keys and values, leaving them in a [striped arrangement](index.html#sec5sec4). - * - * BlockRadixSort can only accommodate one associated tile of values. To "truck along" - * more than one tile of values, simply perform a key-value sort of the keys paired - * with a temporary value array that enumerates the key indices. The reordered indices - * can then be used as a gather-vector for exchanging other associated tile data through - * shared memory. - * - * \smemreuse - * - * The code snippet below illustrates a sort of 512 integer keys and values that - * are initially partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive pairs. The final partitioning is striped. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * // Specialize BlockRadixSort for 128 threads owning 4 integer keys and values each - * typedef cub::BlockRadixSort BlockRadixSort; - * - * // Allocate shared memory for BlockRadixSort - * __shared__ typename BlockRadixSort::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_keys[4]; - * int thread_values[4]; - * ... - * - * // Collectively sort the keys and values among block threads - * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys, thread_values); - * - * \endcode - * \par - * Suppose the set of input \p thread_keys across the block of threads is - * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The - * corresponding output \p thread_keys in those threads will be - * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. - * - */ - __device__ __forceinline__ void SortBlockedToStriped( - Key (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort - Value (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort - int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison - int end_bit = sizeof(Key) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison - { - UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = - reinterpret_cast(keys); - - // Twiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); - } - - // Radix sorting passes - while (true) - { - // Rank the blocked keys - int ranks[ITEMS_PER_THREAD]; - BlockRadixRank(temp_storage.ranking_storage, linear_tid).RankKeys(unsigned_keys, ranks, begin_bit); - begin_bit += RADIX_BITS; - - __syncthreads(); - - // Check if this is the last pass - if (begin_bit >= end_bit) - { - // Last pass exchanges keys through shared memory in striped arrangement - BlockExchangeKeys(temp_storage.exchange_keys, linear_tid).ScatterToStriped(keys, ranks); - - __syncthreads(); - - // Last pass exchanges through shared memory in striped arrangement - BlockExchangeValues(temp_storage.exchange_values, linear_tid).ScatterToStriped(values, ranks); - - // Quit - break; - } - - // Exchange keys through shared memory in blocked arrangement - BlockExchangeKeys(temp_storage.exchange_keys, linear_tid).ScatterToBlocked(keys, ranks); - - __syncthreads(); - - // Exchange values through shared memory in blocked arrangement - BlockExchangeValues(temp_storage.exchange_values, linear_tid).ScatterToBlocked(values, ranks); - - __syncthreads(); - } - - // Untwiddle bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); - } - } - - - //@} end member group - -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/block_raking_layout.cuh b/kokkos/kokkos/TPL/cub/block/block_raking_layout.cuh deleted file mode 100644 index 878a786..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_raking_layout.cuh +++ /dev/null @@ -1,145 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockRakingLayout provides a conflict-free shared memory layout abstraction for warp-raking across thread block data. - */ - - -#pragma once - -#include "../util_macro.cuh" -#include "../util_arch.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief BlockRakingLayout provides a conflict-free shared memory layout abstraction for raking across thread block data. ![](raking.png) - * \ingroup BlockModule - * - * \par Overview - * This type facilitates a shared memory usage pattern where a block of CUDA - * threads places elements into shared memory and then reduces the active - * parallelism to one "raking" warp of threads for serially aggregating consecutive - * sequences of shared items. Padding is inserted to eliminate bank conflicts - * (for most data types). - * - * \tparam T The data type to be exchanged. - * \tparam BLOCK_THREADS The thread block size in threads. 
- * \tparam BLOCK_STRIPS When strip-mining, the number of threadblock-strips per tile - */ -template < - typename T, - int BLOCK_THREADS, - int BLOCK_STRIPS = 1> -struct BlockRakingLayout -{ - //--------------------------------------------------------------------- - // Constants and typedefs - //--------------------------------------------------------------------- - - enum - { - /// The total number of elements that need to be cooperatively reduced - SHARED_ELEMENTS = - BLOCK_THREADS * BLOCK_STRIPS, - - /// Maximum number of warp-synchronous raking threads - MAX_RAKING_THREADS = - CUB_MIN(BLOCK_THREADS, PtxArchProps::WARP_THREADS), - - /// Number of raking elements per warp-synchronous raking thread (rounded up) - SEGMENT_LENGTH = - (SHARED_ELEMENTS + MAX_RAKING_THREADS - 1) / MAX_RAKING_THREADS, - - /// Never use a raking thread that will have no valid data (e.g., when BLOCK_THREADS is 62 and SEGMENT_LENGTH is 2, we should only use 31 raking threads) - RAKING_THREADS = - (SHARED_ELEMENTS + SEGMENT_LENGTH - 1) / SEGMENT_LENGTH, - - /// Pad each segment length with one element if it evenly divides the number of banks - SEGMENT_PADDING = - (PtxArchProps::SMEM_BANKS % SEGMENT_LENGTH == 0) ? 1 : 0, - - /// Total number of elements in the raking grid - GRID_ELEMENTS = - RAKING_THREADS * (SEGMENT_LENGTH + SEGMENT_PADDING), - - /// Whether or not we need bounds checking during raking (the number of reduction elements is not a multiple of the warp size) - UNGUARDED = - (SHARED_ELEMENTS % RAKING_THREADS == 0), - }; - - - /** - * \brief Shared memory storage type - */ - typedef T TempStorage[BlockRakingLayout::GRID_ELEMENTS]; - - - /** - * \brief Returns the location for the calling thread to place data into the grid - */ - static __device__ __forceinline__ T* PlacementPtr( - TempStorage &temp_storage, - int linear_tid, - int block_strip = 0) - { - // Offset for partial - unsigned int offset = (block_strip * BLOCK_THREADS) + linear_tid; - - // Add in one padding element for every segment - if (SEGMENT_PADDING > 0) - { - offset += offset / SEGMENT_LENGTH; - } - - // Incorporating a block of padding partials every shared memory segment - return temp_storage + offset; - } - - - /** - * \brief Returns the location for the calling thread to begin sequential raking - */ - static __device__ __forceinline__ T* RakingPtr( - TempStorage &temp_storage, - int linear_tid) - { - return temp_storage + (linear_tid * (SEGMENT_LENGTH + SEGMENT_PADDING)); - } -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/block_reduce.cuh b/kokkos/kokkos/TPL/cub/block/block_reduce.cuh deleted file mode 100644 index ffdff73..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_reduce.cuh +++ /dev/null @@ -1,563 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * The cub::BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. - */ - -#pragma once - -#include "specializations/block_reduce_raking.cuh" -#include "specializations/block_reduce_warp_reductions.cuh" -#include "../util_type.cuh" -#include "../thread/thread_operators.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - - -/****************************************************************************** - * Algorithmic variants - ******************************************************************************/ - -/** - * BlockReduceAlgorithm enumerates alternative algorithms for parallel - * reduction across a CUDA threadblock. - */ -enum BlockReduceAlgorithm -{ - - /** - * \par Overview - * An efficient "raking" reduction algorithm. Execution is comprised of - * three phases: - * -# Upsweep sequential reduction in registers (if threads contribute more - * than one input each). Each thread then places the partial reduction - * of its item(s) into shared memory. - * -# Upsweep sequential reduction in shared memory. Threads within a - * single warp rake across segments of shared partial reductions. - * -# A warp-synchronous Kogge-Stone style reduction within the raking warp. - * - * \par - * \image html block_reduce.png - *
\p BLOCK_REDUCE_RAKING data flow for a hypothetical 16-thread threadblock and 4-thread raking warp.
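// A minimal sketch of the three raking phases described above, assuming a
// hypothetical 128-thread block raked by one 32-thread warp summing ints.
// This is illustrative only; the tuned implementation lives in the library's
// raking specialization.
__global__ void RakingReduceSketch(const int *d_in, int *d_out)
{
    __shared__ int partials[128];
    int tid = threadIdx.x;

    // Phase 1: each thread places its partial into shared memory.
    partials[tid] = d_in[blockIdx.x * 128 + tid];
    __syncthreads();

    if (tid < 32)
    {
        // Phase 2: each raking thread serially reduces a 4-element segment.
        int sum = 0;
        for (int i = 0; i < 4; ++i)
            sum += partials[tid * 4 + i];

        // Phase 3: warp-synchronous reduction among the 32 raking threads
        // (shuffle-based here for brevity; assumes a shuffle-capable GPU).
        for (int offset = 16; offset > 0; offset /= 2)
            sum += __shfl_down_sync(0xFFFFFFFFu, sum, offset);

        if (tid == 0)
            d_out[blockIdx.x] = sum;
    }
}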
- * - * \par Performance Considerations - * - Although this variant may suffer longer turnaround latencies when the - * GPU is under-occupied, it can often provide higher overall throughput - * across the GPU when suitably occupied. - */ - BLOCK_REDUCE_RAKING, - - - /** - * \par Overview - * A quick "tiled warp-reductions" reduction algorithm. Execution is - * comprised of four phases: - * -# Upsweep sequential reduction in registers (if threads contribute more - * than one input each). Each thread then places the partial reduction - * of its item(s) into shared memory. - * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style - * reduction within each warp. - * -# A propagation phase where the warp reduction outputs in each warp are - * updated with the aggregate from each preceding warp. - * - * \par - * \image html block_scan_warpscans.png - *
\p BLOCK_REDUCE_WARP_REDUCTIONS data flow for a hypothetical 16-thread threadblock and 4-thread raking warp.
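// Choosing between the two variants is just the ALGORITHM template argument; a
// minimal usage sketch (the kernel and buffer names below are hypothetical):
#include <cub/cub.cuh>

__global__ void WarpReductionsSumKernel(const int *d_in, int *d_out)
{
    typedef cub::BlockReduce<int, 128, cub::BLOCK_REDUCE_WARP_REDUCTIONS> BlockReduceT;
    __shared__ typename BlockReduceT::TempStorage temp_storage;

    int thread_data = d_in[blockIdx.x * 128 + threadIdx.x];

    // The returned aggregate is only defined in thread0.
    int aggregate = BlockReduceT(temp_storage).Sum(thread_data);
    if (threadIdx.x == 0)
        d_out[blockIdx.x] = aggregate;
}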
- * - * \par Performance Considerations - * - Although this variant may suffer lower overall throughput across the - * GPU because due to a heavy reliance on inefficient warp-reductions, it - * can often provide lower turnaround latencies when the GPU is - * under-occupied. - */ - BLOCK_REDUCE_WARP_REDUCTIONS, -}; - - -/****************************************************************************** - * Block reduce - ******************************************************************************/ - -/** - * \brief The BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. ![](reduce_logo.png) - * \ingroup BlockModule - * - * \par Overview - * A reduction (or fold) - * uses a binary combining operator to compute a single aggregate from a list of input elements. - * - * \par - * Optionally, BlockReduce can be specialized by algorithm to accommodate different latency/throughput workload profiles: - * -# cub::BLOCK_REDUCE_RAKING. An efficient "raking" reduction algorithm. [More...](\ref cub::BlockReduceAlgorithm) - * -# cub::BLOCK_REDUCE_WARP_REDUCTIONS. A quick "tiled warp-reductions" reduction algorithm. [More...](\ref cub::BlockReduceAlgorithm) - * - * \tparam T Data type being reduced - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam ALGORITHM [optional] cub::BlockReduceAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_REDUCE_RAKING) - * - * \par Performance Considerations - * - Very efficient (only one synchronization barrier). - * - Zero bank conflicts for most types. - * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: - * - Summation (vs. generic reduction) - * - \p BLOCK_THREADS is a multiple of the architecture's warp size - * - Every thread has a valid input (i.e., full vs. partial-tiles) - * - See cub::BlockReduceAlgorithm for performance details regarding algorithmic alternatives - * - * \par A Simple Example - * \blockcollective{BlockReduce} - * \par - * The code snippet below illustrates a sum reduction of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockReduce for 128 threads on type int - * typedef cub::BlockReduce BlockReduce; - * - * // Allocate shared memory for BlockReduce - * __shared__ typename BlockReduce::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Compute the block-wide sum for thread0 - * int aggregate = BlockReduce(temp_storage).Sum(thread_data); - * - * \endcode - * - */ -template < - typename T, - int BLOCK_THREADS, - BlockReduceAlgorithm ALGORITHM = BLOCK_REDUCE_RAKING> -class BlockReduce -{ -private: - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /// Internal specialization. 
- typedef typename If<(ALGORITHM == BLOCK_REDUCE_WARP_REDUCTIONS), - BlockReduceWarpReductions, - BlockReduceRaking >::Type InternalBlockReduce; - - /// Shared memory storage layout type for BlockReduce - typedef typename InternalBlockReduce::TempStorage _TempStorage; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - -public: - - /// \smemstorage{BlockReduce} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockReduce() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockReduce( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockReduce( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. - */ - __device__ __forceinline__ BlockReduce( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - - //@} end member group - /******************************************************************//** - * \name Generic reductions - *********************************************************************/ - //@{ - - - /** - * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes one input element. - * - * The return value is undefined in threads other than thread0. - * - * Supports non-commutative reduction operators. - * - * \smemreuse - * - * The code snippet below illustrates a max reduction of 128 integer items that - * are partitioned across 128 threads. 
- * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockReduce for 128 threads on type int - * typedef cub::BlockReduce BlockReduce; - * - * // Allocate shared memory for BlockReduce - * __shared__ typename BlockReduce::TempStorage temp_storage; - * - * // Each thread obtains an input item - * int thread_data; - * ... - * - * // Compute the block-wide max for thread0 - * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); - * - * \endcode - * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ T Reduce( - T input, ///< [in] Calling thread's input - ReductionOp reduction_op) ///< [in] Binary reduction operator - { - return InternalBlockReduce(temp_storage, linear_tid).template Reduce(input, BLOCK_THREADS, reduction_op); - } - - - /** - * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes an array of consecutive input elements. - * - * The return value is undefined in threads other than thread0. - * - * Supports non-commutative reduction operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a max reduction of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockReduce for 128 threads on type int - * typedef cub::BlockReduce BlockReduce; - * - * // Allocate shared memory for BlockReduce - * __shared__ typename BlockReduce::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Compute the block-wide max for thread0 - * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); - * - * \endcode - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template < - int ITEMS_PER_THREAD, - typename ReductionOp> - __device__ __forceinline__ T Reduce( - T (&inputs)[ITEMS_PER_THREAD], ///< [in] Calling thread's input segment - ReductionOp reduction_op) ///< [in] Binary reduction operator - { - // Reduce partials - T partial = ThreadReduce(inputs, reduction_op); - return Reduce(partial, reduction_op); - } - - - /** - * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. The first \p num_valid threads each contribute one input element. - * - * The return value is undefined in threads other than thread0. - * - * Supports non-commutative reduction operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a max reduction of a partially-full tile of integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int num_valid, ...) - * { - * // Specialize BlockReduce for 128 threads on type int - * typedef cub::BlockReduce BlockReduce; - * - * // Allocate shared memory for BlockReduce - * __shared__ typename BlockReduce::TempStorage temp_storage; - * - * // Each thread obtains an input item - * int thread_data; - * if (threadIdx.x < num_valid) thread_data = ... 
- * - * // Compute the block-wide max for thread0 - * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max(), num_valid); - * - * \endcode - * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ T Reduce( - T input, ///< [in] Calling thread's input - ReductionOp reduction_op, ///< [in] Binary reduction operator - int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) - { - // Determine if we scan skip bounds checking - if (num_valid >= BLOCK_THREADS) - { - return InternalBlockReduce(temp_storage, linear_tid).template Reduce(input, num_valid, reduction_op); - } - else - { - return InternalBlockReduce(temp_storage, linear_tid).template Reduce(input, num_valid, reduction_op); - } - } - - - //@} end member group - /******************************************************************//** - * \name Summation reductions - *********************************************************************/ - //@{ - - - /** - * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element. - * - * The return value is undefined in threads other than thread0. - * - * \smemreuse - * - * The code snippet below illustrates a sum reduction of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockReduce for 128 threads on type int - * typedef cub::BlockReduce BlockReduce; - * - * // Allocate shared memory for BlockReduce - * __shared__ typename BlockReduce::TempStorage temp_storage; - * - * // Each thread obtains an input item - * int thread_data; - * ... - * - * // Compute the block-wide sum for thread0 - * int aggregate = BlockReduce(temp_storage).Sum(thread_data); - * - * \endcode - * - */ - __device__ __forceinline__ T Sum( - T input) ///< [in] Calling thread's input - { - return InternalBlockReduce(temp_storage, linear_tid).template Sum(input, BLOCK_THREADS); - } - - /** - * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements. - * - * The return value is undefined in threads other than thread0. - * - * \smemreuse - * - * The code snippet below illustrates a sum reduction of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockReduce for 128 threads on type int - * typedef cub::BlockReduce BlockReduce; - * - * // Allocate shared memory for BlockReduce - * __shared__ typename BlockReduce::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Compute the block-wide sum for thread0 - * int aggregate = BlockReduce(temp_storage).Sum(thread_data); - * - * \endcode - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
- */ - template - __device__ __forceinline__ T Sum( - T (&inputs)[ITEMS_PER_THREAD]) ///< [in] Calling thread's input segment - { - // Reduce partials - T partial = ThreadReduce(inputs, cub::Sum()); - return Sum(partial); - } - - - /** - * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. The first \p num_valid threads each contribute one input element. - * - * The return value is undefined in threads other than thread0. - * - * \smemreuse - * - * The code snippet below illustrates a sum reduction of a partially-full tile of integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int num_valid, ...) - * { - * // Specialize BlockReduce for 128 threads on type int - * typedef cub::BlockReduce BlockReduce; - * - * // Allocate shared memory for BlockReduce - * __shared__ typename BlockReduce::TempStorage temp_storage; - * - * // Each thread obtains an input item (up to num_items) - * int thread_data; - * if (threadIdx.x < num_valid) - * thread_data = ... - * - * // Compute the block-wide sum for thread0 - * int aggregate = BlockReduce(temp_storage).Sum(thread_data, num_valid); - * - * \endcode - * - */ - __device__ __forceinline__ T Sum( - T input, ///< [in] Calling thread's input - int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) - { - // Determine if we scan skip bounds checking - if (num_valid >= BLOCK_THREADS) - { - return InternalBlockReduce(temp_storage, linear_tid).template Sum(input, num_valid); - } - else - { - return InternalBlockReduce(temp_storage, linear_tid).template Sum(input, num_valid); - } - } - - - //@} end member group -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/block_scan.cuh b/kokkos/kokkos/TPL/cub/block/block_scan.cuh deleted file mode 100644 index 1c1a2da..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_scan.cuh +++ /dev/null @@ -1,2233 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * The cub::BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. - */ - -#pragma once - -#include "specializations/block_scan_raking.cuh" -#include "specializations/block_scan_warp_scans.cuh" -#include "../util_arch.cuh" -#include "../util_type.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Algorithmic variants - ******************************************************************************/ - -/** - * \brief BlockScanAlgorithm enumerates alternative algorithms for cub::BlockScan to compute a parallel prefix scan across a CUDA thread block. - */ -enum BlockScanAlgorithm -{ - - /** - * \par Overview - * An efficient "raking reduce-then-scan" prefix scan algorithm. Execution is comprised of five phases: - * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. - * -# Upsweep sequential reduction in shared memory. Threads within a single warp rake across segments of shared partial reductions. - * -# A warp-synchronous Kogge-Stone style exclusive scan within the raking warp. - * -# Downsweep sequential exclusive scan in shared memory. Threads within a single warp rake across segments of shared partial reductions, seeded with the warp-scan output. - * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. - * - * \par - * \image html block_scan_raking.png - *
\p BLOCK_SCAN_RAKING data flow for a hypothetical 16-thread threadblock and 4-thread raking warp.
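// A minimal sketch of the register-level phases (1 and 5) described above, for
// a hypothetical 4-item thread segment. The shared-memory raking and warp scan
// (phases 2-4) that produce 'exclusive_prefix' are elided; names are illustrative.
template <int ITEMS>
__device__ __forceinline__ int UpsweepInRegisters(const int (&items)[ITEMS])
{
    // Phase 1: each thread reduces its own segment before raking begins.
    int partial = 0;
    for (int i = 0; i < ITEMS; ++i)
        partial += items[i];
    return partial;
}

template <int ITEMS>
__device__ __forceinline__ void DownsweepInRegisters(
    int (&items)[ITEMS],
    int exclusive_prefix)   // this thread's prefix from the block-wide scan
{
    // Phase 5: seed a local exclusive scan with the raking scan output.
    int running = exclusive_prefix;
    for (int i = 0; i < ITEMS; ++i)
    {
        int value = items[i];
        items[i] = running;
        running += value;
    }
}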
- * - * \par Performance Considerations - * - Although this variant may suffer longer turnaround latencies when the - * GPU is under-occupied, it can often provide higher overall throughput - * across the GPU when suitably occupied. - */ - BLOCK_SCAN_RAKING, - - - /** - * \par Overview - * Similar to cub::BLOCK_SCAN_RAKING, but with fewer shared memory reads at - * the expense of higher register pressure. Raking threads preserve their - * "upsweep" segment of values in registers while performing warp-synchronous - * scan, allowing the "downsweep" not to re-read them from shared memory. - */ - BLOCK_SCAN_RAKING_MEMOIZE, - - - /** - * \par Overview - * A quick "tiled warpscans" prefix scan algorithm. Execution is comprised of four phases: - * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. - * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style scan within each warp. - * -# A propagation phase where the warp scan outputs in each warp are updated with the aggregate from each preceding warp. - * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. - * - * \par - * \image html block_scan_warpscans.png - *
\p BLOCK_SCAN_WARP_SCANS data flow for a hypothetical 16-thread threadblock and 4-thread raking warp.
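// As with BlockReduce, the scan variant is selected by the ALGORITHM template
// argument; a minimal usage sketch (hypothetical kernel and buffer names):
#include <cub/cub.cuh>

__global__ void WarpScansPrefixSumKernel(int *d_data)
{
    typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_WARP_SCANS> BlockScanT;
    __shared__ typename BlockScanT::TempStorage temp_storage;

    int idx = blockIdx.x * 128 + threadIdx.x;
    int thread_data = d_data[idx];

    // Block-wide exclusive prefix sum, written back in place.
    BlockScanT(temp_storage).ExclusiveSum(thread_data, thread_data);
    d_data[idx] = thread_data;
}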
- * - * \par Performance Considerations - * - Although this variant may suffer lower overall throughput across the - * GPU because due to a heavy reliance on inefficient warpscans, it can - * often provide lower turnaround latencies when the GPU is under-occupied. - */ - BLOCK_SCAN_WARP_SCANS, -}; - - -/****************************************************************************** - * Block scan - ******************************************************************************/ - -/** - * \brief The BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. ![](block_scan_logo.png) - * \ingroup BlockModule - * - * \par Overview - * Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) - * produces an output list where each element is computed to be the reduction - * of the elements occurring earlier in the input list. Prefix sum - * connotes a prefix scan with the addition operator. The term \em inclusive indicates - * that the ith output reduction incorporates the ith input. - * The term \em exclusive indicates the ith input is not incorporated into - * the ith output reduction. - * - * \par - * Optionally, BlockScan can be specialized by algorithm to accommodate different latency/throughput workload profiles: - * -# cub::BLOCK_SCAN_RAKING. An efficient "raking reduce-then-scan" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) - * -# cub::BLOCK_SCAN_WARP_SCANS. A quick "tiled warpscans" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) - * - * \tparam T Data type being scanned - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam ALGORITHM [optional] cub::BlockScanAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_SCAN_RAKING) - * - * \par A Simple Example - * \blockcollective{BlockScan} - * \par - * The code snippet below illustrates an exclusive prefix sum of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide exclusive prefix sum - * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. - * The corresponding output \p thread_data in those threads will be - * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. - * - * \par Performance Considerations - * - Uses special instructions when applicable (e.g., warp \p SHFL) - * - Uses synchronization-free communication between warp lanes when applicable - * - Uses only one or two block-wide synchronization barriers (depending on - * algorithm selection) - * - Zero bank conflicts for most types - * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: - * - Prefix sum variants (vs. generic scan) - * - Exclusive variants (vs. 
inclusive) - * - \p BLOCK_THREADS is a multiple of the architecture's warp size - * - See cub::BlockScanAlgorithm for performance details regarding algorithmic alternatives - * - */ -template < - typename T, - int BLOCK_THREADS, - BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING> -class BlockScan -{ -private: - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /** - * Ensure the template parameterization meets the requirements of the - * specified algorithm. Currently, the BLOCK_SCAN_WARP_SCANS policy - * cannot be used with threadblock sizes not a multiple of the - * architectural warp size. - */ - static const BlockScanAlgorithm SAFE_ALGORITHM = - ((ALGORITHM == BLOCK_SCAN_WARP_SCANS) && (BLOCK_THREADS % PtxArchProps::WARP_THREADS != 0)) ? - BLOCK_SCAN_RAKING : - ALGORITHM; - - /// Internal specialization. - typedef typename If<(SAFE_ALGORITHM == BLOCK_SCAN_WARP_SCANS), - BlockScanWarpScans, - BlockScanRaking >::Type InternalBlockScan; - - - /// Shared memory storage layout type for BlockScan - typedef typename InternalBlockScan::TempStorage _TempStorage; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - -public: - - /// \smemstorage{BlockScan} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockScan() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockScan( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockScan( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. 
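For non-1D thread blocks, the caller flattens the thread index before handing it to one of the linear_tid constructors above. A minimal sketch of that pattern, assuming a 16x8 thread block; the kernel and pointer names are illustrative only:

#include <cub/cub.cuh>

__global__ void TwoDimBlockKernel(const int *d_in, int *d_out)   // launched with dim3(16, 8)
{
    typedef cub::BlockScan<int, 16 * 8> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    // Flatten the 2D thread index into the 1D identifier expected by the collective
    int linear_tid = (threadIdx.y * blockDim.x) + threadIdx.x;

    int thread_data = d_in[linear_tid];
    BlockScan(temp_storage, linear_tid).ExclusiveSum(thread_data, thread_data);
    d_out[linear_tid] = thread_data;
}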
- */ - __device__ __forceinline__ BlockScan( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - - //@} end member group - /******************************************************************//** - * \name Exclusive prefix sum operations - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix sum of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... - * - * // Collectively compute the block-wide exclusive prefix sum - * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The - * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. - * - */ - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item - T &output) ///< [out] Calling thread's output item (may be aliased to \p input) - { - T block_aggregate; - InternalBlockScan(temp_storage, linear_tid).ExclusiveSum(input, output, block_aggregate); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix sum of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... - * - * // Collectively compute the block-wide exclusive prefix sum - * int block_aggregate; - * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The - * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. - * Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. 
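The aggregate returned by this overload is what makes patterns such as per-block stream compaction straightforward: each thread votes whether to keep its item, the exclusive sum of the votes gives scatter offsets, and \p block_aggregate gives the number of surviving items. A hedged sketch of that use, one 128-item tile per block; all names are illustrative:

#include <cub/cub.cuh>

__global__ void CompactPositives(const int *d_in, int *d_out, int *d_block_counts)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int item = d_in[blockIdx.x * 128 + threadIdx.x];
    int keep = (item > 0) ? 1 : 0;                  // vote: 1 if this thread keeps its item

    // Exclusive sum of the votes yields each keeper's scatter offset;
    // block_aggregate is the total number of kept items in this block.
    int offset, block_aggregate;
    BlockScan(temp_storage).ExclusiveSum(keep, offset, block_aggregate);

    if (keep)
        d_out[blockIdx.x * 128 + offset] = item;
    if (threadIdx.x == 0)
        d_block_counts[blockIdx.x] = block_aggregate;
}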
- * - */ - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - InternalBlockScan(temp_storage, linear_tid).ExclusiveSum(input, output, block_aggregate); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a single thread block that progressively - * computes an exclusive prefix sum over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 128 integer items that are partitioned across 128 threads. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct BlockPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ BlockPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the first warp of threads in the block. - * // Thread-0 is responsible for returning a value for seeding the block-wide scan. - * __device__ int operator()(int block_aggregate) - * { - * int old_prefix = running_total; - * running_total += block_aggregate; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize BlockScan for 128 threads - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Initialize running total - * BlockPrefixOp prefix_op(0); - * - * // Have the block iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 128) - * { - * // Load a segment of consecutive items that are blocked across threads - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the block-wide exclusive prefix sum - * int block_aggregate; - * BlockScan(temp_storage).ExclusiveSum( - * thread_data, thread_data, block_aggregate, prefix_op); - * __syncthreads(); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... - * The corresponding output for the first segment will be 0, 1, ..., 127. - * The output for the second segment will be 128, 129, ..., 255. Furthermore, - * the value \p 128 will be stored in \p block_aggregate for all threads after each scan. 
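Because only the value returned by lane0 seeds the scan, the callback can wrap any prefix source, not just a running total. A hedged sketch in which a hypothetical CarryInOp seeds the block-wide sum with a value handed in by the host; all names here are illustrative:

#include <cub/cub.cuh>

// Hypothetical callback: seeds the block-wide scan with a caller-supplied starting value
struct CarryInOp
{
    int carry_in;

    __device__ CarryInOp(int carry_in) : carry_in(carry_in) {}

    // Invoked by the first warp; lane0's return value becomes the block-wide prefix
    __device__ int operator()(int block_aggregate)
    {
        int prefix = carry_in;
        carry_in += block_aggregate;   // maintain a running total in case of reuse
        return prefix;
    }
};

__global__ void SeededScanKernel(const int *d_in, int *d_out, int seed)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    CarryInOp prefix_op(seed);
    int thread_data = d_in[threadIdx.x];

    int block_aggregate;
    BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate, prefix_op);

    d_out[threadIdx.x] = thread_data;
}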
- * - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - InternalBlockScan(temp_storage, linear_tid).ExclusiveSum(input, output, block_aggregate, block_prefix_op); - } - - - //@} end member group - /******************************************************************//** - * \name Exclusive prefix sum operations (multiple data per thread) - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix sum of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide exclusive prefix sum - * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The - * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - */ - template - __device__ __forceinline__ void ExclusiveSum( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) - { - // Reduce consecutive thread items in registers - Sum scan_op; - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveSum(thread_partial, thread_partial); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix sum of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide exclusive prefix sum - * int block_aggregate; - * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The - * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. - * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - */ - template - __device__ __forceinline__ void ExclusiveSum( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - // Reduce consecutive thread items in registers - Sum scan_op; - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveSum(thread_partial, thread_partial, block_aggregate); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a single thread block that progressively - * computes an exclusive prefix sum over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec4) - * across 128 threads where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct BlockPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ BlockPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the first warp of threads in the block. - * // Thread-0 is responsible for returning a value for seeding the block-wide scan. 
- * __device__ int operator()(int block_aggregate) - * { - * int old_prefix = running_total; - * running_total += block_aggregate; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize BlockLoad, BlockStore, and BlockScan for 128 threads, 4 ints per thread - * typedef cub::BlockLoad BlockLoad; - * typedef cub::BlockStore BlockStore; - * typedef cub::BlockScan BlockScan; - * - * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan - * __shared__ union { - * typename BlockLoad::TempStorage load; - * typename BlockScan::TempStorage scan; - * typename BlockStore::TempStorage store; - * } temp_storage; - * - * // Initialize running total - * BlockPrefixOp prefix_op(0); - * - * // Have the block iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) - * { - * // Load a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); - * __syncthreads(); - * - * // Collectively compute the block-wide exclusive prefix sum - * int block_aggregate; - * BlockScan(temp_storage.scan).ExclusiveSum( - * thread_data, thread_data, block_aggregate, prefix_op); - * __syncthreads(); - * - * // Store scanned items to output segment - * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); - * __syncthreads(); - * } - * \endcode - * \par - * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... - * The corresponding output for the first segment will be 0, 1, 2, 3, ..., 510, 511. - * The output for the second segment will be 512, 513, 514, 515, ..., 1022, 1023. Furthermore, - * the value \p 512 will be stored in \p block_aggregate for all threads after each scan. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - int ITEMS_PER_THREAD, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveSum( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - // Reduce consecutive thread items in registers - Sum scan_op; - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveSum(thread_partial, thread_partial, block_aggregate, block_prefix_op); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial); - } - - - - //@} end member group // Inclusive prefix sums - /******************************************************************//** - * \name Exclusive prefix scan operations - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. - * - * Supports non-commutative scan operators. 
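Non-commutative here means that \p scan_op need only be associative. A hedged sketch of such an operator: prefix products of 2x2 matrices (associative but order-sensitive), scanned with the identity matrix as the identity value; the types and kernel below are illustrative, not part of the library:

#include <cub/cub.cuh>

// Illustrative 2x2 matrix type; matrix multiplication is associative but not commutative
struct Mat2
{
    float m00, m01, m10, m11;
};

struct Mat2Multiply
{
    __device__ Mat2 operator()(const Mat2 &a, const Mat2 &b) const
    {
        Mat2 c;
        c.m00 = a.m00 * b.m00 + a.m01 * b.m10;
        c.m01 = a.m00 * b.m01 + a.m01 * b.m11;
        c.m10 = a.m10 * b.m00 + a.m11 * b.m10;
        c.m11 = a.m10 * b.m01 + a.m11 * b.m11;
        return c;
    }
};

__global__ void MatrixPrefixProductKernel(const Mat2 *d_in, Mat2 *d_out)
{
    typedef cub::BlockScan<Mat2, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    Mat2 identity = {1.0f, 0.0f, 0.0f, 1.0f};   // 2x2 identity matrix
    Mat2 thread_data = d_in[threadIdx.x];

    // Exclusive prefix product: thread i receives the product of matrices 0..i-1
    BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, identity, Mat2Multiply());

    d_out[threadIdx.x] = thread_data;
}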
- * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... - * - * // Collectively compute the block-wide exclusive prefix max scan - * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The - * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T identity, ///< [in] Identity value - ScanOp scan_op) ///< [in] Binary scan operator - { - T block_aggregate; - InternalBlockScan(temp_storage, linear_tid).ExclusiveScan(input, output, identity, scan_op, block_aggregate); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... - * - * // Collectively compute the block-wide exclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The - * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. - * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input items - T &output, ///< [out] Calling thread's output items (may be aliased to \p input) - const T &identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - InternalBlockScan(temp_storage, linear_tid).ExclusiveScan(input, output, identity, scan_op, block_aggregate); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. 
Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a single thread block that progressively - * computes an exclusive prefix max scan over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 128 integer items that are partitioned across 128 threads. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct BlockPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ BlockPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the first warp of threads in the block. - * // Thread-0 is responsible for returning a value for seeding the block-wide scan. - * __device__ int operator()(int block_aggregate) - * { - * int old_prefix = running_total; - * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize BlockScan for 128 threads - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Initialize running total - * BlockPrefixOp prefix_op(INT_MIN); - * - * // Have the block iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 128) - * { - * // Load a segment of consecutive items that are blocked across threads - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the block-wide exclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage).ExclusiveScan( - * thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate, prefix_op); - * __syncthreads(); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... - * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, ..., 124, 126. - * The output for the second segment will be 126, 128, 128, 130, ..., 252, 254. Furthermore, - * \p block_aggregate will be assigned \p 126 in all threads after the first scan, assigned \p 254 after the second - * scan, etc. 
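The same shape works for any operator/identity pair. The sketch below defines a hypothetical MinOp functor with the required T operator()(const T&, const T&) signature and uses INT_MAX as its identity; the kernel and pointer names are illustrative only:

#include <climits>
#include <cub/cub.cuh>

// Hypothetical user-defined scan operator: minimum of two ints
struct MinOp
{
    __device__ int operator()(const int &a, const int &b) const
    {
        return (b < a) ? b : a;
    }
};

__global__ void RunningMinKernel(const int *d_in, int *d_out)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int thread_data = d_in[threadIdx.x];

    // Exclusive prefix min scan: INT_MAX serves as the identity for MinOp
    int block_aggregate;
    BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MAX, MinOp(), block_aggregate);

    d_out[threadIdx.x] = thread_data;
}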
- * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - InternalBlockScan(temp_storage, linear_tid).ExclusiveScan(input, output, identity, scan_op, block_aggregate, block_prefix_op); - } - - - //@} end member group // Inclusive prefix sums - /******************************************************************//** - * \name Exclusive prefix scan operations (multiple data per thread) - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide exclusive prefix max scan - * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. - * The corresponding output \p thread_data in those threads will be - * { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
- * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp> - __device__ __forceinline__ void ExclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - const T &identity, ///< [in] Identity value - ScanOp scan_op) ///< [in] Binary scan operator - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, identity, scan_op); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide exclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The - * corresponding output \p thread_data in those threads will be { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. - * Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp> - __device__ __forceinline__ void ExclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - const T &identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, identity, scan_op, block_aggregate); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. 
the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a single thread block that progressively - * computes an exclusive prefix max scan over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 128 integer items that are partitioned across 128 threads. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct BlockPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ BlockPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the first warp of threads in the block. - * // Thread-0 is responsible for returning a value for seeding the block-wide scan. - * __device__ int operator()(int block_aggregate) - * { - * int old_prefix = running_total; - * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize BlockLoad, BlockStore, and BlockScan for 128 threads, 4 ints per thread - * typedef cub::BlockLoad BlockLoad; - * typedef cub::BlockStore BlockStore; - * typedef cub::BlockScan BlockScan; - * - * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan - * __shared__ union { - * typename BlockLoad::TempStorage load; - * typename BlockScan::TempStorage scan; - * typename BlockStore::TempStorage store; - * } temp_storage; - * - * // Initialize running total - * BlockPrefixOp prefix_op(0); - * - * // Have the block iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) - * { - * // Load a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); - * __syncthreads(); - * - * // Collectively compute the block-wide exclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage.scan).ExclusiveScan( - * thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate, prefix_op); - * __syncthreads(); - * - * // Store scanned items to output segment - * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); - * __syncthreads(); - * } - * \endcode - * \par - * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... - * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, 2, 4, ..., 508, 510. - * The output for the second segment will be 510, 512, 512, 514, 514, 516, ..., 1020, 1022. 
Furthermore, - * \p block_aggregate will be assigned \p 510 in all threads after the first scan, assigned \p 1022 after the second - * scan, etc. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, identity, scan_op, block_aggregate, block_prefix_op); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial); - } - - - //@} end member group - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - /******************************************************************//** - * \name Exclusive prefix scan operations (identityless, single datum per thread) - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no identity value, the output computed for thread0 is undefined. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op) ///< [in] Binary scan operator - { - T block_aggregate; - InternalBlockScan(temp_storage, linear_tid).ExclusiveScan(input, output, scan_op, block_aggregate); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no identity value, the output computed for thread0 is undefined. - * - * Supports non-commutative scan operators. 
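Since these undocumented overloads take no identity value, thread0's result is undefined and the caller typically ignores or overwrites it. A minimal sketch of that convention (the guard is the caller's responsibility, not something these overloads provide; names are illustrative):

#include <climits>
#include <cub/cub.cuh>

__global__ void IdentitylessScanKernel(const int *d_in, int *d_out)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int thread_data = d_in[threadIdx.x];
    int exclusive;

    // No identity value supplied: the result computed for thread0 is undefined
    BlockScan(temp_storage).ExclusiveScan(thread_data, exclusive, cub::Max());

    // Overwrite the undefined thread0 result with a caller-chosen value
    if (threadIdx.x == 0)
        exclusive = INT_MIN;

    d_out[threadIdx.x] = exclusive;
}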
- * - * \blocked - * - * \smemreuse - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - InternalBlockScan(temp_storage, linear_tid).ExclusiveScan(input, output, scan_op, block_aggregate); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - InternalBlockScan(temp_storage, linear_tid).ExclusiveScan(input, output, scan_op, block_aggregate, block_prefix_op); - } - - - //@} end member group - /******************************************************************//** - * \name Exclusive prefix scan operations (identityless, multiple data per thread) - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. With no identity value, the output computed for thread0 is undefined. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
- * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp> - __device__ __forceinline__ void ExclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - ScanOp scan_op) ///< [in] Binary scan operator - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, scan_op); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no identity value, the output computed for thread0 is undefined. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp> - __device__ __forceinline__ void ExclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); - } - - - /** - * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
- * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate, block_prefix_op); - - // Exclusive scan in registers with prefix - ThreadScanExclusive(input, output, scan_op, thread_partial); - } - - - //@} end member group - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - /******************************************************************//** - * \name Inclusive prefix sum operations - *********************************************************************/ - //@{ - - - /** - * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix sum of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... - * - * // Collectively compute the block-wide inclusive prefix sum - * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The - * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. - * - */ - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item - T &output) ///< [out] Calling thread's output item (may be aliased to \p input) - { - T block_aggregate; - InternalBlockScan(temp_storage, linear_tid).InclusiveSum(input, output, block_aggregate); - } - - - /** - * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix sum of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... 
- * - * // Collectively compute the block-wide inclusive prefix sum - * int block_aggregate; - * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The - * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. - * Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. - * - */ - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - InternalBlockScan(temp_storage, linear_tid).InclusiveSum(input, output, block_aggregate); - } - - - - /** - * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a single thread block that progressively - * computes an inclusive prefix sum over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 128 integer items that are partitioned across 128 threads. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct BlockPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ BlockPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the first warp of threads in the block. - * // Thread-0 is responsible for returning a value for seeding the block-wide scan. - * __device__ int operator()(int block_aggregate) - * { - * int old_prefix = running_total; - * running_total += block_aggregate; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
- * { - * // Specialize BlockScan for 128 threads - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Initialize running total - * BlockPrefixOp prefix_op(0); - * - * // Have the block iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 128) - * { - * // Load a segment of consecutive items that are blocked across threads - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the block-wide inclusive prefix sum - * int block_aggregate; - * BlockScan(temp_storage).InclusiveSum( - * thread_data, thread_data, block_aggregate, prefix_op); - * __syncthreads(); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... - * The corresponding output for the first segment will be 1, 2, ..., 128. - * The output for the second segment will be 129, 130, ..., 256. Furthermore, - * the value \p 128 will be stored in \p block_aggregate for all threads after each scan. - * - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - InternalBlockScan(temp_storage, linear_tid).InclusiveSum(input, output, block_aggregate, block_prefix_op); - } - - - //@} end member group - /******************************************************************//** - * \name Inclusive prefix sum operations (multiple data per thread) - *********************************************************************/ - //@{ - - - /** - * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix sum of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide inclusive prefix sum - * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The - * corresponding output \p thread_data in those threads will be { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
- */ - template - __device__ __forceinline__ void InclusiveSum( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) - { - if (ITEMS_PER_THREAD == 1) - { - InclusiveSum(input[0], output[0]); - } - else - { - // Reduce consecutive thread items in registers - Sum scan_op; - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveSum(thread_partial, thread_partial); - - // Inclusive scan in registers with prefix - ThreadScanInclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); - } - } - - - /** - * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix sum of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide inclusive prefix sum - * int block_aggregate; - * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The - * corresponding output \p thread_data in those threads will be - * { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. - * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void InclusiveSum( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - if (ITEMS_PER_THREAD == 1) - { - InclusiveSum(input[0], output[0], block_aggregate); - } - else - { - // Reduce consecutive thread items in registers - Sum scan_op; - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveSum(thread_partial, thread_partial, block_aggregate); - - // Inclusive scan in registers with prefix - ThreadScanInclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); - } - } - - - /** - * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. 
Also provides every thread with the block-wide \p block_aggregate of all inputs.
- *
- * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate).
- * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation.
- * The functor will be invoked by the first warp of threads in the block; however, only the return value from
- * lane0 is applied as the block-wide prefix. Can be stateful.
- *
- * \blocked
- *
- * \smemreuse
- *
- * The code snippet below illustrates a single thread block that progressively
- * computes an inclusive prefix sum over multiple "tiles" of input using a
- * prefix functor to maintain a running total between block-wide scans. Each tile consists
- * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec4)
- * across 128 threads where each thread owns 4 consecutive items.
- * \par
- * \code
- * #include <cub/cub.cuh>
- *
- * // A stateful callback functor that maintains a running prefix to be applied
- * // during consecutive scan operations.
- * struct BlockPrefixOp
- * {
- *     // Running prefix
- *     int running_total;
- *
- *     // Constructor
- *     __device__ BlockPrefixOp(int running_total) : running_total(running_total) {}
- *
- *     // Callback operator to be entered by the first warp of threads in the block.
- *     // Thread-0 is responsible for returning a value for seeding the block-wide scan.
- *     __device__ int operator()(int block_aggregate)
- *     {
- *         int old_prefix = running_total;
- *         running_total += block_aggregate;
- *         return old_prefix;
- *     }
- * };
- *
- * __global__ void ExampleKernel(int *d_data, int num_items, ...)
- * {
- *     // Specialize BlockLoad, BlockStore, and BlockScan for 128 threads, 4 ints per thread
- *     typedef cub::BlockLoad<int*, 128, 4, BLOCK_LOAD_TRANSPOSE>   BlockLoad;
- *     typedef cub::BlockStore<int*, 128, 4, BLOCK_STORE_TRANSPOSE> BlockStore;
- *     typedef cub::BlockScan<int, 128>                             BlockScan;
- *
- *     // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan
- *     __shared__ union {
- *         typename BlockLoad::TempStorage  load;
- *         typename BlockScan::TempStorage  scan;
- *         typename BlockStore::TempStorage store;
- *     } temp_storage;
- *
- *     // Initialize running total
- *     BlockPrefixOp prefix_op(0);
- *
- *     // Have the block iterate over segments of items
- *     for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4)
- *     {
- *         // Load a segment of consecutive items that are blocked across threads
- *         int thread_data[4];
- *         BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data);
- *         __syncthreads();
- *
- *         // Collectively compute the block-wide inclusive prefix sum
- *         int block_aggregate;
- *         BlockScan(temp_storage.scan).InclusiveSum(
- *             thread_data, thread_data, block_aggregate, prefix_op);
- *         __syncthreads();
- *
- *         // Store scanned items to output segment
- *         BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data);
- *         __syncthreads();
- *     }
- * \endcode
- * \par
- * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, ....
- * The corresponding output for the first segment will be 1, 2, 3, 4, ..., 511, 512.
- * The output for the second segment will be 513, 514, 515, 516, ..., 1023, 1024. Furthermore,
- * the value \p 512 will be stored in \p block_aggregate for all threads after each scan.
- *
- * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread.
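// To make the running-total bookkeeping above concrete, here is a hedged host-side
// rendition of the same callback contract (names are illustrative): each call receives
// one tile's block_aggregate, returns the old total as that tile's seed, and folds the
// aggregate into the total for the next tile. With all-ones input and 512-item tiles,
// successive calls return 0, 512, 1024, ...
struct RunningPrefix
{
    int running_total;

    explicit RunningPrefix(int initial) : running_total(initial) {}

    int operator()(int block_aggregate)
    {
        int old_prefix = running_total;   // seed applied to the current tile's scan
        running_total += block_aggregate; // carry this tile's sum into the next tile
        return old_prefix;
    }
};
// Usage: RunningPrefix p(0); p(512) returns 0, then 512, then 1024, ...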
- * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - int ITEMS_PER_THREAD, - typename BlockPrefixOp> - __device__ __forceinline__ void InclusiveSum( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - if (ITEMS_PER_THREAD == 1) - { - InclusiveSum(input[0], output[0], block_aggregate, block_prefix_op); - } - else - { - // Reduce consecutive thread items in registers - Sum scan_op; - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveSum(thread_partial, thread_partial, block_aggregate, block_prefix_op); - - // Inclusive scan in registers with prefix - ThreadScanInclusive(input, output, scan_op, thread_partial); - } - } - - - //@} end member group - /******************************************************************//** - * \name Inclusive prefix scan operations - *********************************************************************/ - //@{ - - - /** - * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... - * - * // Collectively compute the block-wide inclusive prefix max scan - * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The - * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op) ///< [in] Binary scan operator - { - T block_aggregate; - InclusiveScan(input, output, scan_op, block_aggregate); - } - - - /** - * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that - * are partitioned across 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain input item for each thread - * int thread_data; - * ... - * - * // Collectively compute the block-wide inclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The - * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. - * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - InternalBlockScan(temp_storage, linear_tid).InclusiveScan(input, output, scan_op, block_aggregate); - } - - - /** - * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a single thread block that progressively - * computes an inclusive prefix max scan over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 128 integer items that are partitioned across 128 threads. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct BlockPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ BlockPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the first warp of threads in the block. - * // Thread-0 is responsible for returning a value for seeding the block-wide scan. - * __device__ int operator()(int block_aggregate) - * { - * int old_prefix = running_total; - * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
- * { - * // Specialize BlockScan for 128 threads - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Initialize running total - * BlockPrefixOp prefix_op(INT_MIN); - * - * // Have the block iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 128) - * { - * // Load a segment of consecutive items that are blocked across threads - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the block-wide inclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage).InclusiveScan( - * thread_data, thread_data, cub::Max(), block_aggregate, prefix_op); - * __syncthreads(); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... - * The corresponding output for the first segment will be 0, 0, 2, 2, ..., 126, 126. - * The output for the second segment will be 128, 128, 130, 130, ..., 254, 254. Furthermore, - * \p block_aggregate will be assigned \p 126 in all threads after the first scan, assigned \p 254 after the second - * scan, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. - { - InternalBlockScan(temp_storage, linear_tid).InclusiveScan(input, output, scan_op, block_aggregate, block_prefix_op); - } - - - //@} end member group - /******************************************************************//** - * \name Inclusive prefix scan operations (multiple data per thread) - *********************************************************************/ - //@{ - - - /** - * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... 
- * - * // Collectively compute the block-wide inclusive prefix max scan - * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The - * corresponding output \p thread_data in those threads will be { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp> - __device__ __forceinline__ void InclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - ScanOp scan_op) ///< [in] Binary scan operator - { - if (ITEMS_PER_THREAD == 1) - { - InclusiveScan(input[0], output[0], scan_op); - } - else - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, scan_op); - - // Inclusive scan in registers with prefix - ThreadScanInclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); - } - } - - - /** - * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that - * are partitioned in a [blocked arrangement](index.html#sec5sec4) across 128 threads - * where each thread owns 4 consecutive items. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize BlockScan for 128 threads on type int - * typedef cub::BlockScan BlockScan; - * - * // Allocate shared memory for BlockScan - * __shared__ typename BlockScan::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Collectively compute the block-wide inclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is - * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. - * The corresponding output \p thread_data in those threads will be - * { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. - * Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
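// The ScanOp contract documented in these methods only requires a callable with
// member T operator()(const T&, const T&); the operator must be associative but need
// not be commutative. As a hedged sketch (not part of the original file), a hand-written
// max functor that could be passed wherever cub::Max() appears in the snippets:
struct MaxOp
{
    __host__ __device__ int operator()(const int &a, const int &b) const
    {
        return (b > a) ? b : a; // associative; suitable as a block-wide ScanOp
    }
};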
- * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp> - __device__ __forceinline__ void InclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] block-wide aggregate reduction of input items - { - if (ITEMS_PER_THREAD == 1) - { - InclusiveScan(input[0], output[0], scan_op, block_aggregate); - } - else - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate); - - // Inclusive scan in registers with prefix - ThreadScanInclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); - } - } - - - /** - * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - * - * The \p block_prefix_op functor must implement a member function T operator()(T block_aggregate). - * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the first warp of threads in the block, however only the return value from - * lane0 is applied as the block-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \blocked - * - * \smemreuse - * - * The code snippet below illustrates a single thread block that progressively - * computes an inclusive prefix max scan over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 128 integer items that are partitioned across 128 threads. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct BlockPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ BlockPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the first warp of threads in the block. - * // Thread-0 is responsible for returning a value for seeding the block-wide scan. - * __device__ int operator()(int block_aggregate) - * { - * int old_prefix = running_total; - * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
- * { - * // Specialize BlockLoad, BlockStore, and BlockScan for 128 threads, 4 ints per thread - * typedef cub::BlockLoad BlockLoad; - * typedef cub::BlockStore BlockStore; - * typedef cub::BlockScan BlockScan; - * - * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan - * __shared__ union { - * typename BlockLoad::TempStorage load; - * typename BlockScan::TempStorage scan; - * typename BlockStore::TempStorage store; - * } temp_storage; - * - * // Initialize running total - * BlockPrefixOp prefix_op(0); - * - * // Have the block iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) - * { - * // Load a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); - * __syncthreads(); - * - * // Collectively compute the block-wide inclusive prefix max scan - * int block_aggregate; - * BlockScan(temp_storage.scan).InclusiveScan( - * thread_data, thread_data, cub::Max(), block_aggregate, prefix_op); - * __syncthreads(); - * - * // Store scanned items to output segment - * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); - * __syncthreads(); - * } - * \endcode - * \par - * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... - * The corresponding output for the first segment will be 0, 0, 2, 2, 4, 4, ..., 510, 510. - * The output for the second segment will be 512, 512, 514, 514, 516, 516, ..., 1022, 1022. Furthermore, - * \p block_aggregate will be assigned \p 510 in all threads after the first scan, assigned \p 1022 after the second - * scan, etc. - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam BlockPrefixOp [inferred] Call-back functor type having member T operator()(T block_aggregate) - */ - template < - int ITEMS_PER_THREAD, - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void InclusiveScan( - T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items - T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] block-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to all inputs. 
- { - if (ITEMS_PER_THREAD == 1) - { - InclusiveScan(input[0], output[0], scan_op, block_aggregate, block_prefix_op); - } - else - { - // Reduce consecutive thread items in registers - T thread_partial = ThreadReduce(input, scan_op); - - // Exclusive threadblock-scan - ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate, block_prefix_op); - - // Inclusive scan in registers with prefix - ThreadScanInclusive(input, output, scan_op, thread_partial); - } - } - - //@} end member group - - -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/block_store.cuh b/kokkos/kokkos/TPL/cub/block/block_store.cuh deleted file mode 100644 index fb990de..0000000 --- a/kokkos/kokkos/TPL/cub/block/block_store.cuh +++ /dev/null @@ -1,926 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Operations for writing linear segments of data from the CUDA thread block - */ - -#pragma once - -#include - -#include "../util_namespace.cuh" -#include "../util_macro.cuh" -#include "../util_type.cuh" -#include "../util_vector.cuh" -#include "../thread/thread_store.cuh" -#include "block_exchange.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \addtogroup IoModule - * @{ - */ - - -/******************************************************************//** - * \name Blocked I/O - *********************************************************************/ -//@{ - -/** - * \brief Store a blocked arrangement of items across a thread block into a linear segment of items using the specified cache modifier. - * - * \blocked - * - * \tparam MODIFIER cub::PtxStoreModifier cache modifier. - * \tparam T [inferred] The data type to store. 
- * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam OutputIteratorRA [inferred] The random-access iterator type for output (may be a simple pointer type). - */ -template < - PtxStoreModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorRA> -__device__ __forceinline__ void StoreBlocked( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store -{ - // Store directly in thread-blocked order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - ThreadStore(block_itr + (linear_tid * ITEMS_PER_THREAD) + ITEM, items[ITEM]); - } -} - - -/** - * \brief Store a blocked arrangement of items across a thread block into a linear segment of items using the specified cache modifier, guarded by range - * - * \blocked - * - * \tparam MODIFIER cub::PtxStoreModifier cache modifier. - * \tparam T [inferred] The data type to store. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam OutputIteratorRA [inferred] The random-access iterator type for output (may be a simple pointer type). - */ -template < - PtxStoreModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorRA> -__device__ __forceinline__ void StoreBlocked( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write -{ - // Store directly in thread-blocked order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - if (ITEM + (linear_tid * ITEMS_PER_THREAD) < valid_items) - { - ThreadStore(block_itr + (linear_tid * ITEMS_PER_THREAD) + ITEM, items[ITEM]); - } - } -} - - - -//@} end member group -/******************************************************************//** - * \name Striped I/O - *********************************************************************/ -//@{ - - -/** - * \brief Store a striped arrangement of data across the thread block into a linear segment of items using the specified cache modifier. - * - * \striped - * - * \tparam MODIFIER cub::PtxStoreModifier cache modifier. - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam T [inferred] The data type to store. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam OutputIteratorRA [inferred] The random-access iterator type for output (may be a simple pointer type). 
- */ -template < - PtxStoreModifier MODIFIER, - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorRA> -__device__ __forceinline__ void StoreStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store -{ - // Store directly in striped order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - ThreadStore(block_itr + (ITEM * BLOCK_THREADS) + linear_tid, items[ITEM]); - } -} - - -/** - * \brief Store a striped arrangement of data across the thread block into a linear segment of items using the specified cache modifier, guarded by range - * - * \striped - * - * \tparam MODIFIER cub::PtxStoreModifier cache modifier. - * \tparam BLOCK_THREADS The thread block size in threads - * \tparam T [inferred] The data type to store. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam OutputIteratorRA [inferred] The random-access iterator type for output (may be a simple pointer type). - */ -template < - PtxStoreModifier MODIFIER, - int BLOCK_THREADS, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorRA> -__device__ __forceinline__ void StoreStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write -{ - // Store directly in striped order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - if ((ITEM * BLOCK_THREADS) + linear_tid < valid_items) - { - ThreadStore(block_itr + (ITEM * BLOCK_THREADS) + linear_tid, items[ITEM]); - } - } -} - - - -//@} end member group -/******************************************************************//** - * \name Warp-striped I/O - *********************************************************************/ -//@{ - - -/** - * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items using the specified cache modifier. - * - * \warpstriped - * - * \par Usage Considerations - * The number of threads in the thread block must be a multiple of the architecture's warp size. - * - * \tparam MODIFIER cub::PtxStoreModifier cache modifier. - * \tparam T [inferred] The data type to store. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam OutputIteratorRA [inferred] The random-access iterator type for output (may be a simple pointer type). 
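// The direct-store layouts above differ only in how a (thread, item) pair maps to a
// global offset. A compact reference, using illustrative helper functions that are not
// part of the original file (the warp width is assumed to be 32 here; the real code
// queries PtxArchProps::WARP_THREADS):
__host__ __device__ inline int blocked_offset(int linear_tid, int item, int items_per_thread)
{
    return linear_tid * items_per_thread + item;  // StoreBlocked: each thread owns consecutive items
}

__host__ __device__ inline int striped_offset(int linear_tid, int item, int block_threads)
{
    return item * block_threads + linear_tid;     // StoreStriped: stride of BLOCK_THREADS between a thread's items
}

__host__ __device__ inline int warp_striped_offset(int linear_tid, int item, int items_per_thread)
{
    const int warp_threads = 32;                  // assumption, see note above
    int lane        = linear_tid & (warp_threads - 1);
    int warp        = linear_tid / warp_threads;
    int warp_offset = warp * warp_threads * items_per_thread;
    return warp_offset + lane + item * warp_threads; // StoreWarpStriped: stride of WARP_THREADS within a warp's segment
}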
- */ -template < - PtxStoreModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorRA> -__device__ __forceinline__ void StoreWarpStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load -{ - int tid = linear_tid & (PtxArchProps::WARP_THREADS - 1); - int wid = linear_tid >> PtxArchProps::LOG_WARP_THREADS; - int warp_offset = wid * PtxArchProps::WARP_THREADS * ITEMS_PER_THREAD; - - // Store directly in warp-striped order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - ThreadStore(block_itr + warp_offset + tid + (ITEM * PtxArchProps::WARP_THREADS), items[ITEM]); - } -} - - -/** - * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items using the specified cache modifier, guarded by range - * - * \warpstriped - * - * \par Usage Considerations - * The number of threads in the thread block must be a multiple of the architecture's warp size. - * - * \tparam MODIFIER cub::PtxStoreModifier cache modifier. - * \tparam T [inferred] The data type to store. - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * \tparam OutputIteratorRA [inferred] The random-access iterator type for output (may be a simple pointer type). - */ -template < - PtxStoreModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD, - typename OutputIteratorRA> -__device__ __forceinline__ void StoreWarpStriped( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write -{ - int tid = linear_tid & (PtxArchProps::WARP_THREADS - 1); - int wid = linear_tid >> PtxArchProps::LOG_WARP_THREADS; - int warp_offset = wid * PtxArchProps::WARP_THREADS * ITEMS_PER_THREAD; - - // Store directly in warp-striped order - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - if (warp_offset + tid + (ITEM * PtxArchProps::WARP_THREADS) < valid_items) - { - ThreadStore(block_itr + warp_offset + tid + (ITEM * PtxArchProps::WARP_THREADS), items[ITEM]); - } - } -} - - - -//@} end member group -/******************************************************************//** - * \name Blocked, vectorized I/O - *********************************************************************/ -//@{ - -/** - * \brief Store a blocked arrangement of items across a thread block into a linear segment of items using the specified cache modifier. - * - * \blocked - * - * The output offset (\p block_ptr + \p block_offset) must be quad-item aligned, - * which is the default starting offset returned by \p cudaMalloc() - * - * \par - * The following conditions will prevent vectorization and storing will fall back to cub::BLOCK_STORE_DIRECT: - * - \p ITEMS_PER_THREAD is odd - * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) - * - * \tparam MODIFIER cub::PtxStoreModifier cache modifier. - * \tparam T [inferred] The data type to store. 
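// A hedged sketch of the idea behind the vectorized path documented here: when
// ITEMS_PER_THREAD is a multiple of 4 and T is a 4-byte built-in type, four items can
// be packed into one CUDA vector value and written with a single wide store. The
// helper name is illustrative, and 16-byte alignment of the destination is assumed,
// as the surrounding documentation requires.
__device__ inline void store_four_ints_vectorized(int *dst, const int (&items)[4])
{
    int4 vec = make_int4(items[0], items[1], items[2], items[3]); // pack 4 ints into a 16-byte value
    *reinterpret_cast<int4 *>(dst) = vec;                         // one vector store instead of four scalar stores
}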
- * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - * - */ -template < - PtxStoreModifier MODIFIER, - typename T, - int ITEMS_PER_THREAD> -__device__ __forceinline__ void StoreBlockedVectorized( - int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - T *block_ptr, ///< [in] Input pointer for storing from - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store -{ - enum - { - // Maximum CUDA vector size is 4 elements - MAX_VEC_SIZE = CUB_MIN(4, ITEMS_PER_THREAD), - - // Vector size must be a power of two and an even divisor of the items per thread - VEC_SIZE = ((((MAX_VEC_SIZE - 1) & MAX_VEC_SIZE) == 0) && ((ITEMS_PER_THREAD % MAX_VEC_SIZE) == 0)) ? - MAX_VEC_SIZE : - 1, - - VECTORS_PER_THREAD = ITEMS_PER_THREAD / VEC_SIZE, - }; - - // Vector type - typedef typename VectorHelper::Type Vector; - - // Alias global pointer - Vector *block_ptr_vectors = reinterpret_cast(block_ptr); - - // Alias pointers (use "raw" array here which should get optimized away to prevent conservative PTXAS lmem spilling) - Vector raw_vector[VECTORS_PER_THREAD]; - T *raw_items = reinterpret_cast(raw_vector); - - // Copy - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - raw_items[ITEM] = items[ITEM]; - } - - // Direct-store using vector types - StoreBlocked(linear_tid, block_ptr_vectors, raw_vector); -} - - -//@} end member group - - -/** @} */ // end group IoModule - - -//----------------------------------------------------------------------------- -// Generic BlockStore abstraction -//----------------------------------------------------------------------------- - -/** - * \brief cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. - */ -enum BlockStoreAlgorithm -{ - /** - * \par Overview - * - * A [blocked arrangement](index.html#sec5sec4) of data is written - * directly to memory. The thread block writes items in a parallel "raking" fashion: - * threadi writes the ith segment of consecutive elements. - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) decreases as the - * access stride between threads increases (i.e., the number items per thread). - */ - BLOCK_STORE_DIRECT, - - /** - * \par Overview - * - * A [blocked arrangement](index.html#sec5sec4) of data is written directly - * to memory using CUDA's built-in vectorized stores as a coalescing optimization. - * The thread block writes items in a parallel "raking" fashion: threadi uses vector stores to - * write the ith segment of consecutive elements. - * - * For example, st.global.v4.s32 instructions will be generated when \p T = \p int and \p ITEMS_PER_THREAD > 4. - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) remains high until the the - * access stride between threads (i.e., the number items per thread) exceeds the - * maximum vector store width (typically 4 items or 64B, whichever is lower). 
- * - The following conditions will prevent vectorization and writing will fall back to cub::BLOCK_STORE_DIRECT: - * - \p ITEMS_PER_THREAD is odd - * - The \p OutputIteratorRA is not a simple pointer type - * - The block output offset is not quadword-aligned - * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) - */ - BLOCK_STORE_VECTORIZE, - - /** - * \par Overview - * A [blocked arrangement](index.html#sec5sec4) is locally - * transposed into a [striped arrangement](index.html#sec5sec4) - * which is then written to memory. More specifically, cub::BlockExchange - * used to locally reorder the items into a - * [striped arrangement](index.html#sec5sec4), after which the - * thread block writes items in a parallel "strip-mining" fashion: consecutive - * items owned by threadi are written to memory with - * stride \p BLOCK_THREADS between them. - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) remains high regardless - * of items written per thread. - * - The local reordering incurs slightly longer latencies and throughput than the - * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. - */ - BLOCK_STORE_TRANSPOSE, - - /** - * \par Overview - * A [blocked arrangement](index.html#sec5sec4) is locally - * transposed into a [warp-striped arrangement](index.html#sec5sec4) - * which is then written to memory. More specifically, cub::BlockExchange used - * to locally reorder the items into a - * [warp-striped arrangement](index.html#sec5sec4), after which - * each warp writes its own contiguous segment in a parallel "strip-mining" fashion: - * consecutive items owned by lanei are written to memory - * with stride \p WARP_THREADS between them. - * - * \par Performance Considerations - * - The utilization of memory transactions (coalescing) remains high regardless - * of items written per thread. - * - The local reordering incurs slightly longer latencies and throughput than the - * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. - */ - BLOCK_STORE_WARP_TRANSPOSE, -}; - - - -/** - * \addtogroup BlockModule - * @{ - */ - - -/** - * \brief The BlockStore class provides [collective](index.html#sec0) data movement methods for writing a [blocked arrangement](index.html#sec5sec4) of items partitioned across a CUDA thread block to a linear segment of memory. ![](block_store_logo.png) - * - * \par Overview - * The BlockStore class provides a single data movement abstraction that can be specialized - * to implement different cub::BlockStoreAlgorithm strategies. This facilitates different - * performance policies for different architectures, data types, granularity sizes, etc. - * - * \par Optionally, BlockStore can be specialized by different data movement strategies: - * -# cub::BLOCK_STORE_DIRECT. A [blocked arrangement](index.html#sec5sec4) of data is written - * directly to memory. [More...](\ref cub::BlockStoreAlgorithm) - * -# cub::BLOCK_STORE_VECTORIZE. A [blocked arrangement](index.html#sec5sec4) - * of data is written directly to memory using CUDA's built-in vectorized stores as a - * coalescing optimization. [More...](\ref cub::BlockStoreAlgorithm) - * -# cub::BLOCK_STORE_TRANSPOSE. A [blocked arrangement](index.html#sec5sec4) - * is locally transposed into a [striped arrangement](index.html#sec5sec4) which is - * then written to memory. [More...](\ref cub::BlockStoreAlgorithm) - * -# cub::BLOCK_STORE_WARP_TRANSPOSE. 
A [blocked arrangement](index.html#sec5sec4)
- * is locally transposed into a [warp-striped arrangement](index.html#sec5sec4) which is
- * then written to memory. [More...](\ref cub::BlockStoreAlgorithm)
- *
- * \tparam OutputIteratorRA The output iterator type (may be a simple pointer type).
- * \tparam BLOCK_THREADS The thread block size in threads.
- * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread.
- * \tparam ALGORITHM [optional] cub::BlockStoreAlgorithm tuning policy enumeration. default: cub::BLOCK_STORE_DIRECT.
- * \tparam MODIFIER [optional] cub::PtxStoreModifier cache modifier. default: cub::STORE_DEFAULT.
- * \tparam WARP_TIME_SLICING [optional] For transposition-based cub::BlockStoreAlgorithm parameterizations that utilize shared memory: When \p true, only use enough shared memory for a single warp's worth of data, time-slicing the block-wide exchange over multiple synchronized rounds (default: false)
- *
- * \par A Simple Example
- * \blockcollective{BlockStore}
- * \par
- * The code snippet below illustrates the storing of a "blocked" arrangement
- * of 512 integers across 128 threads (where each thread owns 4 consecutive items)
- * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE,
- * meaning items are locally reordered among threads so that memory references will be
- * efficiently coalesced using a warp-striped access pattern.
- * \par
- * \code
- * #include <cub/cub.cuh>
- *
- * __global__ void ExampleKernel(int *d_data, ...)
- * {
- *     // Specialize BlockStore for 128 threads owning 4 integer items each
- *     typedef cub::BlockStore<int*, 128, 4, BLOCK_STORE_WARP_TRANSPOSE> BlockStore;
- *
- *     // Allocate shared memory for BlockStore
- *     __shared__ typename BlockStore::TempStorage temp_storage;
- *
- *     // Obtain a segment of consecutive items that are blocked across threads
- *     int thread_data[4];
- *     ...
- *
- *     // Store items to linear memory
- *     BlockStore(temp_storage).Store(d_data, thread_data);
- *
- * \endcode
- * \par
- * Suppose the set of \p thread_data across the block of threads is
- * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }.
- * The output \p d_data will be 0, 1, 2, 3, 4, 5, ....
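// A minimal compilable variant of the snippet above (hedged sketch: the kernel name,
// fill pattern, and launch line are illustrative, not part of the original file; the
// destination is assumed to hold at least 512 ints).
#include <cub/cub.cuh>

__global__ void BlockStoreExampleKernel(int *d_data)
{
    // Specialize BlockStore for 128 threads owning 4 integer items each,
    // using the warp-transpose strategy described above
    typedef cub::BlockStore<int*, 128, 4, cub::BLOCK_STORE_WARP_TRANSPOSE> BlockStore;

    // Allocate shared memory for BlockStore
    __shared__ typename BlockStore::TempStorage temp_storage;

    // Fabricate the blocked arrangement: thread t owns items 4t .. 4t+3
    int thread_data[4];
    for (int i = 0; i < 4; ++i)
        thread_data[i] = threadIdx.x * 4 + i;

    // Store items to linear memory; d_data[0..511] receives 0, 1, 2, 3, ...
    BlockStore(temp_storage).Store(d_data, thread_data);
}

// Possible launch with a single 128-thread block:
//     BlockStoreExampleKernel<<<1, 128>>>(d_data);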
- * - */ -template < - typename OutputIteratorRA, - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - BlockStoreAlgorithm ALGORITHM = BLOCK_STORE_DIRECT, - PtxStoreModifier MODIFIER = STORE_DEFAULT, - bool WARP_TIME_SLICING = false> -class BlockStore -{ -private: - /****************************************************************************** - * Constants and typed definitions - ******************************************************************************/ - - // Data type of input iterator - typedef typename std::iterator_traits::value_type T; - - - /****************************************************************************** - * Algorithmic variants - ******************************************************************************/ - - /// Store helper - template - struct StoreInternal; - - - /** - * BLOCK_STORE_DIRECT specialization of store helper - */ - template - struct StoreInternal - { - /// Shared memory storage layout type - typedef NullType TempStorage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ StoreInternal( - TempStorage &temp_storage, - int linear_tid) - : - linear_tid(linear_tid) - {} - - /// Store items into a linear segment of memory - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store - { - StoreBlocked(linear_tid, block_itr, items); - } - - /// Store items into a linear segment of memory, guarded by range - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write - { - StoreBlocked(linear_tid, block_itr, items, valid_items); - } - }; - - - /** - * BLOCK_STORE_VECTORIZE specialization of store helper - */ - template - struct StoreInternal - { - /// Shared memory storage layout type - typedef NullType TempStorage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ StoreInternal( - TempStorage &temp_storage, - int linear_tid) - : - linear_tid(linear_tid) - {} - - /// Store items into a linear segment of memory, specialized for native pointer types (attempts vectorization) - __device__ __forceinline__ void Store( - T *block_ptr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store - { - StoreBlockedVectorized(linear_tid, block_ptr, items); - } - - /// Store items into a linear segment of memory, specialized for opaque input iterators (skips vectorization) - template - __device__ __forceinline__ void Store( - _OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store - { - StoreBlocked(linear_tid, block_itr, items); - } - - /// Store items into a linear segment of memory, guarded by range - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write - { - StoreBlocked(linear_tid, block_itr, items, valid_items); - } - }; - - - /** - * BLOCK_STORE_TRANSPOSE specialization of store helper - */ - template - struct StoreInternal - { - // BlockExchange utility type for keys - typedef BlockExchange BlockExchange; - - /// Shared 
memory storage layout type - typedef typename BlockExchange::TempStorage _TempStorage; - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - /// Thread reference to shared storage - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ StoreInternal( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - /// Store items into a linear segment of memory - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store - { - BlockExchange(temp_storage).BlockedToStriped(items); - StoreStriped(linear_tid, block_itr, items); - } - - /// Store items into a linear segment of memory, guarded by range - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write - { - BlockExchange(temp_storage).BlockedToStriped(items); - StoreStriped(linear_tid, block_itr, items, valid_items); - } - }; - - - /** - * BLOCK_STORE_WARP_TRANSPOSE specialization of store helper - */ - template - struct StoreInternal - { - enum - { - WARP_THREADS = PtxArchProps::WARP_THREADS - }; - - // Assert BLOCK_THREADS must be a multiple of WARP_THREADS - CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); - - // BlockExchange utility type for keys - typedef BlockExchange BlockExchange; - - /// Shared memory storage layout type - typedef typename BlockExchange::TempStorage _TempStorage; - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - /// Thread reference to shared storage - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - - /// Constructor - __device__ __forceinline__ StoreInternal( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - /// Store items into a linear segment of memory - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store - { - BlockExchange(temp_storage).BlockedToWarpStriped(items); - StoreWarpStriped(linear_tid, block_itr, items); - } - - /// Store items into a linear segment of memory, guarded by range - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write - { - BlockExchange(temp_storage).BlockedToWarpStriped(items); - StoreWarpStriped(linear_tid, block_itr, items, valid_items); - } - }; - - /****************************************************************************** - * Type definitions - ******************************************************************************/ - - /// Internal load implementation to use - typedef StoreInternal InternalStore; - - - /// Shared memory storage layout type - typedef typename InternalStore::TempStorage _TempStorage; - - - /****************************************************************************** - * Utility methods - 
******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ _TempStorage private_storage; - return private_storage; - } - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Thread reference to shared storage - _TempStorage &temp_storage; - - /// Linear thread-id - int linear_tid; - -public: - - - /// \smemstorage{BlockStore} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockStore() - : - temp_storage(PrivateStorage()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using threadIdx.x. - */ - __device__ __forceinline__ BlockStore( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - linear_tid(threadIdx.x) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier - */ - __device__ __forceinline__ BlockStore( - int linear_tid) ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(PrivateStorage()), - linear_tid(linear_tid) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier. - */ - __device__ __forceinline__ BlockStore( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int linear_tid) ///< [in] [optional] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - //@} end member group - /******************************************************************//** - * \name Data movement - *********************************************************************/ - //@{ - - - /** - * \brief Store items into a linear segment of memory. - * - * \blocked - * - * The code snippet below illustrates the storing of a "blocked" arrangement - * of 512 integers across 128 threads (where each thread owns 4 consecutive items) - * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, - * meaning items are locally reordered among threads so that memory references will be - * efficiently coalesced using a warp-striped access pattern. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, ...) 
- * { - * // Specialize BlockStore for 128 threads owning 4 integer items each - * typedef cub::BlockStore BlockStore; - * - * // Allocate shared memory for BlockStore - * __shared__ typename BlockStore::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Store items to linear memory - * int thread_data[4]; - * BlockStore(temp_storage).Store(d_data, thread_data); - * - * \endcode - * \par - * Suppose the set of \p thread_data across the block of threads is - * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. - * The output \p d_data will be 0, 1, 2, 3, 4, 5, .... - * - */ - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store - { - InternalStore(temp_storage, linear_tid).Store(block_itr, items); - } - - /** - * \brief Store items into a linear segment of memory, guarded by range. - * - * \blocked - * - * The code snippet below illustrates the guarded storing of a "blocked" arrangement - * of 512 integers across 128 threads (where each thread owns 4 consecutive items) - * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, - * meaning items are locally reordered among threads so that memory references will be - * efficiently coalesced using a warp-striped access pattern. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, int valid_items, ...) - * { - * // Specialize BlockStore for 128 threads owning 4 integer items each - * typedef cub::BlockStore BlockStore; - * - * // Allocate shared memory for BlockStore - * __shared__ typename BlockStore::TempStorage temp_storage; - * - * // Obtain a segment of consecutive items that are blocked across threads - * int thread_data[4]; - * ... - * - * // Store items to linear memory - * int thread_data[4]; - * BlockStore(temp_storage).Store(d_data, thread_data, valid_items); - * - * \endcode - * \par - * Suppose the set of \p thread_data across the block of threads is - * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] } and \p valid_items is \p 5. - * The output \p d_data will be 0, 1, 2, 3, 4, ?, ?, ?, ..., with - * only the first two threads being unmasked to store portions of valid data. - * - */ - __device__ __forceinline__ void Store( - OutputIteratorRA block_itr, ///< [in] The thread block's base output iterator for storing to - T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store - int valid_items) ///< [in] Number of valid items to write - { - InternalStore(temp_storage, linear_tid).Store(block_itr, items, valid_items); - } -}; - -/** @} */ // end group BlockModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/specializations/block_histogram_atomic.cuh b/kokkos/kokkos/TPL/cub/block/specializations/block_histogram_atomic.cuh deleted file mode 100644 index ecc9800..0000000 --- a/kokkos/kokkos/TPL/cub/block/specializations/block_histogram_atomic.cuh +++ /dev/null @@ -1,85 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * The cub::BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. - */ - -#pragma once - -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \brief The BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. - */ -template < - typename T, - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - int BINS> -struct BlockHistogramAtomic -{ - /// Shared memory storage layout type - struct TempStorage {}; - - - /// Constructor - __device__ __forceinline__ BlockHistogramAtomic( - TempStorage &temp_storage, - int linear_tid) - {} - - - /// Composite data onto an existing histogram - template < - typename HistoCounter> - __device__ __forceinline__ void Composite( - T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram - HistoCounter histogram[BINS]) ///< [out] Reference to shared/global memory histogram - { - // Update histogram - #pragma unroll - for (int i = 0; i < ITEMS_PER_THREAD; ++i) - { - atomicAdd(histogram + items[i], 1); - } - } - -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/specializations/block_histogram_sort.cuh b/kokkos/kokkos/TPL/cub/block/specializations/block_histogram_sort.cuh deleted file mode 100644 index e81edec..0000000 --- a/kokkos/kokkos/TPL/cub/block/specializations/block_histogram_sort.cuh +++ /dev/null @@ -1,197 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * The cub::BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. - */ - -#pragma once - -#include "../../block/block_radix_sort.cuh" -#include "../../block/block_discontinuity.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - - -/** - * \brief The BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. 
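- *
- * Composite() below proceeds in three steps: the tile of samples is sorted
- * with BlockRadixSort, a BlockDiscontinuity pass flags the head of each run
- * of equal bin values while recording per-bin run begin/end offsets in
- * shared memory, and each bin's run length is then added into the caller's
- * histogram.
- *
- * \par
- * A minimal usage sketch (the sample type, block size, items per thread,
- * and bin count below are illustrative assumptions):
- * \code
- * typedef cub::BlockHistogramSort<unsigned char, 128, 4, 256> BlockHistogramSortT;
- * __shared__ typename BlockHistogramSortT::TempStorage temp_storage;
- * __shared__ unsigned int smem_histogram[256];
- * unsigned char samples[4];
- * ...
- * BlockHistogramSortT(temp_storage, threadIdx.x).Composite(samples, smem_histogram);
- * \endcode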
- */ -template < - typename T, - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - int BINS> -struct BlockHistogramSort -{ - // Parameterize BlockRadixSort type for our thread block - typedef BlockRadixSort BlockRadixSortT; - - // Parameterize BlockDiscontinuity type for our thread block - typedef BlockDiscontinuity BlockDiscontinuityT; - - // Shared memory - union _TempStorage - { - // Storage for sorting bin values - typename BlockRadixSortT::TempStorage sort; - - struct - { - // Storage for detecting discontinuities in the tile of sorted bin values - typename BlockDiscontinuityT::TempStorage flag; - - // Storage for noting begin/end offsets of bin runs in the tile of sorted bin values - unsigned int run_begin[BINS]; - unsigned int run_end[BINS]; - }; - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - // Thread fields - _TempStorage &temp_storage; - int linear_tid; - - - /// Constructor - __device__ __forceinline__ BlockHistogramSort( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - // Discontinuity functor - struct DiscontinuityOp - { - // Reference to temp_storage - _TempStorage &temp_storage; - - // Constructor - __device__ __forceinline__ DiscontinuityOp(_TempStorage &temp_storage) : - temp_storage(temp_storage) - {} - - // Discontinuity predicate - __device__ __forceinline__ bool operator()(const T &a, const T &b, unsigned int b_index) - { - if (a != b) - { - // Note the begin/end offsets in shared storage - temp_storage.run_begin[b] = b_index; - temp_storage.run_end[a] = b_index; - - return true; - } - else - { - return false; - } - } - }; - - - // Composite data onto an existing histogram - template < - typename HistoCounter> - __device__ __forceinline__ void Composite( - T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram - HistoCounter histogram[BINS]) ///< [out] Reference to shared/global memory histogram - { - enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD }; - - // Sort bytes in blocked arrangement - BlockRadixSortT(temp_storage.sort, linear_tid).Sort(items); - - __syncthreads(); - - // Initialize the shared memory's run_begin and run_end for each bin - int histo_offset = 0; - - #pragma unroll - for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) - { - temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; - temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; - } - // Finish up with guarded initialization if necessary - if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) - { - temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; - temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; - } - - __syncthreads(); - - int flags[ITEMS_PER_THREAD]; // unused - - // Compute head flags to demarcate contiguous runs of the same bin in the sorted tile - DiscontinuityOp flag_op(temp_storage); - BlockDiscontinuityT(temp_storage.flag, linear_tid).FlagHeads(flags, items, flag_op); - - // Update begin for first item - if (linear_tid == 0) temp_storage.run_begin[items[0]] = 0; - - __syncthreads(); - - // Composite into histogram - histo_offset = 0; - - #pragma unroll - for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) - { - int thread_offset = histo_offset + linear_tid; - HistoCounter count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; - histogram[thread_offset] += count; - } - // Finish 
up with guarded composition if necessary - if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) - { - int thread_offset = histo_offset + linear_tid; - HistoCounter count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; - histogram[thread_offset] += count; - } - } - -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/specializations/block_reduce_raking.cuh b/kokkos/kokkos/TPL/cub/block/specializations/block_reduce_raking.cuh deleted file mode 100644 index 434d25a..0000000 --- a/kokkos/kokkos/TPL/cub/block/specializations/block_reduce_raking.cuh +++ /dev/null @@ -1,214 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -/** - * \file - * cub::BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA threadblock - */ - -#pragma once - -#include "../../block/block_raking_layout.cuh" -#include "../../warp/warp_reduce.cuh" -#include "../../thread/thread_reduce.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \brief BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA threadblock - */ -template < - typename T, ///< Data type being reduced - int BLOCK_THREADS> ///< The thread block size in threads -struct BlockReduceRaking -{ - /// Layout type for padded threadblock raking grid - typedef BlockRakingLayout BlockRakingLayout; - - /// WarpReduce utility type - typedef typename WarpReduce::InternalWarpReduce WarpReduce; - - /// Constants - enum - { - /// Number of raking threads - RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, - - /// Number of raking elements per warp synchronous raking thread - SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, - - /// Cooperative work can be entirely warp synchronous - WARP_SYNCHRONOUS = (RAKING_THREADS == BLOCK_THREADS), - - /// Whether or not warp-synchronous reduction should be unguarded (i.e., the warp-reduction elements is a power of two - WARP_SYNCHRONOUS_UNGUARDED = ((RAKING_THREADS & (RAKING_THREADS - 1)) == 0), - - /// Whether or not accesses into smem are unguarded - RAKING_UNGUARDED = BlockRakingLayout::UNGUARDED, - - }; - - - /// Shared memory storage layout type - struct _TempStorage - { - typename WarpReduce::TempStorage warp_storage; ///< Storage for warp-synchronous reduction - typename BlockRakingLayout::TempStorage raking_grid; ///< Padded threadblock raking grid - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - // Thread fields - _TempStorage &temp_storage; - int linear_tid; - - - /// Constructor - __device__ __forceinline__ BlockReduceRaking( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - - /// Computes a threadblock-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. - template - __device__ __forceinline__ T Sum( - T partial, ///< [in] Calling thread's input partial reductions - int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) - { - cub::Sum reduction_op; - - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp synchronous reduction (unguarded if active threads is a power-of-two) - partial = WarpReduce(temp_storage.warp_storage, 0, linear_tid).template Sum( - partial, - num_valid); - } - else - { - // Place partial into shared memory grid. 
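- // Each thread first writes its partial into the padded raking grid; after
- // the barrier below, the first RAKING_THREADS threads each serially reduce
- // their SEGMENT_LENGTH-element grid segment and the surviving partials are
- // reduced warp-synchronously. For example (illustrative sizes only), a
- // 128-thread block raked by one 32-thread warp would fold 4 grid entries
- // per raking thread before the final warp reduction.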
- *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid) = partial; - - __syncthreads(); - - // Reduce parallelism to one warp - if (linear_tid < RAKING_THREADS) - { - // Raking reduction in grid - T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); - partial = raking_segment[0]; - - #pragma unroll - for (int ITEM = 1; ITEM < SEGMENT_LENGTH; ITEM++) - { - // Update partial if addend is in range - if ((FULL_TILE && RAKING_UNGUARDED) || ((linear_tid * SEGMENT_LENGTH) + ITEM < num_valid)) - { - partial = reduction_op(partial, raking_segment[ITEM]); - } - } - - partial = WarpReduce(temp_storage.warp_storage, 0, linear_tid).template Sum( - partial, - num_valid); - } - } - - return partial; - } - - - /// Computes a threadblock-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. - template < - bool FULL_TILE, - typename ReductionOp> - __device__ __forceinline__ T Reduce( - T partial, ///< [in] Calling thread's input partial reductions - int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) - ReductionOp reduction_op) ///< [in] Binary reduction operator - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp synchronous reduction (unguarded if active threads is a power-of-two) - partial = WarpReduce(temp_storage.warp_storage, 0, linear_tid).template Reduce( - partial, - num_valid, - reduction_op); - } - else - { - // Place partial into shared memory grid. - *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid) = partial; - - __syncthreads(); - - // Reduce parallelism to one warp - if (linear_tid < RAKING_THREADS) - { - // Raking reduction in grid - T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); - partial = raking_segment[0]; - - #pragma unroll - for (int ITEM = 1; ITEM < SEGMENT_LENGTH; ITEM++) - { - // Update partial if addend is in range - if ((FULL_TILE && RAKING_UNGUARDED) || ((linear_tid * SEGMENT_LENGTH) + ITEM < num_valid)) - { - partial = reduction_op(partial, raking_segment[ITEM]); - } - } - - partial = WarpReduce(temp_storage.warp_storage, 0, linear_tid).template Reduce( - partial, - num_valid, - reduction_op); - } - } - - return partial; - } - -}; - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/specializations/block_reduce_warp_reductions.cuh b/kokkos/kokkos/TPL/cub/block/specializations/block_reduce_warp_reductions.cuh deleted file mode 100644 index 0e316dd..0000000 --- a/kokkos/kokkos/TPL/cub/block/specializations/block_reduce_warp_reductions.cuh +++ /dev/null @@ -1,198 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA threadblock - */ - -#pragma once - -#include "../../warp/warp_reduce.cuh" -#include "../../util_arch.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \brief BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA threadblock - */ -template < - typename T, ///< Data type being reduced - int BLOCK_THREADS> ///< The thread block size in threads -struct BlockReduceWarpReductions -{ - /// Constants - enum - { - /// Number of active warps - WARPS = (BLOCK_THREADS + PtxArchProps::WARP_THREADS - 1) / PtxArchProps::WARP_THREADS, - - /// The logical warp size for warp reductions - LOGICAL_WARP_SIZE = CUB_MIN(BLOCK_THREADS, PtxArchProps::WARP_THREADS), - - /// Whether or not the logical warp size evenly divides the threadblock size - EVEN_WARP_MULTIPLE = (BLOCK_THREADS % LOGICAL_WARP_SIZE == 0) - }; - - - /// WarpReduce utility type - typedef typename WarpReduce::InternalWarpReduce WarpReduce; - - - /// Shared memory storage layout type - struct _TempStorage - { - typename WarpReduce::TempStorage warp_reduce; ///< Buffer for warp-synchronous scan - T warp_aggregates[WARPS]; ///< Shared totals from each warp-synchronous scan - T block_prefix; ///< Shared prefix for the entire threadblock - }; - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - // Thread fields - _TempStorage &temp_storage; - int linear_tid; - int warp_id; - int lane_id; - - - /// Constructor - __device__ __forceinline__ BlockReduceWarpReductions( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid), - warp_id((BLOCK_THREADS <= PtxArchProps::WARP_THREADS) ? - 0 : - linear_tid / PtxArchProps::WARP_THREADS), - lane_id((BLOCK_THREADS <= PtxArchProps::WARP_THREADS) ? - linear_tid : - linear_tid % PtxArchProps::WARP_THREADS) - {} - - - /// Returns block-wide aggregate in thread0. 
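- /// Lane 0 of each warp first publishes its warp-wide aggregate to shared
- /// memory; after a barrier, thread0 folds the aggregates of the successor
- /// warps into its own, skipping warps that lie entirely beyond \p num_valid
- /// when the tile is not full.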
- template < - bool FULL_TILE, - typename ReductionOp> - __device__ __forceinline__ T ApplyWarpAggregates( - ReductionOp reduction_op, ///< [in] Binary scan operator - T warp_aggregate, ///< [in] [lane0s only] Warp-wide aggregate reduction of input items - int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) - { - // Share lane aggregates - if (lane_id == 0) - { - temp_storage.warp_aggregates[warp_id] = warp_aggregate; - } - - __syncthreads(); - - // Update total aggregate in warp 0, lane 0 - if (linear_tid == 0) - { - #pragma unroll - for (int SUCCESSOR_WARP = 1; SUCCESSOR_WARP < WARPS; SUCCESSOR_WARP++) - { - if (FULL_TILE || (SUCCESSOR_WARP * LOGICAL_WARP_SIZE < num_valid)) - { - warp_aggregate = reduction_op(warp_aggregate, temp_storage.warp_aggregates[SUCCESSOR_WARP]); - } - } - } - - return warp_aggregate; - } - - - /// Computes a threadblock-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. - template - __device__ __forceinline__ T Sum( - T input, ///< [in] Calling thread's input partial reductions - int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) - { - cub::Sum reduction_op; - unsigned int warp_offset = warp_id * LOGICAL_WARP_SIZE; - unsigned int warp_num_valid = (FULL_TILE && EVEN_WARP_MULTIPLE) ? - LOGICAL_WARP_SIZE : - (warp_offset < num_valid) ? - num_valid - warp_offset : - 0; - - // Warp reduction in every warp - T warp_aggregate = WarpReduce(temp_storage.warp_reduce, warp_id, lane_id).template Sum<(FULL_TILE && EVEN_WARP_MULTIPLE), 1>( - input, - warp_num_valid); - - // Update outputs and block_aggregate with warp-wide aggregates from lane-0s - return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); - } - - - /// Computes a threadblock-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. - template < - bool FULL_TILE, - typename ReductionOp> - __device__ __forceinline__ T Reduce( - T input, ///< [in] Calling thread's input partial reductions - int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) - ReductionOp reduction_op) ///< [in] Binary reduction operator - { - unsigned int warp_id = (WARPS == 1) ? 0 : (linear_tid / LOGICAL_WARP_SIZE); - unsigned int warp_offset = warp_id * LOGICAL_WARP_SIZE; - unsigned int warp_num_valid = (FULL_TILE && EVEN_WARP_MULTIPLE) ? - LOGICAL_WARP_SIZE : - (warp_offset < num_valid) ? - num_valid - warp_offset : - 0; - - // Warp reduction in every warp - T warp_aggregate = WarpReduce(temp_storage.warp_reduce, warp_id, lane_id).template Reduce<(FULL_TILE && EVEN_WARP_MULTIPLE), 1>( - input, - warp_num_valid, - reduction_op); - - // Update outputs and block_aggregate with warp-wide aggregates from lane-0s - return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/specializations/block_scan_raking.cuh b/kokkos/kokkos/TPL/cub/block/specializations/block_scan_raking.cuh deleted file mode 100644 index 75e15d9..0000000 --- a/kokkos/kokkos/TPL/cub/block/specializations/block_scan_raking.cuh +++ /dev/null @@ -1,761 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. 
- * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - - -/** - * \file - * cub::BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA threadblock. - */ - -#pragma once - -#include "../../util_arch.cuh" -#include "../../block/block_raking_layout.cuh" -#include "../../thread/thread_reduce.cuh" -#include "../../thread/thread_scan.cuh" -#include "../../warp/warp_scan.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \brief BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA threadblock. 
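- *
- * The scan proceeds in three phases: every thread deposits its partial into
- * a padded raking grid, a single warp of raking threads performs a serial
- * upsweep reduction over its grid segment followed by a warp-synchronous
- * scan of the raking partials, and a downsweep pass re-walks each segment
- * seeded with its exclusive partial. When MEMOIZE is set, the upsweep
- * caches its grid segment in registers so the downsweep can avoid
- * re-reading shared memory, at the cost of higher register pressure.
- *
- * \par
- * A minimal usage sketch (the element type and block size are illustrative
- * assumptions):
- * \code
- * typedef cub::BlockScanRaking<int, 128, true> BlockScanT;
- * __shared__ typename BlockScanT::TempStorage temp_storage;
- * int thread_data;
- * ...
- * int block_aggregate;
- * BlockScanT(temp_storage, threadIdx.x).ExclusiveSum(thread_data, thread_data, block_aggregate);
- * \endcode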
- */ -template < - typename T, ///< Data type being scanned - int BLOCK_THREADS, ///< The thread block size in threads - bool MEMOIZE> ///< Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure -struct BlockScanRaking -{ - /// Layout type for padded threadblock raking grid - typedef BlockRakingLayout BlockRakingLayout; - - /// Constants - enum - { - /// Number of active warps - WARPS = (BLOCK_THREADS + PtxArchProps::WARP_THREADS - 1) / PtxArchProps::WARP_THREADS, - - /// Number of raking threads - RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, - - /// Number of raking elements per warp synchronous raking thread - SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, - - /// Cooperative work can be entirely warp synchronous - WARP_SYNCHRONOUS = (BLOCK_THREADS == RAKING_THREADS), - }; - - /// WarpScan utility type - typedef WarpScan WarpScan; - - /// Shared memory storage layout type - struct _TempStorage - { - typename WarpScan::TempStorage warp_scan; ///< Buffer for warp-synchronous scan - typename BlockRakingLayout::TempStorage raking_grid; ///< Padded threadblock raking grid - T block_aggregate; ///< Block aggregate - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - // Thread fields - _TempStorage &temp_storage; - int linear_tid; - T cached_segment[SEGMENT_LENGTH]; - - - /// Constructor - __device__ __forceinline__ BlockScanRaking( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid) - {} - - /// Performs upsweep raking reduction, returning the aggregate - template - __device__ __forceinline__ T Upsweep( - ScanOp scan_op) - { - T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); - T *raking_ptr; - - if (MEMOIZE) - { - // Copy data into registers - #pragma unroll - for (int i = 0; i < SEGMENT_LENGTH; i++) - { - cached_segment[i] = smem_raking_ptr[i]; - } - raking_ptr = cached_segment; - } - else - { - raking_ptr = smem_raking_ptr; - } - - T raking_partial = raking_ptr[0]; - - #pragma unroll - for (int i = 1; i < SEGMENT_LENGTH; i++) - { - if ((BlockRakingLayout::UNGUARDED) || (((linear_tid * SEGMENT_LENGTH) + i) < BLOCK_THREADS)) - { - raking_partial = scan_op(raking_partial, raking_ptr[i]); - } - } - - return raking_partial; - } - - - /// Performs exclusive downsweep raking scan - template - __device__ __forceinline__ void ExclusiveDownsweep( - ScanOp scan_op, - T raking_partial, - bool apply_prefix = true) - { - T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); - - T *raking_ptr = (MEMOIZE) ? - cached_segment : - smem_raking_ptr; - - ThreadScanExclusive(raking_ptr, raking_ptr, scan_op, raking_partial, apply_prefix); - - if (MEMOIZE) - { - // Copy data back to smem - #pragma unroll - for (int i = 0; i < SEGMENT_LENGTH; i++) - { - smem_raking_ptr[i] = cached_segment[i]; - } - } - } - - - /// Performs inclusive downsweep raking scan - template - __device__ __forceinline__ void InclusiveDownsweep( - ScanOp scan_op, - T raking_partial, - bool apply_prefix = true) - { - T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); - - T *raking_ptr = (MEMOIZE) ? 
- cached_segment : - smem_raking_ptr; - - ThreadScanInclusive(raking_ptr, raking_ptr, scan_op, raking_partial, apply_prefix); - - if (MEMOIZE) - { - // Copy data back to smem - #pragma unroll - for (int i = 0; i < SEGMENT_LENGTH; i++) - { - smem_raking_ptr[i] = cached_segment[i]; - } - } - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input items - T &output, ///< [out] Calling thread's output items (may be aliased to \p input) - const T &identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - input, - output, - identity, - scan_op, - block_aggregate); - } - else - { - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - raking_partial, - raking_partial, - identity, - scan_op, - temp_storage.block_aggregate); - - // Exclusive raking downsweep scan - ExclusiveDownsweep(scan_op, raking_partial); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. 
- { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - input, - output, - identity, - scan_op, - block_aggregate, - block_prefix_op); - } - else - { - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - raking_partial, - raking_partial, - identity, - scan_op, - temp_storage.block_aggregate, - block_prefix_op); - - // Exclusive raking downsweep scan - ExclusiveDownsweep(scan_op, raking_partial); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no identity value, the output computed for thread0 is undefined. - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - input, - output, - scan_op, - block_aggregate); - } - else - { - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - raking_partial, - raking_partial, - scan_op, - temp_storage.block_aggregate); - - // Exclusive raking downsweep scan - ExclusiveDownsweep(scan_op, raking_partial, (linear_tid != 0)); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
- template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - input, - output, - scan_op, - block_aggregate, - block_prefix_op); - } - else - { - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - raking_partial, - raking_partial, - scan_op, - temp_storage.block_aggregate, - block_prefix_op); - - // Exclusive raking downsweep scan - ExclusiveDownsweep(scan_op, raking_partial); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an exclusive threadblock-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveSum( - input, - output, - block_aggregate); - } - else - { - // Raking scan - Sum scan_op; - - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveSum( - raking_partial, - raking_partial, - temp_storage.block_aggregate); - - // Exclusive raking downsweep scan - ExclusiveDownsweep(scan_op, raking_partial); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an exclusive threadblock-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. 
Instead of using 0 as the threadblock-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveSum( - input, - output, - block_aggregate, - block_prefix_op); - } - else - { - // Raking scan - Sum scan_op; - - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveSum( - raking_partial, - raking_partial, - temp_storage.block_aggregate, - block_prefix_op); - - // Exclusive raking downsweep scan - ExclusiveDownsweep(scan_op, raking_partial); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
- template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).InclusiveScan( - input, - output, - scan_op, - block_aggregate); - } - else - { - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - raking_partial, - raking_partial, - scan_op, - temp_storage.block_aggregate); - - // Inclusive raking downsweep scan - InclusiveDownsweep(scan_op, raking_partial, (linear_tid != 0)); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).InclusiveScan( - input, - output, - scan_op, - block_aggregate, - block_prefix_op); - } - else - { - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveScan( - raking_partial, - raking_partial, - scan_op, - temp_storage.block_aggregate, - block_prefix_op); - - // Inclusive raking downsweep scan - InclusiveDownsweep(scan_op, raking_partial); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. 
Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).InclusiveSum( - input, - output, - block_aggregate); - } - else - { - // Raking scan - Sum scan_op; - - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Exclusive warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveSum( - raking_partial, - raking_partial, - temp_storage.block_aggregate); - - // Inclusive raking downsweep scan - InclusiveDownsweep(scan_op, raking_partial, (linear_tid != 0)); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Instead of using 0 as the threadblock-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. 
- { - if (WARP_SYNCHRONOUS) - { - // Short-circuit directly to warp scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).InclusiveSum( - input, - output, - block_aggregate, - block_prefix_op); - } - else - { - // Raking scan - Sum scan_op; - - // Place thread partial into shared memory raking grid - T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - *placement_ptr = input; - - __syncthreads(); - - // Reduce parallelism down to just raking threads - if (linear_tid < RAKING_THREADS) - { - // Raking upsweep reduction in grid - T raking_partial = Upsweep(scan_op); - - // Warp synchronous scan - WarpScan(temp_storage.warp_scan, 0, linear_tid).ExclusiveSum( - raking_partial, - raking_partial, - temp_storage.block_aggregate, - block_prefix_op); - - // Inclusive raking downsweep scan - InclusiveDownsweep(scan_op, raking_partial); - } - - __syncthreads(); - - // Grab thread prefix from shared memory - output = *placement_ptr; - - // Retrieve block aggregate - block_aggregate = temp_storage.block_aggregate; - } - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/block/specializations/block_scan_warp_scans.cuh b/kokkos/kokkos/TPL/cub/block/specializations/block_scan_warp_scans.cuh deleted file mode 100644 index f7af361..0000000 --- a/kokkos/kokkos/TPL/cub/block/specializations/block_scan_warp_scans.cuh +++ /dev/null @@ -1,342 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA threadblock. 
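- *
- * Each warp performs an independent warp-synchronous scan over its segment
- * of the tile; the per-warp aggregates are then shared through shared
- * memory and folded, in warp order, into the partials of later warps,
- * which also yields the block-wide aggregate.
- *
- * A minimal usage sketch (the element type and block size are illustrative
- * assumptions):
- * \code
- * typedef cub::BlockScanWarpScans<int, 128> BlockScanT;
- * __shared__ typename BlockScanT::TempStorage temp_storage;
- * int thread_data;
- * ...
- * int block_aggregate;
- * BlockScanT(temp_storage, threadIdx.x).ExclusiveSum(thread_data, thread_data, block_aggregate);
- * \endcode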
- */ - -#pragma once - -#include "../../util_arch.cuh" -#include "../../warp/warp_scan.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA threadblock. - */ -template < - typename T, - int BLOCK_THREADS> -struct BlockScanWarpScans -{ - /// Constants - enum - { - /// Number of active warps - WARPS = (BLOCK_THREADS + PtxArchProps::WARP_THREADS - 1) / PtxArchProps::WARP_THREADS, - }; - - /// WarpScan utility type - typedef WarpScan WarpScan; - - /// Shared memory storage layout type - struct _TempStorage - { - typename WarpScan::TempStorage warp_scan; ///< Buffer for warp-synchronous scan - T warp_aggregates[WARPS]; ///< Shared totals from each warp-synchronous scan - T block_prefix; ///< Shared prefix for the entire threadblock - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - // Thread fields - _TempStorage &temp_storage; - int linear_tid; - int warp_id; - int lane_id; - - - /// Constructor - __device__ __forceinline__ BlockScanWarpScans( - TempStorage &temp_storage, - int linear_tid) - : - temp_storage(temp_storage.Alias()), - linear_tid(linear_tid), - warp_id((BLOCK_THREADS <= PtxArchProps::WARP_THREADS) ? - 0 : - linear_tid / PtxArchProps::WARP_THREADS), - lane_id((BLOCK_THREADS <= PtxArchProps::WARP_THREADS) ? - linear_tid : - linear_tid % PtxArchProps::WARP_THREADS) - {} - - - /// Update the calling thread's partial reduction with the warp-wide aggregates from preceding warps. Also returns block-wide aggregate in thread0. - template - __device__ __forceinline__ void ApplyWarpAggregates( - T &partial, ///< [out] The calling thread's partial reduction - ScanOp scan_op, ///< [in] Binary scan operator - T warp_aggregate, ///< [in] [lane0s only] Warp-wide aggregate reduction of input items - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items - bool lane_valid = true) ///< [in] Whether or not the partial belonging to the current thread is valid - { - // Share lane aggregates - temp_storage.warp_aggregates[warp_id] = warp_aggregate; - - __syncthreads(); - - block_aggregate = temp_storage.warp_aggregates[0]; - - #pragma unroll - for (int WARP = 1; WARP < WARPS; WARP++) - { - if (warp_id == WARP) - { - partial = (lane_valid) ? - scan_op(block_aggregate, partial) : // fold it in our valid partial - block_aggregate; // replace our invalid partial with the aggregate - } - - block_aggregate = scan_op(block_aggregate, temp_storage.warp_aggregates[WARP]); - } - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
- template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input items - T &output, ///< [out] Calling thread's output items (may be aliased to \p input) - const T &identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - T warp_aggregate; - WarpScan(temp_storage.warp_scan, warp_id, lane_id).ExclusiveScan(input, output, identity, scan_op, warp_aggregate); - - // Update outputs and block_aggregate with warp-wide aggregates - ApplyWarpAggregates(output, scan_op, warp_aggregate, block_aggregate); - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - ExclusiveScan(input, output, identity, scan_op, block_aggregate); - - // Compute and share threadblock prefix - if (warp_id == 0) - { - temp_storage.block_prefix = block_prefix_op(block_aggregate); - } - - __syncthreads(); - - // Incorporate threadblock prefix into outputs - output = scan_op(temp_storage.block_prefix, output); - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no identity value, the output computed for thread0 is undefined. - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - T warp_aggregate; - WarpScan(temp_storage.warp_scan, warp_id, lane_id).ExclusiveScan(input, output, scan_op, warp_aggregate); - - // Update outputs and block_aggregate with warp-wide aggregates - ApplyWarpAggregates(output, scan_op, warp_aggregate, block_aggregate, (lane_id > 0)); - } - - - /// Computes an exclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
- template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - ExclusiveScan(input, output, scan_op, block_aggregate); - - // Compute and share threadblock prefix - if (warp_id == 0) - { - temp_storage.block_prefix = block_prefix_op(block_aggregate); - } - - __syncthreads(); - - // Incorporate threadblock prefix into outputs - output = (linear_tid == 0) ? - temp_storage.block_prefix : - scan_op(temp_storage.block_prefix, output); - } - - - /// Computes an exclusive threadblock-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - T warp_aggregate; - WarpScan(temp_storage.warp_scan, warp_id, lane_id).ExclusiveSum(input, output, warp_aggregate); - - // Update outputs and block_aggregate with warp-wide aggregates from lane-0s - ApplyWarpAggregates(output, Sum(), warp_aggregate, block_aggregate); - } - - - /// Computes an exclusive threadblock-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the threadblock-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - ExclusiveSum(input, output, block_aggregate); - - // Compute and share threadblock prefix - if (warp_id == 0) - { - temp_storage.block_prefix = block_prefix_op(block_aggregate); - } - - __syncthreads(); - - // Incorporate threadblock prefix into outputs - Sum scan_op; - output = scan_op(temp_storage.block_prefix, output); - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
- template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - T warp_aggregate; - WarpScan(temp_storage.warp_scan, warp_id, lane_id).InclusiveScan(input, output, scan_op, warp_aggregate); - - // Update outputs and block_aggregate with warp-wide aggregates from lane-0s - ApplyWarpAggregates(output, scan_op, warp_aggregate, block_aggregate); - - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. - template < - typename ScanOp, - typename BlockPrefixOp> - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - InclusiveScan(input, output, scan_op, block_aggregate); - - // Compute and share threadblock prefix - if (warp_id == 0) - { - temp_storage.block_prefix = block_prefix_op(block_aggregate); - } - - __syncthreads(); - - // Incorporate threadblock prefix into outputs - output = scan_op(temp_storage.block_prefix, output); - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items - { - T warp_aggregate; - WarpScan(temp_storage.warp_scan, warp_id, lane_id).InclusiveSum(input, output, warp_aggregate); - - // Update outputs and block_aggregate with warp-wide aggregates from lane-0s - ApplyWarpAggregates(output, Sum(), warp_aggregate, block_aggregate); - } - - - /// Computes an inclusive threadblock-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Instead of using 0 as the threadblock-wide prefix, the call-back functor \p block_prefix_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the threadblock's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
- template - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item - T &output, ///< [out] Calling thread's output item (may be aliased to \p input) - T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items (exclusive of the \p block_prefix_op value) - BlockPrefixOp &block_prefix_op) ///< [in-out] [warp0 only] Call-back functor for specifying a threadblock-wide prefix to be applied to all inputs. - { - InclusiveSum(input, output, block_aggregate); - - // Compute and share threadblock prefix - if (warp_id == 0) - { - temp_storage.block_prefix = block_prefix_op(block_aggregate); - } - - __syncthreads(); - - // Incorporate threadblock prefix into outputs - Sum scan_op; - output = scan_op(temp_storage.block_prefix, output); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/cub.cuh b/kokkos/kokkos/TPL/cub/cub.cuh deleted file mode 100644 index dbb77da..0000000 --- a/kokkos/kokkos/TPL/cub/cub.cuh +++ /dev/null @@ -1,84 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
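// ---------------------------------------------------------------------------
// Illustrative aside (not part of the deleted files): the exclusive scans in
// BlockScanWarpScans above compose a block-wide result from independent warp
// scans.  Each warp publishes its warp_aggregate, and every thread of warp w
// then folds the combined aggregate of warps 0..w-1 into its warp-local
// partial, while the running block_aggregate accumulates across all warps.
// A minimal host-side C++ sketch of that composition step (plain loops stand
// in for __syncthreads() and the unrolled warp loop; names are hypothetical):
// ---------------------------------------------------------------------------
#include <cstdio>
#include <vector>

int main()
{
    const int WARPS = 4;
    // Per-warp totals as they would sit in temp_storage.warp_aggregates.
    std::vector<int> warp_aggregates = {10, 20, 30, 40};

    // warp_prefix[w] is what ApplyWarpAggregates adds to every partial of
    // warp w: the running total of all preceding warps (warp 0 adds nothing).
    std::vector<int> warp_prefix(WARPS, 0);
    int block_aggregate = warp_aggregates[0];
    for (int warp = 1; warp < WARPS; ++warp)
    {
        warp_prefix[warp] = block_aggregate;      // aggregate of warps 0..warp-1
        block_aggregate  += warp_aggregates[warp];
    }

    for (int warp = 0; warp < WARPS; ++warp)
        printf("warp %d: add %d to each warp-local partial\n", warp, warp_prefix[warp]);
    printf("block_aggregate = %d\n", block_aggregate);   // 100
    return 0;
}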
- * - ******************************************************************************/ - -/** - * \file - * CUB umbrella include file - */ - -#pragma once - - -// Block -#include "block/block_histogram.cuh" -#include "block/block_discontinuity.cuh" -#include "block/block_exchange.cuh" -#include "block/block_load.cuh" -#include "block/block_radix_rank.cuh" -#include "block/block_radix_sort.cuh" -#include "block/block_reduce.cuh" -#include "block/block_scan.cuh" -#include "block/block_store.cuh" - -// Device -#include "device/device_histogram.cuh" -#include "device/device_radix_sort.cuh" -#include "device/device_reduce.cuh" -#include "device/device_scan.cuh" - -// Grid -//#include "grid/grid_barrier.cuh" -#include "grid/grid_even_share.cuh" -#include "grid/grid_mapping.cuh" -#include "grid/grid_queue.cuh" - -// Host -#include "host/spinlock.cuh" - -// Thread -#include "thread/thread_load.cuh" -#include "thread/thread_operators.cuh" -#include "thread/thread_reduce.cuh" -#include "thread/thread_scan.cuh" -#include "thread/thread_store.cuh" - -// Warp -#include "warp/warp_reduce.cuh" -#include "warp/warp_scan.cuh" - -// Util -#include "util_allocator.cuh" -#include "util_arch.cuh" -#include "util_debug.cuh" -#include "util_device.cuh" -#include "util_macro.cuh" -#include "util_ptx.cuh" -#include "util_type.cuh" -#include "util_iterator.cuh" -#include "util_vector.cuh" - diff --git a/kokkos/kokkos/TPL/cub/device/block/block_histo_tiles.cuh b/kokkos/kokkos/TPL/cub/device/block/block_histo_tiles.cuh deleted file mode 100644 index e1165d6..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/block_histo_tiles.cuh +++ /dev/null @@ -1,322 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
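// ---------------------------------------------------------------------------
// Illustrative aside (not part of the deleted files): the umbrella header
// above exists so a consumer can pull in every CUB primitive with a single
// include.  A hedged sketch of such usage, assuming the BlockScan interface
// defined in block_scan.cuh of this same snapshot; the kernel, buffer, and
// launch parameters are hypothetical.
// ---------------------------------------------------------------------------
#include <cub/cub.cuh>          // or the relative "cub.cuh" path as vendored here
#include <cuda_runtime.h>
#include <cstdio>

__global__ void BlockPrefixSumKernel(int *d_data)
{
    typedef cub::BlockScan<int, 128> BlockScan;            // 128 threads per block
    __shared__ typename BlockScan::TempStorage temp_storage;

    int thread_data = d_data[threadIdx.x];
    BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data);
    d_data[threadIdx.x] = thread_data;                     // exclusive running sum
}

int main()
{
    int h_data[128], *d_data;
    for (int i = 0; i < 128; ++i) h_data[i] = 1;
    cudaMalloc(&d_data, sizeof(h_data));
    cudaMemcpy(d_data, h_data, sizeof(h_data), cudaMemcpyHostToDevice);

    BlockPrefixSumKernel<<<1, 128>>>(d_data);

    cudaMemcpy(h_data, d_data, sizeof(h_data), cudaMemcpyDeviceToHost);
    printf("last prefix = %d\n", h_data[127]);             // expect 127
    cudaFree(d_data);
    return 0;
}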
- * - ******************************************************************************/ - -/** - * \file - * cub::BlockHistogramTiles implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram. - */ - -#pragma once - -#include - -#include "specializations/block_histo_tiles_gatomic.cuh" -#include "specializations/block_histo_tiles_satomic.cuh" -#include "specializations/block_histo_tiles_sort.cuh" -#include "../../util_type.cuh" -#include "../../grid/grid_mapping.cuh" -#include "../../grid/grid_even_share.cuh" -#include "../../grid/grid_queue.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Algorithmic variants - ******************************************************************************/ - - -/** - * \brief BlockHistogramTilesAlgorithm enumerates alternative algorithms for BlockHistogramTiles. - */ -enum BlockHistogramTilesAlgorithm -{ - - /** - * \par Overview - * A two-kernel approach in which: - * -# Thread blocks in the first kernel aggregate their own privatized - * histograms using block-wide sorting (see BlockHistogramAlgorithm::BLOCK_HISTO_SORT). - * -# A single thread block in the second kernel reduces them into the output histogram(s). - * - * \par Performance Considerations - * Delivers consistent throughput regardless of sample bin distribution. - * - * However, because histograms are privatized in shared memory, a large - * number of bins (e.g., thousands) may adversely affect occupancy and - * performance (or even the ability to launch). - */ - GRID_HISTO_SORT, - - - /** - * \par Overview - * A two-kernel approach in which: - * -# Thread blocks in the first kernel aggregate their own privatized - * histograms using shared-memory \p atomicAdd(). - * -# A single thread block in the second kernel reduces them into the - * output histogram(s). - * - * \par Performance Considerations - * Performance is strongly tied to the hardware implementation of atomic - * addition, and may be significantly degraded for non uniformly-random - * input distributions where many concurrent updates are likely to be - * made to the same bin counter. - * - * However, because histograms are privatized in shared memory, a large - * number of bins (e.g., thousands) may adversely affect occupancy and - * performance (or even the ability to launch). - */ - GRID_HISTO_SHARED_ATOMIC, - - - /** - * \par Overview - * A single-kernel approach in which thread blocks update the output histogram(s) directly - * using global-memory \p atomicAdd(). - * - * \par Performance Considerations - * Performance is strongly tied to the hardware implementation of atomic - * addition, and may be significantly degraded for non uniformly-random - * input distributions where many concurrent updates are likely to be - * made to the same bin counter. - * - * Performance is not significantly impacted when computing histograms having large - * numbers of bins (e.g., thousands). 
- */ - GRID_HISTO_GLOBAL_ATOMIC, - -}; - - -/****************************************************************************** - * Tuning policy - ******************************************************************************/ - -/** - * Tuning policy for BlockHistogramTiles - */ -template < - int _BLOCK_THREADS, - int _ITEMS_PER_THREAD, - BlockHistogramTilesAlgorithm _GRID_ALGORITHM, - GridMappingStrategy _GRID_MAPPING, - int _SM_OCCUPANCY> -struct BlockHistogramTilesPolicy -{ - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - SM_OCCUPANCY = _SM_OCCUPANCY, - }; - - static const BlockHistogramTilesAlgorithm GRID_ALGORITHM = _GRID_ALGORITHM; - static const GridMappingStrategy GRID_MAPPING = _GRID_MAPPING; -}; - - - -/****************************************************************************** - * Thread block abstractions - ******************************************************************************/ - - -/** - * Implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram using global atomics - */ -template < - typename BlockHistogramTilesPolicy, ///< Tuning policy - int BINS, ///< Number of histogram bins per channel - int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of active channels being histogrammed) - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). Must have a value type that can be cast as an integer in the range [0..BINS-1] - typename HistoCounter, ///< Integral type for counting sample occurrences per histogram bin - typename SizeT> ///< Integer type for offsets -struct BlockHistogramTiles -{ - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - // Histogram grid algorithm - static const BlockHistogramTilesAlgorithm GRID_ALGORITHM = BlockHistogramTilesPolicy::GRID_ALGORITHM; - - // Alternative internal implementation types - typedef BlockHistogramTilesSort< BlockHistogramTilesPolicy, BINS, CHANNELS, ACTIVE_CHANNELS, InputIteratorRA, HistoCounter, SizeT> BlockHistogramTilesSortT; - typedef BlockHistogramTilesSharedAtomic< BlockHistogramTilesPolicy, BINS, CHANNELS, ACTIVE_CHANNELS, InputIteratorRA, HistoCounter, SizeT> BlockHistogramTilesSharedAtomicT; - typedef BlockHistogramTilesGlobalAtomic< BlockHistogramTilesPolicy, BINS, CHANNELS, ACTIVE_CHANNELS, InputIteratorRA, HistoCounter, SizeT> BlockHistogramTilesGlobalAtomicT; - - // Internal block sweep histogram type - typedef typename If<(GRID_ALGORITHM == GRID_HISTO_SORT), - BlockHistogramTilesSortT, - typename If<(GRID_ALGORITHM == GRID_HISTO_SHARED_ATOMIC), - BlockHistogramTilesSharedAtomicT, - BlockHistogramTilesGlobalAtomicT>::Type>::Type InternalBlockDelegate; - - enum - { - TILE_ITEMS = InternalBlockDelegate::TILE_ITEMS, - }; - - - // Temporary storage type - typedef typename InternalBlockDelegate::TempStorage TempStorage; - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - // Internal block delegate - InternalBlockDelegate internal_delegate; - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - /** - * 
Constructor - */ - __device__ __forceinline__ BlockHistogramTiles( - TempStorage &temp_storage, ///< Reference to temp_storage - InputIteratorRA d_in, ///< Input data to reduce - HistoCounter* (&d_out_histograms)[ACTIVE_CHANNELS]) ///< Reference to output histograms - : - internal_delegate(temp_storage, d_in, d_out_histograms) - {} - - - /** - * \brief Reduce a consecutive segment of input tiles - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT block_offset, ///< [in] Threadblock begin offset (inclusive) - SizeT block_oob) ///< [in] Threadblock end offset (exclusive) - { - // Consume subsequent full tiles of input - while (block_offset + TILE_ITEMS <= block_oob) - { - internal_delegate.ConsumeTile(block_offset); - block_offset += TILE_ITEMS; - } - - // Consume a partially-full tile - if (block_offset < block_oob) - { - int valid_items = block_oob - block_offset; - internal_delegate.ConsumeTile(block_offset, valid_items); - } - - // Aggregate output - internal_delegate.AggregateOutput(); - } - - - /** - * Reduce a consecutive segment of input tiles - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT num_items, ///< [in] Total number of global input items - GridEvenShare &even_share, ///< [in] GridEvenShare descriptor - GridQueue &queue, ///< [in,out] GridQueue descriptor - Int2Type is_even_share) ///< [in] Marker type indicating this is an even-share mapping - { - even_share.BlockInit(); - ConsumeTiles(even_share.block_offset, even_share.block_oob); - } - - - /** - * Dequeue and reduce tiles of items as part of a inter-block scan - */ - __device__ __forceinline__ void ConsumeTiles( - int num_items, ///< Total number of input items - GridQueue queue) ///< Queue descriptor for assigning tiles of work to thread blocks - { - // Shared block offset - __shared__ SizeT shared_block_offset; - - // We give each thread block at least one tile of input. - SizeT block_offset = blockIdx.x * TILE_ITEMS; - SizeT even_share_base = gridDim.x * TILE_ITEMS; - - // Process full tiles of input - while (block_offset + TILE_ITEMS <= num_items) - { - internal_delegate.ConsumeTile(block_offset); - - // Dequeue up to TILE_ITEMS - if (threadIdx.x == 0) - shared_block_offset = queue.Drain(TILE_ITEMS) + even_share_base; - - __syncthreads(); - - block_offset = shared_block_offset; - - __syncthreads(); - } - - // Consume a partially-full tile - if (block_offset < num_items) - { - int valid_items = num_items - block_offset; - internal_delegate.ConsumeTile(block_offset, valid_items); - } - - // Aggregate output - internal_delegate.AggregateOutput(); - } - - - /** - * Dequeue and reduce tiles of items as part of a inter-block scan - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT num_items, ///< [in] Total number of global input items - GridEvenShare &even_share, ///< [in] GridEvenShare descriptor - GridQueue &queue, ///< [in,out] GridQueue descriptor - Int2Type is_dynamic) ///< [in] Marker type indicating this is a dynamic mapping - { - ConsumeTiles(num_items, queue); - } - - -}; - - - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/block_partition_tiles.cuh b/kokkos/kokkos/TPL/cub/device/block/block_partition_tiles.cuh deleted file mode 100644 index 4597773..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/block_partition_tiles.cuh +++ /dev/null @@ -1,381 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. 
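// ---------------------------------------------------------------------------
// Illustrative aside (not part of the deleted files): a self-contained CUDA
// sketch of the GRID_HISTO_SHARED_ATOMIC strategy documented earlier in this
// file -- each block privatizes a histogram in shared memory with cheap
// shared-memory atomics, then merges it into the global histogram with one
// round of global atomics.  This is a hedged stand-in, not the deleted
// BlockHistogramTilesSharedAtomic specialization; all names are hypothetical.
// ---------------------------------------------------------------------------
#include <cuda_runtime.h>

template <int BINS, int BLOCK_THREADS>
__global__ void PrivatizedHistogramKernel(
    const unsigned char *d_samples,      // input samples in [0, BINS)
    int                  num_samples,
    unsigned int        *d_histogram)    // output histogram of length BINS
{
    __shared__ unsigned int smem_histo[BINS];

    // Zero the block-private histogram.
    for (int bin = threadIdx.x; bin < BINS; bin += BLOCK_THREADS)
        smem_histo[bin] = 0;
    __syncthreads();

    // Grid-stride loop: concurrent updates hit shared memory, not DRAM.
    for (int i = blockIdx.x * BLOCK_THREADS + threadIdx.x;
         i < num_samples;
         i += gridDim.x * BLOCK_THREADS)
    {
        atomicAdd(&smem_histo[d_samples[i]], 1u);
    }
    __syncthreads();

    // One merge pass per block into the global output bins.
    for (int bin = threadIdx.x; bin < BINS; bin += BLOCK_THREADS)
        atomicAdd(&d_histogram[bin], smem_histo[bin]);
}

// Example launch (assumes d_samples/d_histogram already allocated and
// d_histogram zero-initialized):
//   PrivatizedHistogramKernel<256, 128><<<64, 128>>>(d_samples, n, d_histogram);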
- * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockPartitionTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide list partitioning. 
- */ - -#pragma once - -#include - -#include "scan_tiles_types.cuh" -#include "../../thread/thread_operators.cuh" -#include "../../block/block_load.cuh" -#include "../../block/block_store.cuh" -#include "../../block/block_scan.cuh" -#include "../../grid/grid_queue.cuh" -#include "../../util_vector.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Tuning policy types - ******************************************************************************/ - -/** - * Tuning policy for BlockPartitionTiles - */ -template < - int _PARTITIONS, - int _BLOCK_THREADS, - int _ITEMS_PER_THREAD, - PtxLoadModifier _LOAD_MODIFIER, - BlockScanAlgorithm _SCAN_ALGORITHM> -struct BlockPartitionTilesPolicy -{ - enum - { - PARTITIONS = _PARTITIONS, - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - }; - - static const PtxLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; -}; - - - -/** - * Tuple type for scanning partition membership flags - */ -template < - typename SizeT, - int PARTITIONS> -struct PartitionScanTuple; - - -/** - * Tuple type for scanning partition membership flags (specialized for 1 output partition) - */ -template -struct PartitionScanTuple : VectorHelper::Type -{ - __device__ __forceinline__ PartitionScanTuple operator+(const PartitionScanTuple &other) - { - PartitionScanTuple retval; - retval.x = x + other.x; - return retval; - } - - template - __device__ __forceinline__ void SetFlags(PredicateOp pred_op, T val) - { - this->x = pred_op(val); - } - - template - __device__ __forceinline__ void Scatter(PredicateOp pred_op, T val, OutputIteratorRA d_out, SizeT num_items) - { - if (pred_op(val)) - d_out[this->x - 1] = val; - } - -}; - - -/** - * Tuple type for scanning partition membership flags (specialized for 2 output partitions) - */ -template -struct PartitionScanTuple : VectorHelper::Type -{ - __device__ __forceinline__ PartitionScanTuple operator+(const PartitionScanTuple &other) - { - PartitionScanTuple retval; - retval.x = x + other.x; - retval.y = y + other.y; - return retval; - } - - template - __device__ __forceinline__ void SetFlags(PredicateOp pred_op, T val) - { - bool pred = pred_op(val); - this->x = pred; - this->y = !pred; - } - - template - __device__ __forceinline__ void Scatter(PredicateOp pred_op, T val, OutputIteratorRA d_out, SizeT num_items) - { - SizeT scatter_offset = (pred_op(val)) ? - this->x - 1 : - num_items - this->y; - - d_out[scatter_offset] = val; - } -}; - - - - -/****************************************************************************** - * Thread block abstractions - ******************************************************************************/ - -/** - * \brief BlockPartitionTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide list partitioning. - * - * Implements a single-pass "domino" strategy with adaptive prefix lookback. 
- */ -template < - typename BlockPartitionTilesPolicy, ///< Tuning policy - typename InputIteratorRA, ///< Input iterator type - typename OutputIteratorRA, ///< Output iterator type - typename PredicateOp, ///< Partition predicate functor type - typename SizeT> ///< Offset integer type -struct BlockPartitionTiles -{ - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - // Constants - enum - { - PARTITIONS = BlockPartitionTilesPolicy::PARTITIONS, - BLOCK_THREADS = BlockPartitionTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockPartitionTilesPolicy::ITEMS_PER_THREAD, - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - }; - - // Load modifier - static const PtxLoadModifier LOAD_MODIFIER = BlockPartitionTilesPolicy::LOAD_MODIFIER; - - // Data type of input iterator - typedef typename std::iterator_traits::value_type T; - - // Tuple type for scanning partition membership flags - typedef PartitionScanTuple PartitionScanTuple; - - // Tile status descriptor type - typedef ScanTileDescriptor ScanTileDescriptorT; - - // Block scan type for scanning membership flag scan_tuples - typedef BlockScan< - PartitionScanTuple, - BlockPartitionTilesPolicy::BLOCK_THREADS, - BlockPartitionTilesPolicy::SCAN_ALGORITHM> BlockScanT; - - // Callback type for obtaining inter-tile prefix during block scan - typedef DeviceScanBlockPrefixOp InterblockPrefixOp; - - // Shared memory type for this threadblock - struct TempStorage - { - typename InterblockPrefixOp::TempStorage prefix; // Smem needed for cooperative prefix callback - typename BlockScanT::TempStorage scan; // Smem needed for tile scanning - SizeT tile_idx; // Shared tile index - }; - - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - TempStorage &temp_storage; ///< Reference to temp_storage - InputIteratorRA d_in; ///< Input data - OutputIteratorRA d_out; ///< Output data - ScanTileDescriptorT *d_tile_status; ///< Global list of tile status - PredicateOp pred_op; ///< Unary predicate operator indicating membership in the first partition - SizeT num_items; ///< Total number of input items - - - //--------------------------------------------------------------------- - // Constructor - //--------------------------------------------------------------------- - - // Constructor - __device__ __forceinline__ - BlockPartitionTiles( - TempStorage &temp_storage, ///< Reference to temp_storage - InputIteratorRA d_in, ///< Input data - OutputIteratorRA d_out, ///< Output data - ScanTileDescriptorT *d_tile_status, ///< Global list of tile status - PredicateOp pred_op, ///< Unary predicate operator indicating membership in the first partition - SizeT num_items) ///< Total number of input items - : - temp_storage(temp_storage.Alias()), - d_in(d_in), - d_out(d_out), - d_tile_status(d_tile_status), - pred_op(pred_op), - num_items(num_items) - {} - - - //--------------------------------------------------------------------- - // Domino scan - //--------------------------------------------------------------------- - - /** - * Process a tile of input - */ - template - __device__ __forceinline__ void ConsumeTile( - int tile_idx, ///< Tile index - SizeT block_offset, ///< Tile offset - PartitionScanTuple &partition_ends) ///< Running total - { - T items[ITEMS_PER_THREAD]; - PartitionScanTuple scan_tuples[ITEMS_PER_THREAD]; - - // Load 
items - int valid_items = num_items - block_offset; - if (FULL_TILE) - LoadStriped(threadIdx.x, d_in + block_offset, items); - else - LoadStriped(threadIdx.x, d_in + block_offset, items, valid_items); - - // Prevent hoisting -// __syncthreads(); -// __threadfence_block(); - - // Set partition membership flags in scan scan_tuples - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - scan_tuples[ITEM].SetFlags(pred_op, items[ITEM]); - } - - // Perform inclusive scan over scan scan_tuples - PartitionScanTuple block_aggregate; - if (tile_idx == 0) - { - BlockScanT(temp_storage.scan).InclusiveScan(scan_tuples, scan_tuples, Sum(), block_aggregate); - partition_ends = block_aggregate; - - // Update tile status if there are successor tiles - if (FULL_TILE && (threadIdx.x == 0)) - ScanTileDescriptorT::SetPrefix(d_tile_status, block_aggregate); - } - else - { - InterblockPrefixOp prefix_op(d_tile_status, temp_storage.prefix, Sum(), tile_idx); - BlockScanT(temp_storage.scan).InclusiveScan(scan_tuples, scan_tuples, Sum(), block_aggregate, prefix_op); - partition_ends = prefix_op.inclusive_prefix; - } - - // Scatter items - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - // Scatter if not out-of-bounds - if (FULL_TILE || (threadIdx.x + (ITEM * BLOCK_THREADS) < valid_items)) - { - scan_tuples[ITEM].Scatter(pred_op, items[ITEM], d_out, num_items); - } - } - } - - - /** - * Dequeue and scan tiles of items as part of a domino scan - */ - __device__ __forceinline__ void ConsumeTiles( - GridQueue queue, ///< [in] Queue descriptor for assigning tiles of work to thread blocks - SizeT num_tiles, ///< [in] Total number of input tiles - PartitionScanTuple &partition_ends, ///< [out] Running partition end offsets - bool &is_last_tile) ///< [out] Whether or not this block handled the last tile (i.e., partition_ends is valid for the entire input) - { -#if CUB_PTX_ARCH < 200 - - // No concurrent kernels allowed and blocks are launched in increasing order, so just assign one tile per block (up to 65K blocks) - int tile_idx = blockIdx.x; - SizeT block_offset = SizeT(TILE_ITEMS) * tile_idx; - - if (block_offset + TILE_ITEMS <= num_items) - { - ConsumeTile(tile_idx, block_offset, partition_ends); - } - else if (block_offset < num_items) - { - ConsumeTile(tile_idx, block_offset, partition_ends); - } - is_last_tile = (tile_idx == num_tiles - 1); - -#else - - // Get first tile - if (threadIdx.x == 0) - temp_storage.tile_idx = queue.Drain(1); - - __syncthreads(); - - int tile_idx = temp_storage.tile_idx; - SizeT block_offset = SizeT(TILE_ITEMS) * tile_idx; - - while (block_offset + TILE_ITEMS <= num_items) - { - // Consume full tile - ConsumeTile(tile_idx, block_offset, partition_ends); - is_last_tile = (tile_idx == num_tiles - 1); - - // Get next tile - if (threadIdx.x == 0) - temp_storage.tile_idx = queue.Drain(1); - - __syncthreads(); - - tile_idx = temp_storage.tile_idx; - block_offset = SizeT(TILE_ITEMS) * tile_idx; - } - - // Consume a partially-full tile - if (block_offset < num_items) - { - ConsumeTile(tile_idx, block_offset, partition_ends); - is_last_tile = (tile_idx == num_tiles - 1); - } -#endif - } -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/block_radix_sort_downsweep_tiles.cuh b/kokkos/kokkos/TPL/cub/device/block/block_radix_sort_downsweep_tiles.cuh deleted file mode 100644 index 91d628e..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/block_radix_sort_downsweep_tiles.cuh +++ 
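// ---------------------------------------------------------------------------
// Illustrative aside (not part of the deleted files): the two-partition
// PartitionScanTuple above scatters a selected item to (inclusive count of
// selected items) - 1 and a rejected item to num_items - (inclusive count of
// rejected items), so the second partition grows backward from the tail.
// A host-side C++ sketch of that addressing rule with hypothetical names
// (a sequential loop stands in for the tile-wide inclusive scan):
// ---------------------------------------------------------------------------
#include <cstdio>
#include <vector>

int main()
{
    std::vector<int> in  = {5, 2, 9, 4, 7, 1};
    std::vector<int> out(in.size());
    auto pred = [](int v) { return v < 5; };     // membership in the first partition

    int selected = 0;                            // running tuple .x
    int rejected = 0;                            // running tuple .y
    const int num_items = static_cast<int>(in.size());

    for (int v : in)
    {
        if (pred(v))
            out[++selected - 1] = v;             // first partition fills from the front
        else
            out[num_items - ++rejected] = v;     // second partition fills from the back
    }

    for (int v : out) printf("%d ", v);          // 2 4 1 7 9 5
    printf("\n");
    return 0;
}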
/dev/null @@ -1,713 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * BlockRadixSortDownsweepTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep. - */ - - -#pragma once - -#include "../../thread/thread_load.cuh" -#include "../../block/block_load.cuh" -#include "../../block/block_store.cuh" -#include "../../block/block_radix_rank.cuh" -#include "../../block/block_exchange.cuh" -#include "../../util_type.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Tuning policy types - ******************************************************************************/ - -/** - * Types of scattering strategies - */ -enum RadixSortScatterAlgorithm -{ - RADIX_SORT_SCATTER_DIRECT, ///< Scatter directly from registers to global bins - RADIX_SORT_SCATTER_TWO_PHASE, ///< First scatter from registers into shared memory bins, then into global bins -}; - - -/** - * Tuning policy for BlockRadixSortDownsweepTiles - */ -template < - int _BLOCK_THREADS, ///< The number of threads per CTA - int _ITEMS_PER_THREAD, ///< The number of consecutive downsweep keys to process per thread - BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use - PtxLoadModifier _LOAD_MODIFIER, ///< The PTX cache-modifier to use for loads - bool _EXCHANGE_TIME_SLICING, ///< Whether or not to time-slice key/value exchanges through shared memory to lower shared memory pressure - bool _MEMOIZE_OUTER_SCAN, ///< Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure. 
See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details. - BlockScanAlgorithm _INNER_SCAN_ALGORITHM, ///< The cub::BlockScanAlgorithm algorithm to use - RadixSortScatterAlgorithm _SCATTER_ALGORITHM, ///< The scattering strategy to use - cudaSharedMemConfig _SMEM_CONFIG, ///< Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) - int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) -struct BlockRadixSortDownsweepTilesPolicy -{ - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - EXCHANGE_TIME_SLICING = _EXCHANGE_TIME_SLICING, - RADIX_BITS = _RADIX_BITS, - MEMOIZE_OUTER_SCAN = _MEMOIZE_OUTER_SCAN, - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - }; - - static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; - static const PtxLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - static const BlockScanAlgorithm INNER_SCAN_ALGORITHM = _INNER_SCAN_ALGORITHM; - static const RadixSortScatterAlgorithm SCATTER_ALGORITHM = _SCATTER_ALGORITHM; - static const cudaSharedMemConfig SMEM_CONFIG = _SMEM_CONFIG; - - typedef BlockRadixSortDownsweepTilesPolicy< - BLOCK_THREADS, - ITEMS_PER_THREAD, - LOAD_ALGORITHM, - LOAD_MODIFIER, - EXCHANGE_TIME_SLICING, - MEMOIZE_OUTER_SCAN, - INNER_SCAN_ALGORITHM, - SCATTER_ALGORITHM, - SMEM_CONFIG, - CUB_MAX(1, RADIX_BITS - 1)> AltPolicy; -}; - - -/****************************************************************************** - * Thread block abstractions - ******************************************************************************/ - -/** - * CTA-wide "downsweep" abstraction for distributing keys from - * a range of input tiles. - */ -template < - typename BlockRadixSortDownsweepTilesPolicy, - typename Key, - typename Value, - typename SizeT> -struct BlockRadixSortDownsweepTiles -{ - //--------------------------------------------------------------------- - // Type definitions and constants - //--------------------------------------------------------------------- - - // Appropriate unsigned-bits representation of Key - typedef typename Traits::UnsignedBits UnsignedBits; - - static const UnsignedBits MIN_KEY = Traits::MIN_KEY; - static const UnsignedBits MAX_KEY = Traits::MAX_KEY; - - static const BlockLoadAlgorithm LOAD_ALGORITHM = BlockRadixSortDownsweepTilesPolicy::LOAD_ALGORITHM; - static const PtxLoadModifier LOAD_MODIFIER = BlockRadixSortDownsweepTilesPolicy::LOAD_MODIFIER; - static const BlockScanAlgorithm INNER_SCAN_ALGORITHM = BlockRadixSortDownsweepTilesPolicy::INNER_SCAN_ALGORITHM; - static const RadixSortScatterAlgorithm SCATTER_ALGORITHM = BlockRadixSortDownsweepTilesPolicy::SCATTER_ALGORITHM; - static const cudaSharedMemConfig SMEM_CONFIG = BlockRadixSortDownsweepTilesPolicy::SMEM_CONFIG; - - enum - { - BLOCK_THREADS = BlockRadixSortDownsweepTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockRadixSortDownsweepTilesPolicy::ITEMS_PER_THREAD, - EXCHANGE_TIME_SLICING = BlockRadixSortDownsweepTilesPolicy::EXCHANGE_TIME_SLICING, - RADIX_BITS = BlockRadixSortDownsweepTilesPolicy::RADIX_BITS, - MEMOIZE_OUTER_SCAN = BlockRadixSortDownsweepTilesPolicy::MEMOIZE_OUTER_SCAN, - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - - RADIX_DIGITS = 1 << RADIX_BITS, - KEYS_ONLY = Equals::VALUE, - - WARP_THREADS = PtxArchProps::LOG_WARP_THREADS, - WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, - - BYTES_PER_SIZET = sizeof(SizeT), - LOG_BYTES_PER_SIZET = Log2::VALUE, - - LOG_SMEM_BANKS = PtxArchProps::LOG_SMEM_BANKS, - SMEM_BANKS = 1 << LOG_SMEM_BANKS, - - DIGITS_PER_SCATTER_PASS = 
BLOCK_THREADS / SMEM_BANKS, - SCATTER_PASSES = RADIX_DIGITS / DIGITS_PER_SCATTER_PASS, - - LOG_STORE_TXN_THREADS = LOG_SMEM_BANKS, - STORE_TXN_THREADS = 1 << LOG_STORE_TXN_THREADS, - }; - - // BlockRadixRank type - typedef BlockRadixRank< - BLOCK_THREADS, - RADIX_BITS, - MEMOIZE_OUTER_SCAN, - INNER_SCAN_ALGORITHM, - SMEM_CONFIG> BlockRadixRank; - - // BlockLoad type (keys) - typedef BlockLoad< - UnsignedBits*, - BLOCK_THREADS, - ITEMS_PER_THREAD, - LOAD_ALGORITHM, - LOAD_MODIFIER, - EXCHANGE_TIME_SLICING> BlockLoadKeys; - - // BlockLoad type (values) - typedef BlockLoad< - Value*, - BLOCK_THREADS, - ITEMS_PER_THREAD, - LOAD_ALGORITHM, - LOAD_MODIFIER, - EXCHANGE_TIME_SLICING> BlockLoadValues; - - // BlockExchange type (keys) - typedef BlockExchange< - UnsignedBits, - BLOCK_THREADS, - ITEMS_PER_THREAD, - EXCHANGE_TIME_SLICING> BlockExchangeKeys; - - // BlockExchange type (values) - typedef BlockExchange< - Value, - BLOCK_THREADS, - ITEMS_PER_THREAD, - EXCHANGE_TIME_SLICING> BlockExchangeValues; - - - /** - * Shared memory storage layout - */ - struct _TempStorage - { - SizeT relative_bin_offsets[RADIX_DIGITS + 1]; - bool short_circuit; - - union - { - typename BlockRadixRank::TempStorage ranking; - typename BlockLoadKeys::TempStorage load_keys; - typename BlockLoadValues::TempStorage load_values; - typename BlockExchangeKeys::TempStorage exchange_keys; - typename BlockExchangeValues::TempStorage exchange_values; - }; - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - //--------------------------------------------------------------------- - // Thread fields - //--------------------------------------------------------------------- - - // Shared storage for this CTA - _TempStorage &temp_storage; - - // Input and output device pointers - UnsignedBits *d_keys_in; - UnsignedBits *d_keys_out; - Value *d_values_in; - Value *d_values_out; - - // The global scatter base offset for each digit (valid in the first RADIX_DIGITS threads) - SizeT bin_offset; - - // The least-significant bit position of the current digit to extract - int current_bit; - - // Whether to short-ciruit - bool short_circuit; - - - - //--------------------------------------------------------------------- - // Utility methods - //--------------------------------------------------------------------- - - /** - * Decodes given keys to lookup digit offsets in shared memory - */ - __device__ __forceinline__ void DecodeRelativeBinOffsets( - UnsignedBits (&twiddled_keys)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD]) - { - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - UnsignedBits digit = BFE(twiddled_keys[KEY], current_bit, RADIX_BITS); - - // Lookup base digit offset from shared memory - relative_bin_offsets[KEY] = temp_storage.relative_bin_offsets[digit]; - } - } - - - /** - * Scatter ranked items to global memory - */ - template - __device__ __forceinline__ void ScatterItems( - T (&items)[ITEMS_PER_THREAD], - int (&local_ranks)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD], - T *d_out, - SizeT valid_items) - { - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - // Scatter if not out-of-bounds - if (FULL_TILE || (local_ranks[ITEM] < valid_items)) - { - d_out[relative_bin_offsets[ITEM] + local_ranks[ITEM]] = items[ITEM]; - } - } - } - - - /** - * Scatter ranked keys directly to global memory - */ - template - __device__ __forceinline__ void ScatterKeys( - UnsignedBits 
(&twiddled_keys)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD], - int (&ranks)[ITEMS_PER_THREAD], - SizeT valid_items, - Int2Type scatter_algorithm) - { - // Compute scatter offsets - DecodeRelativeBinOffsets(twiddled_keys, relative_bin_offsets); - - // Untwiddle keys before outputting - UnsignedBits keys[ITEMS_PER_THREAD]; - - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - keys[KEY] = Traits::TwiddleOut(twiddled_keys[KEY]); - } - - // Scatter to global - ScatterItems(keys, ranks, relative_bin_offsets, d_keys_out, valid_items); - } - - - /** - * Scatter ranked keys through shared memory, then to global memory - */ - template - __device__ __forceinline__ void ScatterKeys( - UnsignedBits (&twiddled_keys)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD], - int (&ranks)[ITEMS_PER_THREAD], - SizeT valid_items, - Int2Type scatter_algorithm) - { - // Exchange keys through shared memory - BlockExchangeKeys(temp_storage.exchange_keys).ScatterToStriped(twiddled_keys, ranks); - - // Compute striped local ranks - int local_ranks[ITEMS_PER_THREAD]; - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - local_ranks[ITEM] = threadIdx.x + (ITEM * BLOCK_THREADS); - } - - // Scatter directly - ScatterKeys( - twiddled_keys, - relative_bin_offsets, - local_ranks, - valid_items, - Int2Type()); - } - - - /** - * Scatter ranked values directly to global memory - */ - template - __device__ __forceinline__ void ScatterValues( - Value (&values)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD], - int (&ranks)[ITEMS_PER_THREAD], - SizeT valid_items, - Int2Type scatter_algorithm) - { - // Scatter to global - ScatterItems(values, ranks, relative_bin_offsets, d_values_out, valid_items); - } - - - /** - * Scatter ranked values through shared memory, then to global memory - */ - template - __device__ __forceinline__ void ScatterValues( - Value (&values)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD], - int (&ranks)[ITEMS_PER_THREAD], - SizeT valid_items, - Int2Type scatter_algorithm) - { - __syncthreads(); - - // Exchange keys through shared memory - BlockExchangeValues(temp_storage.exchange_values).ScatterToStriped(values, ranks); - - // Compute striped local ranks - int local_ranks[ITEMS_PER_THREAD]; - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - local_ranks[ITEM] = threadIdx.x + (ITEM * BLOCK_THREADS); - } - - // Scatter directly - ScatterValues( - values, - relative_bin_offsets, - local_ranks, - valid_items, - Int2Type()); - } - - - /** - * Load a tile of items (specialized for full tile) - */ - template - __device__ __forceinline__ void LoadItems( - BlockLoadT &block_loader, - T (&items)[ITEMS_PER_THREAD], - T *d_in, - SizeT valid_items, - Int2Type is_full_tile) - { - block_loader.Load(d_in, items); - } - - - /** - * Load a tile of items (specialized for partial tile) - */ - template - __device__ __forceinline__ void LoadItems( - BlockLoadT &block_loader, - T (&items)[ITEMS_PER_THREAD], - T *d_in, - SizeT valid_items, - Int2Type is_full_tile) - { - block_loader.Load(d_in, items, valid_items); - } - - - /** - * Truck along associated values - */ - template - __device__ __forceinline__ void GatherScatterValues( - _Value (&values)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD], - int (&ranks)[ITEMS_PER_THREAD], - SizeT block_offset, - SizeT valid_items) - { - BlockLoadValues loader(temp_storage.load_values); - LoadItems( - loader, - values, - d_values_in + block_offset, - 
valid_items, - Int2Type()); - - ScatterValues( - values, - relative_bin_offsets, - ranks, - valid_items, - Int2Type()); - } - - - /** - * Truck along associated values (specialized for key-only sorting) - */ - template - __device__ __forceinline__ void GatherScatterValues( - NullType (&values)[ITEMS_PER_THREAD], - SizeT (&relative_bin_offsets)[ITEMS_PER_THREAD], - int (&ranks)[ITEMS_PER_THREAD], - SizeT block_offset, - SizeT valid_items) - {} - - - /** - * Process tile - */ - template - __device__ __forceinline__ void ProcessTile( - SizeT block_offset, - const SizeT &valid_items = TILE_ITEMS) - { - // Per-thread tile data - UnsignedBits keys[ITEMS_PER_THREAD]; // Keys - UnsignedBits twiddled_keys[ITEMS_PER_THREAD]; // Twiddled keys - int ranks[ITEMS_PER_THREAD]; // For each key, the local rank within the CTA - SizeT relative_bin_offsets[ITEMS_PER_THREAD]; // For each key, the global scatter base offset of the corresponding digit - - if (LOAD_ALGORITHM != BLOCK_LOAD_DIRECT) __syncthreads(); - - // Assign max-key to all keys - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - keys[ITEM] = MAX_KEY; - } - - // Load tile of keys - BlockLoadKeys loader(temp_storage.load_keys); - LoadItems( - loader, - keys, - d_keys_in + block_offset, - valid_items, - Int2Type()); - - __syncthreads(); - - // Twiddle key bits if necessary - #pragma unroll - for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) - { - twiddled_keys[KEY] = Traits::TwiddleIn(keys[KEY]); - } - - // Rank the twiddled keys - int inclusive_digit_prefix; - BlockRadixRank(temp_storage.ranking).RankKeys( - twiddled_keys, - ranks, - current_bit, - inclusive_digit_prefix); - - // Update global scatter base offsets for each digit - if ((BLOCK_THREADS == RADIX_DIGITS) || (threadIdx.x < RADIX_DIGITS)) - { - int exclusive_digit_prefix; - - // Get exclusive digit prefix from inclusive prefix -#if CUB_PTX_ARCH >= 300 - exclusive_digit_prefix = ShuffleUp(inclusive_digit_prefix, 1); - if (threadIdx.x == 0) - exclusive_digit_prefix = 0; -#else - volatile int* exchange = reinterpret_cast(temp_storage.relative_bin_offsets); - exchange[threadIdx.x] = 0; - exchange[threadIdx.x + 1] = inclusive_digit_prefix; - exclusive_digit_prefix = exchange[threadIdx.x]; -#endif - - bin_offset -= exclusive_digit_prefix; - temp_storage.relative_bin_offsets[threadIdx.x] = bin_offset; - bin_offset += inclusive_digit_prefix; - } - - __syncthreads(); - - // Scatter keys - ScatterKeys(twiddled_keys, relative_bin_offsets, ranks, valid_items, Int2Type()); - - // Gather/scatter values - Value values[ITEMS_PER_THREAD]; - GatherScatterValues(values, relative_bin_offsets, ranks, block_offset, valid_items); - } - - - /** - * Copy tiles within the range of input - */ - template - __device__ __forceinline__ void Copy( - T *d_in, - T *d_out, - SizeT block_offset, - SizeT block_oob) - { - // Simply copy the input - while (block_offset + TILE_ITEMS <= block_oob) - { - T items[ITEMS_PER_THREAD]; - - LoadStriped(threadIdx.x, d_in + block_offset, items); - __syncthreads(); - StoreStriped(threadIdx.x, d_out + block_offset, items); - - block_offset += TILE_ITEMS; - } - - // Clean up last partial tile with guarded-I/O - if (block_offset < block_oob) - { - SizeT valid_items = block_oob - block_offset; - - T items[ITEMS_PER_THREAD]; - - LoadStriped(threadIdx.x, d_in + block_offset, items, valid_items); - __syncthreads(); - StoreStriped(threadIdx.x, d_out + block_offset, items, valid_items); - } - } - - - /** - * Copy tiles within the range of input (specialized for NullType) 
- */ - __device__ __forceinline__ void Copy( - NullType *d_in, - NullType *d_out, - SizeT block_offset, - SizeT block_oob) - {} - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - /** - * Constructor - */ - __device__ __forceinline__ BlockRadixSortDownsweepTiles( - TempStorage &temp_storage, - SizeT bin_offset, - Key *d_keys_in, - Key *d_keys_out, - Value *d_values_in, - Value *d_values_out, - int current_bit) - : - temp_storage(temp_storage.Alias()), - bin_offset(bin_offset), - d_keys_in(reinterpret_cast(d_keys_in)), - d_keys_out(reinterpret_cast(d_keys_out)), - d_values_in(d_values_in), - d_values_out(d_values_out), - current_bit(current_bit), - short_circuit(false) - {} - - - /** - * Constructor - */ - __device__ __forceinline__ BlockRadixSortDownsweepTiles( - TempStorage &temp_storage, - SizeT num_items, - SizeT *d_spine, - Key *d_keys_in, - Key *d_keys_out, - Value *d_values_in, - Value *d_values_out, - int current_bit) - : - temp_storage(temp_storage.Alias()), - d_keys_in(reinterpret_cast(d_keys_in)), - d_keys_out(reinterpret_cast(d_keys_out)), - d_values_in(d_values_in), - d_values_out(d_values_out), - current_bit(current_bit) - { - // Load digit bin offsets (each of the first RADIX_DIGITS threads will load an offset for that digit) - if (threadIdx.x < RADIX_DIGITS) - { - // Short circuit if the first block's histogram has only bin counts of only zeros or problem-size - SizeT first_block_bin_offset = d_spine[gridDim.x * threadIdx.x]; - int predicate = ((first_block_bin_offset == 0) || (first_block_bin_offset == num_items)); - this->temp_storage.short_circuit = WarpAll(predicate); - - // Load my block's bin offset for my bin - bin_offset = d_spine[(gridDim.x * threadIdx.x) + blockIdx.x]; - } - - __syncthreads(); - - short_circuit = this->temp_storage.short_circuit; - } - - - /** - * Distribute keys from a segment of input tiles. - */ - __device__ __forceinline__ void ProcessTiles( - SizeT block_offset, - const SizeT &block_oob) - { - if (short_circuit) - { - // Copy keys - Copy(d_keys_in, d_keys_out, block_offset, block_oob); - - // Copy values - Copy(d_values_in, d_values_out, block_offset, block_oob); - } - else - { - // Process full tiles of tile_items - while (block_offset + TILE_ITEMS <= block_oob) - { - ProcessTile(block_offset); - block_offset += TILE_ITEMS; - } - - // Clean up last partial tile with guarded-I/O - if (block_offset < block_oob) - { - ProcessTile(block_offset, block_oob - block_offset); - } - } - } -}; - - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/block_radix_sort_upsweep_tiles.cuh b/kokkos/kokkos/TPL/cub/device/block/block_radix_sort_upsweep_tiles.cuh deleted file mode 100644 index 22f8c9c..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/block_radix_sort_upsweep_tiles.cuh +++ /dev/null @@ -1,464 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * BlockRadixSortUpsweepTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep. - */ - -#pragma once - -#include "../../thread/thread_reduce.cuh" -#include "../../thread/thread_load.cuh" -#include "../../block/block_load.cuh" -#include "../../util_type.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/****************************************************************************** - * Tuning policy types - ******************************************************************************/ - -/** - * Tuning policy for BlockRadixSortUpsweepTiles - */ -template < - int _BLOCK_THREADS, ///< The number of threads per CTA - int _ITEMS_PER_THREAD, ///< The number of items to load per thread per tile - PtxLoadModifier _LOAD_MODIFIER, ///< Load cache-modifier - int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) -struct BlockRadixSortUpsweepTilesPolicy -{ - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - RADIX_BITS = _RADIX_BITS, - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - }; - - static const PtxLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - - typedef BlockRadixSortUpsweepTilesPolicy< - BLOCK_THREADS, - ITEMS_PER_THREAD, - LOAD_MODIFIER, - CUB_MAX(1, RADIX_BITS - 1)> AltPolicy; -}; - - -/****************************************************************************** - * Thread block abstractions - ******************************************************************************/ - -/** - * \brief BlockRadixSortUpsweepTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep. - * - * Computes radix digit histograms over a range of input tiles. 
- */ -template < - typename BlockRadixSortUpsweepTilesPolicy, - typename Key, - typename SizeT> -struct BlockRadixSortUpsweepTiles -{ - - //--------------------------------------------------------------------- - // Type definitions and constants - //--------------------------------------------------------------------- - - typedef typename Traits::UnsignedBits UnsignedBits; - - // Integer type for digit counters (to be packed into words of PackedCounters) - typedef unsigned char DigitCounter; - - // Integer type for packing DigitCounters into columns of shared memory banks - typedef unsigned int PackedCounter; - - static const PtxLoadModifier LOAD_MODIFIER = BlockRadixSortUpsweepTilesPolicy::LOAD_MODIFIER; - - enum - { - RADIX_BITS = BlockRadixSortUpsweepTilesPolicy::RADIX_BITS, - BLOCK_THREADS = BlockRadixSortUpsweepTilesPolicy::BLOCK_THREADS, - KEYS_PER_THREAD = BlockRadixSortUpsweepTilesPolicy::ITEMS_PER_THREAD, - - RADIX_DIGITS = 1 << RADIX_BITS, - - LOG_WARP_THREADS = PtxArchProps::LOG_WARP_THREADS, - WARP_THREADS = 1 << LOG_WARP_THREADS, - WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, - - TILE_ITEMS = BLOCK_THREADS * KEYS_PER_THREAD, - - BYTES_PER_COUNTER = sizeof(DigitCounter), - LOG_BYTES_PER_COUNTER = Log2::VALUE, - - PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), - LOG_PACKING_RATIO = Log2::VALUE, - - LOG_COUNTER_LANES = CUB_MAX(0, RADIX_BITS - LOG_PACKING_RATIO), - COUNTER_LANES = 1 << LOG_COUNTER_LANES, - - // To prevent counter overflow, we must periodically unpack and aggregate the - // digit counters back into registers. Each counter lane is assigned to a - // warp for aggregation. - - LANES_PER_WARP = CUB_MAX(1, (COUNTER_LANES + WARPS - 1) / WARPS), - - // Unroll tiles in batches without risk of counter overflow - UNROLL_COUNT = CUB_MIN(64, 255 / KEYS_PER_THREAD), - UNROLLED_ELEMENTS = UNROLL_COUNT * TILE_ITEMS, - }; - - - - /** - * Shared memory storage layout - */ - struct _TempStorage - { - union - { - DigitCounter digit_counters[COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO]; - PackedCounter packed_counters[COUNTER_LANES][BLOCK_THREADS]; - SizeT digit_partials[RADIX_DIGITS][WARP_THREADS + 1]; - }; - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - //--------------------------------------------------------------------- - // Thread fields (aggregate state bundle) - //--------------------------------------------------------------------- - - // Shared storage for this CTA - _TempStorage &temp_storage; - - // Thread-local counters for periodically aggregating composite-counter lanes - SizeT local_counts[LANES_PER_WARP][PACKING_RATIO]; - - // Input and output device pointers - UnsignedBits *d_keys_in; - - // The least-significant bit position of the current digit to extract - int current_bit; - - - - //--------------------------------------------------------------------- - // Helper structure for templated iteration - //--------------------------------------------------------------------- - - // Iterate - template - struct Iterate - { - enum { - HALF = (MAX / 2), - }; - - // BucketKeys - static __device__ __forceinline__ void BucketKeys( - BlockRadixSortUpsweepTiles &cta, - UnsignedBits keys[KEYS_PER_THREAD]) - { - cta.Bucket(keys[COUNT]); - - // Next - Iterate::BucketKeys(cta, keys); - } - - // ProcessTiles - static __device__ __forceinline__ void ProcessTiles(BlockRadixSortUpsweepTiles &cta, SizeT block_offset) - { - // Next - Iterate<1, HALF>::ProcessTiles(cta, block_offset); - 
Iterate<1, MAX - HALF>::ProcessTiles(cta, block_offset + (HALF * TILE_ITEMS)); - } - }; - - // Terminate - template - struct Iterate - { - // BucketKeys - static __device__ __forceinline__ void BucketKeys(BlockRadixSortUpsweepTiles &cta, UnsignedBits keys[KEYS_PER_THREAD]) {} - - // ProcessTiles - static __device__ __forceinline__ void ProcessTiles(BlockRadixSortUpsweepTiles &cta, SizeT block_offset) - { - cta.ProcessFullTile(block_offset); - } - }; - - - //--------------------------------------------------------------------- - // Utility methods - //--------------------------------------------------------------------- - - /** - * Decode a key and increment corresponding smem digit counter - */ - __device__ __forceinline__ void Bucket(UnsignedBits key) - { - // Perform transform op - UnsignedBits converted_key = Traits::TwiddleIn(key); - - // Add in sub-counter offset - UnsignedBits sub_counter = BFE(converted_key, current_bit, LOG_PACKING_RATIO); - - // Add in row offset - UnsignedBits row_offset = BFE(converted_key, current_bit + LOG_PACKING_RATIO, LOG_COUNTER_LANES); - - // Increment counter - temp_storage.digit_counters[row_offset][threadIdx.x][sub_counter]++; - - } - - - /** - * Reset composite counters - */ - __device__ __forceinline__ void ResetDigitCounters() - { - #pragma unroll - for (int LANE = 0; LANE < COUNTER_LANES; LANE++) - { - temp_storage.packed_counters[LANE][threadIdx.x] = 0; - } - } - - - /** - * Reset the unpacked counters in each thread - */ - __device__ __forceinline__ void ResetUnpackedCounters() - { - #pragma unroll - for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) - { - #pragma unroll - for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) - { - local_counts[LANE][UNPACKED_COUNTER] = 0; - } - } - } - - - /** - * Extracts and aggregates the digit counters for each counter lane - * owned by this warp - */ - __device__ __forceinline__ void UnpackDigitCounts() - { - unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; - unsigned int warp_tid = threadIdx.x & (WARP_THREADS - 1); - - #pragma unroll - for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) - { - const int counter_lane = (LANE * WARPS) + warp_id; - if (counter_lane < COUNTER_LANES) - { - #pragma unroll - for (int PACKED_COUNTER = 0; PACKED_COUNTER < BLOCK_THREADS; PACKED_COUNTER += WARP_THREADS) - { - #pragma unroll - for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) - { - SizeT counter = temp_storage.digit_counters[counter_lane][warp_tid + PACKED_COUNTER][UNPACKED_COUNTER]; - local_counts[LANE][UNPACKED_COUNTER] += counter; - } - } - } - } - } - - - /** - * Places unpacked counters into smem for final digit reduction - */ - __device__ __forceinline__ void ReduceUnpackedCounts(SizeT &bin_count) - { - unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; - unsigned int warp_tid = threadIdx.x & (WARP_THREADS - 1); - - // Place unpacked digit counters in shared memory - #pragma unroll - for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) - { - int counter_lane = (LANE * WARPS) + warp_id; - if (counter_lane < COUNTER_LANES) - { - int digit_row = counter_lane << LOG_PACKING_RATIO; - - #pragma unroll - for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) - { - temp_storage.digit_partials[digit_row + UNPACKED_COUNTER][warp_tid] = - local_counts[LANE][UNPACKED_COUNTER]; - } - } - } - - __syncthreads(); - - // Rake-reduce bin_count reductions - if (threadIdx.x < RADIX_DIGITS) - { - bin_count = ThreadReduce( - 
temp_storage.digit_partials[threadIdx.x], - Sum()); - } - } - - - /** - * Processes a single, full tile - */ - __device__ __forceinline__ void ProcessFullTile(SizeT block_offset) - { - // Tile of keys - UnsignedBits keys[KEYS_PER_THREAD]; - - LoadStriped(threadIdx.x, d_keys_in + block_offset, keys); - - // Prevent hoisting -// __threadfence_block(); -// __syncthreads(); - - // Bucket tile of keys - Iterate<0, KEYS_PER_THREAD>::BucketKeys(*this, keys); - } - - - /** - * Processes a single load (may have some threads masked off) - */ - __device__ __forceinline__ void ProcessPartialTile( - SizeT block_offset, - const SizeT &block_oob) - { - // Process partial tile if necessary using single loads - block_offset += threadIdx.x; - while (block_offset < block_oob) - { - // Load and bucket key - UnsignedBits key = ThreadLoad(d_keys_in + block_offset); - Bucket(key); - block_offset += BLOCK_THREADS; - } - } - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - /** - * Constructor - */ - __device__ __forceinline__ BlockRadixSortUpsweepTiles( - TempStorage &temp_storage, - Key *d_keys_in, - int current_bit) - : - temp_storage(temp_storage.Alias()), - d_keys_in(reinterpret_cast(d_keys_in)), - current_bit(current_bit) - {} - - - /** - * Compute radix digit histograms from a segment of input tiles. - */ - __device__ __forceinline__ void ProcessTiles( - SizeT block_offset, - const SizeT &block_oob, - SizeT &bin_count) ///< [out] The digit count for tid'th bin (output param, valid in the first RADIX_DIGITS threads) - { - // Reset digit counters in smem and unpacked counters in registers - ResetDigitCounters(); - ResetUnpackedCounters(); - - // Unroll batches of full tiles - while (block_offset + UNROLLED_ELEMENTS <= block_oob) - { - Iterate<0, UNROLL_COUNT>::ProcessTiles(*this, block_offset); - block_offset += UNROLLED_ELEMENTS; - - __syncthreads(); - - // Aggregate back into local_count registers to prevent overflow - UnpackDigitCounts(); - - __syncthreads(); - - // Reset composite counters in lanes - ResetDigitCounters(); - } - - // Unroll single full tiles - while (block_offset + TILE_ITEMS <= block_oob) - { - ProcessFullTile(block_offset); - block_offset += TILE_ITEMS; - } - - // Process partial tile if necessary - ProcessPartialTile( - block_offset, - block_oob); - - __syncthreads(); - - // Aggregate back into local_count registers - UnpackDigitCounts(); - - __syncthreads(); - - // Final raking reduction of counts by bin - ReduceUnpackedCounts(bin_count); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/block_reduce_by_key_tiles.cuh b/kokkos/kokkos/TPL/cub/device/block/block_reduce_by_key_tiles.cuh deleted file mode 100644 index 99e1980..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/block_reduce_by_key_tiles.cuh +++ /dev/null @@ -1,399 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockReduceByKeyiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. - */ - -#pragma once - -#include - -#include "scan_tiles_types.cuh" -#include "../../block/block_load.cuh" -#include "../../block/block_discontinuity.cuh" -#include "../../block/block_scan.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Utility data types - ******************************************************************************/ - -/// Scan tuple data type for reduce-value-by-key -template -struct ReduceByKeyuple -{ - Value value; // Initially set as value, contains segment aggregate after prefix scan - SizeT flag; // Initially set as a tail flag, contains scatter offset after prefix scan -}; - - -/// Binary reduce-by-key scan operator -template -struct ReduceByKeyScanOp -{ - /// Reduction functor - ReductionOp reduction_op; - - /// Constructor - ReduceByKeyScanOp(ReductionOp reduction_op) : reduction_op(reduction_op) - {} - - /// Binary scan operator - template - __device__ __forceinline__ ReduceByKeyuple operator()( - const ReduceByKeyuple &first, - const ReduceByKeyuple &second) - { - ReduceByKeyuple retval; - retval.val = (second.flag) ? 
second.val : reduction_op(first.val, second.val); - retval.flag = first.flag + second.flag; - return retval; - } -}; - - - -/****************************************************************************** - * Tuning policy types - ******************************************************************************/ - -/** - * Tuning policy for BlockReduceByKeyiles - */ -template < - int _BLOCK_THREADS, - int _ITEMS_PER_THREAD, - BlockLoadAlgorithm _LOAD_ALGORITHM, - bool _LOAD_WARP_TIME_SLICING, - PtxLoadModifier _LOAD_MODIFIER, - BlockScanAlgorithm _SCAN_ALGORITHM> -struct BlockReduceByKeyilesPolicy -{ - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - LOAD_WARP_TIME_SLICING = _LOAD_WARP_TIME_SLICING, - }; - - static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; - static const PtxLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; -}; - - -/****************************************************************************** - * Thread block abstractions - ******************************************************************************/ - -/** - * \brief BlockReduceByKeyiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan. - */ -template < - typename BlockReduceByKeyilesPolicy, ///< Tuning policy - typename KeyInputIteratorRA, ///< Random-access input iterator type for keys - typename KeyOutputIteratorRA, ///< Random-access output iterator type for keys - typename ValueInputIteratorRA, ///< Random-access input iterator type for values - typename ValueOutputIteratorRA, ///< Random-access output iterator type for values - typename ReductionOp, ///< Reduction functor type - typename SizeT> ///< Offset integer type -struct BlockReduceByKeyiles -{ - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - // Data types of input iterators - typedef typename std::iterator_traits::value_type Key; // Key data type - typedef typename std::iterator_traits::value_type Value; // Value data type - - // Constants - enum - { - BLOCK_THREADS = BlockReduceByKeyilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockReduceByKeyilesPolicy::ITEMS_PER_THREAD, - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - STATUS_PADDING = PtxArchProps::WARP_THREADS, - }; - - // Block load type for keys - typedef BlockLoad< - KeyInputIteratorRA, - BlockReduceByKeyilesPolicy::BLOCK_THREADS, - BlockReduceByKeyilesPolicy::ITEMS_PER_THREAD, - BlockReduceByKeyilesPolicy::LOAD_ALGORITHM, - BlockReduceByKeyilesPolicy::LOAD_MODIFIER, - BlockReduceByKeyilesPolicy::LOAD_WARP_TIME_SLICING> BlockLoadKeys; - - // Block load type for values - typedef BlockLoad< - ValueInputIteratorRA, - BlockReduceByKeyilesPolicy::BLOCK_THREADS, - BlockReduceByKeyilesPolicy::ITEMS_PER_THREAD, - BlockReduceByKeyilesPolicy::LOAD_ALGORITHM, - BlockReduceByKeyilesPolicy::LOAD_MODIFIER, - BlockReduceByKeyilesPolicy::LOAD_WARP_TIME_SLICING> BlockLoadValues; - - // Block discontinuity type for setting tail flags - typedef BlockDiscontinuity BlockDiscontinuityKeys; - - // Scan tuple type - typedef ReduceByKeyuple ScanTuple; - - // Tile status descriptor type - typedef ScanTileDescriptor ScanTileDescriptorT; - - // Block scan functor type - typedef ReduceByKeyScanOp ScanOp; - - // Block scan prefix callback type - typedef DeviceScanBlockPrefixOp PrefixCallback; - - // Block scan type - typedef BlockScan< - 
ScanTuple, - BlockReduceByKeyilesPolicy::BLOCK_THREADS, - BlockReduceByKeyilesPolicy::SCAN_ALGORITHM> BlockScanT; - - /// Shared memory type for this threadblock - struct _TempStorage - { - union - { - typename BlockLoadKeys::TempStorage load_keys; // Smem needed for loading tiles of keys - typename BlockLoadValues::TempStorage load_values; // Smem needed for loading tiles of values - struct - { - typename BlockScanT::TempStorage scan; // Smem needed for tile scanning - typename PrefixCallback::TempStorage prefix; // Smem needed for cooperative prefix callback - }; - }; - - typename BlockDiscontinuityKeys::TempStorage flagging; // Smem needed for tile scanning - SizeT tile_idx; // Shared tile index - }; - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - _TempStorage &temp_storage; ///< Reference to temp_storage - KeyInputIteratorRA d_keys_in; ///< Key input data - KeyOutputIteratorRA d_keys_out; ///< Key output data - ValueInputIteratorRA d_values_in; ///< Value input data - ValueOutputIteratorRA d_values_out; ///< Value output data - ScanTileDescriptorT *d_tile_status; ///< Global list of tile status - ScanOp scan_op; ///< Binary scan operator - int num_tiles; ///< Total number of input tiles for the entire problem - SizeT num_items; ///< Total number of scan items for the entire problem - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - // Constructor - __device__ __forceinline__ - BlockReduceByKeyiles( - TempStorage &temp_storage, ///< Reference to temp_storage - KeyInputIteratorRA d_keys_in, ///< Key input data - KeyOutputIteratorRA d_keys_out, ///< Key output data - ValueInputIteratorRA d_values_in, ///< Value input data - ValueOutputIteratorRA d_values_out, ///< Value output data - ScanTileDescriptorT *d_tile_status, ///< Global list of tile status - ReductionOp reduction_op, ///< Binary scan operator - int num_tiles, ///< Total number of input tiles for the entire problem - SizeT num_items) ///< Total number of scan items for the entire problem - : - temp_storage(temp_storage.Alias()), - d_keys_in(d_keys_in), - d_keys_out(d_keys_out), - d_values_in(d_values_in), - d_values_out(d_values_out), - d_tile_status(d_tile_status), - scan_op(reduction_op), - num_tiles(num_tiles), - num_items(num_items) - {} - - - /** - * Process a tile of input - */ - template - __device__ __forceinline__ void ConsumeTile( - int tile_idx, ///< Tile index - SizeT block_offset, ///< Tile offset - int valid_items = TILE_ITEMS) ///< Number of valid items in the tile - { - Key keys[ITEMS_PER_THREAD]; - Value values[ITEMS_PER_THREAD]; - int tail_flags[ITEMS_PER_THREAD]; - ScanTuple scan_tuples[ITEMS_PER_THREAD]; - - // Load keys - if (FULL_TILE) - BlockLoadKeys(temp_storage.load_keys).Load(d_keys_in + block_offset, keys); - else - BlockLoadKeys(temp_storage.load_keys).Load(d_keys_in + block_offset, keys, valid_items); - - // Set tail flags - if (tile_idx == num_tiles - 1) - { - // Last tile - BlockDiscontinuityKeys(temp_storage.flagging).FlagTails(tail_flags, keys, Equality()); - } - else - { - // Preceding tiles require the first element of the next tile - Key tile_suffix_item; - if (threadIdx.x == 0) - tile_suffix_item = d_keys_in[block_offset + TILE_ITEMS]; - 
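At this point in ConsumeTile the keys are loaded and the first key of the following tile has been fetched, so tail flags can be set correctly across the tile boundary: the last item of every run of equal keys is flagged, the (value, flag) tuples are prefix-scanned so each tail ends up carrying its segment's aggregate and a scatter offset, and only the flagged items are written out. A serial C++ sketch of the same reduce-by-key idea, with illustrative names and a plain loop standing in for the cooperative block-wide scan:

#include <cstdio>
#include <vector>

// Illustrative only: serial reduce-by-key. Equal adjacent keys form a segment;
// one (key, aggregate) pair is emitted per segment, mirroring what the
// tail-flag + tuple-scan + scatter steps accomplish cooperatively on the GPU.
template <typename Key, typename Value, typename ReductionOp>
void ReduceByKeySerial(const std::vector<Key>& keys,
                       const std::vector<Value>& values,
                       std::vector<Key>& keys_out,
                       std::vector<Value>& values_out,
                       ReductionOp reduction_op)
{
    for (std::size_t i = 0; i < keys.size(); ++i)
    {
        Value aggregate = values[i];
        // Keep accumulating while the next key equals the current one
        // (i.e., no tail flag yet).
        while (i + 1 < keys.size() && keys[i + 1] == keys[i])
        {
            aggregate = reduction_op(aggregate, values[i + 1]);
            ++i;
        }
        // Item i is the tail of its segment: scatter key and aggregate.
        keys_out.push_back(keys[i]);
        values_out.push_back(aggregate);
    }
}

int main()
{
    std::vector<int> keys   = {1, 1, 2, 2, 2, 5};
    std::vector<int> values = {1, 2, 3, 4, 5, 6};
    std::vector<int> k_out, v_out;
    ReduceByKeySerial(keys, values, k_out, v_out,
                      [](int a, int b) { return a + b; });
    for (std::size_t i = 0; i < k_out.size(); ++i)
        std::printf("key %d -> %d\n", k_out[i], v_out[i]);   // 1->3, 2->12, 5->6
    return 0;
}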
- BlockDiscontinuityKeys(temp_storage.flagging).FlagTails(tail_flags, keys, Equality(), tile_suffix_item); - } - - __syncthreads(); - - // Load values - if (FULL_TILE) - BlockLoadValues(temp_storage.load_values).Load(d_values_in + block_offset, values); - else - BlockLoadValues(temp_storage.load_values).Load(d_values_in + block_offset, values, valid_items); - - // Assemble scan tuples - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - scan_tuples[ITEM].value = values[ITEM]; - scan_tuples[ITEM].flag = tail_flags[ITEM]; - } - - __syncthreads(); - - // Perform inclusive prefix scan - ScanTuple block_aggregate; - if (tile_idx == 0) - { - // Without prefix callback - BlockScanT(temp_storage.scan).InclusiveScan(scan_tuples, scan_tuples, scan_op, block_aggregate); - - // Update tile status - if (threadIdx.x == 0) - ScanTileDescriptorT::SetPrefix(d_tile_status, block_aggregate); - } - else - { - // With prefix callback - PrefixCallback prefix_op(d_tile_status, temp_storage.prefix, scan_op, tile_idx); - BlockScanT(temp_storage.scan).InclusiveScan(scan_tuples, scan_tuples, scan_op, block_aggregate, prefix_op); - } - - // Scatter flagged keys and values to output - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - int tile_item = (threadIdx.x * ITEMS_PER_THREAD) + ITEM; - - // Set the head flag on the last item in a partially-full tile - if (!FULL_TILE && (tile_item == valid_items - 1)) - tail_flags[ITEM] = 1; - - // Decrement scatter offset - scan_tuples[ITEM].flag--; - - // Scatter key and aggregate value if flagged and in range - if ((FULL_TILE || (tile_item < valid_items)) && (tail_flags[ITEM])) - { - d_keys_out[scan_tuples[ITEM].flag] = keys[ITEM]; - d_values_out[scan_tuples[ITEM].flag] = scan_tuples[ITEM].value; - } - } - } - - - - /** - * Dequeue and scan tiles of elements - */ - __device__ __forceinline__ void ProcessTiles(GridQueue queue) ///< Queue descriptor for assigning tiles of work to thread blocks - { - // We give each thread block at least one tile of input - int tile_idx = blockIdx.x; - - // Consume full tiles of input - SizeT block_offset = SizeT(TILE_ITEMS) * tile_idx; - while (block_offset + TILE_ITEMS <= num_items) - { - ConsumeTile(tile_idx, block_offset); - - // Get next tile -#if CUB_PTX_ARCH < 200 - // No concurrent kernels allowed, so just stripe tiles - tile_idx += gridDim.x; -#else - // Concurrent kernels are allowed, so we must only use active blocks to dequeue tile indices - if (threadIdx.x == 0) - temp_storage.tile_idx = queue.Drain(1) + gridDim.x; - - __syncthreads(); - - tile_idx = temp_storage.tile_idx; -#endif - block_offset = SizeT(TILE_ITEMS) * tile_idx; - } - - // Consume a partially-full tile - if (block_offset < num_items) - { - // Consume a partially-full tile - int valid_items = num_items - block_offset; - ConsumeTile(tile_idx, block_offset, valid_items); - } - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/block_reduce_tiles.cuh b/kokkos/kokkos/TPL/cub/device/block/block_reduce_tiles.cuh deleted file mode 100644 index a83c098..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/block_reduce_tiles.cuh +++ /dev/null @@ -1,375 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockReduceTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction. - */ - -#pragma once - -#include - -#include "../../block/block_load.cuh" -#include "../../block/block_reduce.cuh" -#include "../../grid/grid_mapping.cuh" -#include "../../grid/grid_queue.cuh" -#include "../../grid/grid_even_share.cuh" -#include "../../util_vector.cuh" -#include "../../util_namespace.cuh" - - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Tuning policy types - ******************************************************************************/ - -/** - * Tuning policy for BlockReduceTiles - */ -template < - int _BLOCK_THREADS, ///< Threads per thread block - int _ITEMS_PER_THREAD, ///< Items per thread per tile of input - int _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load - BlockReduceAlgorithm _BLOCK_ALGORITHM, ///< Cooperative block-wide reduction algorithm to use - PtxLoadModifier _LOAD_MODIFIER, ///< PTX load modifier - GridMappingStrategy _GRID_MAPPING> ///< How to map tiles of input onto thread blocks -struct BlockReduceTilesPolicy -{ - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - VECTOR_LOAD_LENGTH = _VECTOR_LOAD_LENGTH, - }; - - static const BlockReduceAlgorithm BLOCK_ALGORITHM = _BLOCK_ALGORITHM; - static const GridMappingStrategy GRID_MAPPING = _GRID_MAPPING; - static const PtxLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; -}; - - - -/****************************************************************************** - * Thread block abstractions - ******************************************************************************/ - -/** - * \brief BlockReduceTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction. 
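As the comment goes on to note, each thread first folds the values it loads into a private running partial, and only at the very end is one cooperative block-wide reduction performed over those partials. A host-side sketch of that accumulate-privately-then-combine-once shape, with the "threads" replaced by a plain loop and all names chosen here purely for illustration:

#include <cstdio>
#include <numeric>
#include <vector>

// Illustrative only: each "thread" reduces a strided slice of the input into a
// private partial; the partials are combined once at the end, which is the role
// the block-wide reduction plays for the real thread block.
int StripedReduce(const std::vector<int>& items, int num_threads)
{
    std::vector<int> partials(num_threads, 0);
    for (int tid = 0; tid < num_threads; ++tid)
        for (std::size_t i = tid; i < items.size(); i += num_threads)
            partials[tid] += items[i];                 // per-thread running aggregate

    // Final combine of the per-thread partials.
    return std::accumulate(partials.begin(), partials.end(), 0);
}

int main()
{
    std::vector<int> items(100);
    std::iota(items.begin(), items.end(), 1);           // 1..100
    std::printf("sum = %d\n", StripedReduce(items, 8)); // 5050
    return 0;
}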
- * - * Each thread reduces only the values it loads. If \p FIRST_TILE, this - * partial reduction is stored into \p thread_aggregate. Otherwise it is - * accumulated into \p thread_aggregate. - */ -template < - typename BlockReduceTilesPolicy, - typename InputIteratorRA, - typename SizeT, - typename ReductionOp> -struct BlockReduceTiles -{ - - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - typedef typename std::iterator_traits::value_type T; // Type of input iterator - typedef VectorHelper VecHelper; // Helper type for vectorizing loads of T - typedef typename VecHelper::Type VectorT; // Vector of T - - // Constants - enum - { - BLOCK_THREADS = BlockReduceTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockReduceTilesPolicy::ITEMS_PER_THREAD, - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - VECTOR_LOAD_LENGTH = BlockReduceTilesPolicy::VECTOR_LOAD_LENGTH, - - // Can vectorize according to the policy if the input iterator is a native pointer to a built-in primitive - CAN_VECTORIZE = (BlockReduceTilesPolicy::VECTOR_LOAD_LENGTH > 1) && - (IsPointer::VALUE) && - (VecHelper::BUILT_IN), - - }; - - static const PtxLoadModifier LOAD_MODIFIER = BlockReduceTilesPolicy::LOAD_MODIFIER; - static const BlockReduceAlgorithm BLOCK_ALGORITHM = BlockReduceTilesPolicy::BLOCK_ALGORITHM; - - // Parameterized BlockReduce primitive - typedef BlockReduce BlockReduceT; - - /// Shared memory type required by this thread block - typedef typename BlockReduceT::TempStorage _TempStorage; - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - T thread_aggregate; ///< Each thread's partial reduction - _TempStorage& temp_storage; ///< Reference to temp_storage - InputIteratorRA d_in; ///< Input data to reduce - ReductionOp reduction_op; ///< Binary reduction operator - int first_tile_size; ///< Size of first tile consumed - bool input_aligned; ///< Whether or not input is vector-aligned - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - /** - * Constructor - */ - __device__ __forceinline__ BlockReduceTiles( - TempStorage& temp_storage, ///< Reference to temp_storage - InputIteratorRA d_in, ///< Input data to reduce - ReductionOp reduction_op) ///< Binary reduction operator - : - temp_storage(temp_storage.Alias()), - d_in(d_in), - reduction_op(reduction_op), - first_tile_size(0), - input_aligned(CAN_VECTORIZE && ((size_t(d_in) & (sizeof(VectorT) - 1)) == 0)) - {} - - - /** - * Process a single tile of input - */ - template - __device__ __forceinline__ void ConsumeTile( - SizeT block_offset, ///< The offset the tile to consume - int valid_items = TILE_ITEMS) ///< The number of valid items in the tile - { - if (FULL_TILE) - { - T stripe_partial; - - // Load full tile - if (input_aligned) - { - // Alias items as an array of VectorT and load it in striped fashion - enum { WORDS = ITEMS_PER_THREAD / VECTOR_LOAD_LENGTH }; - - VectorT vec_items[WORDS]; - - // Load striped into vec items - VectorT* alias_ptr = reinterpret_cast(d_in + block_offset + (threadIdx.x * VECTOR_LOAD_LENGTH)); - - #pragma unroll - for (int i = 0; i < WORDS; ++i) - vec_items[i] = 
alias_ptr[BLOCK_THREADS * i]; - - // Reduce items within each thread stripe - stripe_partial = ThreadReduce( - reinterpret_cast(vec_items), - reduction_op); - } - else - { - T items[ITEMS_PER_THREAD]; - - // Load items in striped fashion - LoadStriped(threadIdx.x, d_in + block_offset, items); - - // Reduce items within each thread stripe - stripe_partial = ThreadReduce(items, reduction_op); - } - - // Update running thread aggregate - thread_aggregate = (first_tile_size) ? - reduction_op(thread_aggregate, stripe_partial) : // Update - stripe_partial; // Assign - } - else - { - - // Partial tile - int thread_offset = threadIdx.x; - - if (!first_tile_size && (thread_offset < valid_items)) - { - // Assign thread_aggregate - thread_aggregate = ThreadLoad(d_in + block_offset + thread_offset); - thread_offset += BLOCK_THREADS; - } - - while (thread_offset < valid_items) - { - // Update thread aggregate - T item = ThreadLoad(d_in + block_offset + thread_offset); - thread_aggregate = reduction_op(thread_aggregate, item); - thread_offset += BLOCK_THREADS; - } - } - - // Set first tile size if necessary - if (!first_tile_size) - first_tile_size = valid_items; - } - - - //--------------------------------------------------------------------- - // Consume a contiguous segment of tiles - //--------------------------------------------------------------------- - - /** - * \brief Reduce a contiguous segment of input tiles - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT block_offset, ///< [in] Threadblock begin offset (inclusive) - SizeT block_oob, ///< [in] Threadblock end offset (exclusive) - T &block_aggregate) ///< [out] Running total - { - // Consume subsequent full tiles of input - while (block_offset + TILE_ITEMS <= block_oob) - { - ConsumeTile(block_offset); - block_offset += TILE_ITEMS; - } - - // Consume a partially-full tile - if (block_offset < block_oob) - { - int valid_items = block_oob - block_offset; - ConsumeTile(block_offset, valid_items); - } - - // Compute block-wide reduction - block_aggregate = (first_tile_size < TILE_ITEMS) ? - BlockReduceT(temp_storage).Reduce(thread_aggregate, reduction_op, first_tile_size) : - BlockReduceT(temp_storage).Reduce(thread_aggregate, reduction_op); - } - - - /** - * Reduce a contiguous segment of input tiles - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT num_items, ///< [in] Total number of global input items - GridEvenShare &even_share, ///< [in] GridEvenShare descriptor - GridQueue &queue, ///< [in,out] GridQueue descriptor - T &block_aggregate, ///< [out] Running total - Int2Type is_even_share) ///< [in] Marker type indicating this is an even-share mapping - { - // Initialize even-share descriptor for this thread block - even_share.BlockInit(); - - // Consume input tiles - ConsumeTiles(even_share.block_offset, even_share.block_oob, block_aggregate); - } - - - //--------------------------------------------------------------------- - // Dynamically consume tiles - //--------------------------------------------------------------------- - - /** - * Dequeue and reduce tiles of items as part of a inter-block scan - */ - __device__ __forceinline__ void ConsumeTiles( - int num_items, ///< Total number of input items - GridQueue queue, ///< Queue descriptor for assigning tiles of work to thread blocks - T &block_aggregate) ///< [out] Running total - { - // Shared dequeue offset - __shared__ SizeT dequeue_offset; - - // We give each thread block at least one tile of input. 
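The queue-based path that follows hands out additional work dynamically: thread 0 of each block atomically "drains" the next chunk from a global counter, shares the resulting offset through shared memory, and the block keeps consuming until the drained offset runs past num_items. A host-side sketch of that atomic work-queue pattern, where std::atomic stands in for the GridQueue drain and the names are illustrative:

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// Illustrative only: a shared atomic counter hands out tiles to workers on
// demand, the same first-come-first-served pattern the grid queue provides to
// thread blocks. Each worker starts with one guaranteed tile (its own index).
int main()
{
    const int num_tiles   = 20;
    const int num_workers = 4;

    std::atomic<int> next_tile(num_workers);     // tiles 0..num_workers-1 are pre-assigned
    std::vector<int> tiles_done(num_workers, 0);

    auto worker = [&](int id)
    {
        int tile = id;                           // the guaranteed first tile
        while (tile < num_tiles)
        {
            ++tiles_done[id];                    // "consume" the tile
            tile = next_tile.fetch_add(1);       // drain the next tile index
        }
    };

    std::vector<std::thread> pool;
    for (int id = 0; id < num_workers; ++id) pool.emplace_back(worker, id);
    for (std::thread& t : pool) t.join();

    int total = 0;
    for (int id = 0; id < num_workers; ++id) total += tiles_done[id];
    std::printf("tiles consumed: %d of %d\n", total, num_tiles);   // 20 of 20
    return 0;
}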
- SizeT block_offset = blockIdx.x * TILE_ITEMS; - SizeT even_share_base = gridDim.x * TILE_ITEMS; - - if (block_offset + TILE_ITEMS <= num_items) - { - // Consume full tile of input - ConsumeTile(block_offset); - - // Dequeue more tiles - while (true) - { - // Dequeue a tile of items - if (threadIdx.x == 0) - dequeue_offset = queue.Drain(TILE_ITEMS) + even_share_base; - - __syncthreads(); - - // Grab tile offset and check if we're done with full tiles - block_offset = dequeue_offset; - - __syncthreads(); - - if (block_offset + TILE_ITEMS > num_items) - break; - - // Consume a full tile - ConsumeTile(block_offset); - } - } - - if (block_offset < num_items) - { - int valid_items = num_items - block_offset; - ConsumeTile(block_offset, valid_items); - } - - // Compute block-wide reduction - block_aggregate = (first_tile_size < TILE_ITEMS) ? - BlockReduceT(temp_storage).Reduce(thread_aggregate, reduction_op, first_tile_size) : - BlockReduceT(temp_storage).Reduce(thread_aggregate, reduction_op); - } - - - /** - * Dequeue and reduce tiles of items as part of a inter-block scan - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT num_items, ///< [in] Total number of global input items - GridEvenShare &even_share, ///< [in] GridEvenShare descriptor - GridQueue &queue, ///< [in,out] GridQueue descriptor - T &block_aggregate, ///< [out] Running total - Int2Type is_dynamic) ///< [in] Marker type indicating this is a dynamic mapping - { - ConsumeTiles(num_items, queue, block_aggregate); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/block_scan_tiles.cuh b/kokkos/kokkos/TPL/cub/device/block/block_scan_tiles.cuh deleted file mode 100644 index 9802204..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/block_scan_tiles.cuh +++ /dev/null @@ -1,509 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockScanTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan. - */ - -#pragma once - -#include - -#include "scan_tiles_types.cuh" -#include "../../block/block_load.cuh" -#include "../../block/block_store.cuh" -#include "../../block/block_scan.cuh" -#include "../../grid/grid_queue.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Tuning policy types - ******************************************************************************/ - -/** - * Tuning policy for BlockScanTiles - */ -template < - int _BLOCK_THREADS, - int _ITEMS_PER_THREAD, - BlockLoadAlgorithm _LOAD_ALGORITHM, - bool _LOAD_WARP_TIME_SLICING, - PtxLoadModifier _LOAD_MODIFIER, - BlockStoreAlgorithm _STORE_ALGORITHM, - bool _STORE_WARP_TIME_SLICING, - BlockScanAlgorithm _SCAN_ALGORITHM> -struct BlockScanTilesPolicy -{ - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - LOAD_WARP_TIME_SLICING = _LOAD_WARP_TIME_SLICING, - STORE_WARP_TIME_SLICING = _STORE_WARP_TIME_SLICING, - }; - - static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; - static const PtxLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - static const BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM; - static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; -}; - - -/****************************************************************************** - * Thread block abstractions - ******************************************************************************/ - -/** - * \brief BlockScanTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan. - * - * Implements a single-pass "domino" strategy with adaptive prefix lookback. 
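The single-pass "domino" strategy named here works tile by tile: each tile publishes its local aggregate as soon as it is known (SCAN_TILE_PARTIAL), then looks back across preceding tiles, summing their aggregates until it meets one whose inclusive prefix (SCAN_TILE_PREFIX) is already posted, at which point it can publish its own inclusive prefix and scan its elements. A sequential C++ sketch of that status/lookback bookkeeping follows; the names are illustrative and the parallelism and spin-waiting are deliberately elided.

#include <cstdio>
#include <numeric>
#include <vector>

// Illustrative only: the per-tile status/lookback bookkeeping of a single-pass
// ("domino") scan, run sequentially. On the GPU every tile runs this in its own
// thread block and waits until predecessors post PARTIAL or PREFIX.
enum TileStatus { TILE_INVALID, TILE_PARTIAL, TILE_PREFIX };

struct TileDescriptor { TileStatus status; int value; };

int main()
{
    const int tile_size = 4;
    std::vector<int> items = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
    const int num_tiles = int(items.size()) / tile_size;
    std::vector<TileDescriptor> tile_state(num_tiles, TileDescriptor{TILE_INVALID, 0});

    for (int tile = 0; tile < num_tiles; ++tile)
    {
        int* begin = &items[tile * tile_size];
        int aggregate = std::accumulate(begin, begin + tile_size, 0);

        // 1. Publish the tile aggregate so successors can make progress.
        tile_state[tile] = {TILE_PARTIAL, aggregate};

        // 2. Look back, summing predecessor aggregates until an inclusive
        //    prefix is found (tile 0 needs no prefix at all).
        int exclusive_prefix = 0;
        for (int pred = tile - 1; pred >= 0; --pred)
        {
            exclusive_prefix += tile_state[pred].value;
            if (tile_state[pred].status == TILE_PREFIX) break;
        }

        // 3. Publish this tile's inclusive prefix; later tiles stop here.
        tile_state[tile] = {TILE_PREFIX, exclusive_prefix + aggregate};

        // 4. Scan the tile's items, seeded with the exclusive prefix.
        int running = exclusive_prefix;
        for (int i = 0; i < tile_size; ++i)
        {
            running += begin[i];
            begin[i] = running;                      // inclusive scan output
        }
    }

    std::printf("last element: %d\n", items.back()); // 78 = 1 + 2 + ... + 12
    return 0;
}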
- */ -template < - typename BlockScanTilesPolicy, ///< Tuning policy - typename InputIteratorRA, ///< Input iterator type - typename OutputIteratorRA, ///< Output iterator type - typename ScanOp, ///< Scan functor type - typename Identity, ///< Identity element type (cub::NullType for inclusive scan) - typename SizeT> ///< Offset integer type -struct BlockScanTiles -{ - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - // Data type of input iterator - typedef typename std::iterator_traits::value_type T; - - // Constants - enum - { - INCLUSIVE = Equals::VALUE, // Inclusive scan if no identity type is provided - BLOCK_THREADS = BlockScanTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockScanTilesPolicy::ITEMS_PER_THREAD, - TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - }; - - // Block load type - typedef BlockLoad< - InputIteratorRA, - BlockScanTilesPolicy::BLOCK_THREADS, - BlockScanTilesPolicy::ITEMS_PER_THREAD, - BlockScanTilesPolicy::LOAD_ALGORITHM, - BlockScanTilesPolicy::LOAD_MODIFIER, - BlockScanTilesPolicy::LOAD_WARP_TIME_SLICING> BlockLoadT; - - // Block store type - typedef BlockStore< - OutputIteratorRA, - BlockScanTilesPolicy::BLOCK_THREADS, - BlockScanTilesPolicy::ITEMS_PER_THREAD, - BlockScanTilesPolicy::STORE_ALGORITHM, - STORE_DEFAULT, - BlockScanTilesPolicy::STORE_WARP_TIME_SLICING> BlockStoreT; - - // Tile status descriptor type - typedef ScanTileDescriptor ScanTileDescriptorT; - - // Block scan type - typedef BlockScan< - T, - BlockScanTilesPolicy::BLOCK_THREADS, - BlockScanTilesPolicy::SCAN_ALGORITHM> BlockScanT; - - // Callback type for obtaining inter-tile prefix during block scan - typedef DeviceScanBlockPrefixOp InterblockPrefixOp; - - // Shared memory type for this threadblock - struct _TempStorage - { - union - { - typename BlockLoadT::TempStorage load; // Smem needed for tile loading - typename BlockStoreT::TempStorage store; // Smem needed for tile storing - struct - { - typename InterblockPrefixOp::TempStorage prefix; // Smem needed for cooperative prefix callback - typename BlockScanT::TempStorage scan; // Smem needed for tile scanning - }; - }; - - SizeT tile_idx; // Shared tile index - }; - - // Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - _TempStorage &temp_storage; ///< Reference to temp_storage - InputIteratorRA d_in; ///< Input data - OutputIteratorRA d_out; ///< Output data - ScanOp scan_op; ///< Binary scan operator - Identity identity; ///< Identity element - - - - //--------------------------------------------------------------------- - // Block scan utility methods (first tile) - //--------------------------------------------------------------------- - - /** - * Exclusive scan specialization - */ - template - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], _ScanOp scan_op, _Identity identity, T& block_aggregate) - { - BlockScanT(temp_storage.scan).ExclusiveScan(items, items, identity, scan_op, block_aggregate); - } - - /** - * Exclusive sum specialization - */ - template - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], Sum scan_op, _Identity identity, T& block_aggregate) - { - BlockScanT(temp_storage.scan).ExclusiveSum(items, items, 
block_aggregate); - } - - /** - * Inclusive scan specialization - */ - template - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], _ScanOp scan_op, NullType identity, T& block_aggregate) - { - BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, block_aggregate); - } - - /** - * Inclusive sum specialization - */ - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], Sum scan_op, NullType identity, T& block_aggregate) - { - BlockScanT(temp_storage.scan).InclusiveSum(items, items, block_aggregate); - } - - //--------------------------------------------------------------------- - // Block scan utility methods (subsequent tiles) - //--------------------------------------------------------------------- - - /** - * Exclusive scan specialization (with prefix from predecessors) - */ - template - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], _ScanOp scan_op, _Identity identity, T& block_aggregate, PrefixCallback &prefix_op) - { - BlockScanT(temp_storage.scan).ExclusiveScan(items, items, identity, scan_op, block_aggregate, prefix_op); - } - - /** - * Exclusive sum specialization (with prefix from predecessors) - */ - template - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], Sum scan_op, _Identity identity, T& block_aggregate, PrefixCallback &prefix_op) - { - BlockScanT(temp_storage.scan).ExclusiveSum(items, items, block_aggregate, prefix_op); - } - - /** - * Inclusive scan specialization (with prefix from predecessors) - */ - template - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], _ScanOp scan_op, NullType identity, T& block_aggregate, PrefixCallback &prefix_op) - { - BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, block_aggregate, prefix_op); - } - - /** - * Inclusive sum specialization (with prefix from predecessors) - */ - template - __device__ __forceinline__ - void ScanBlock(T (&items)[ITEMS_PER_THREAD], Sum scan_op, NullType identity, T& block_aggregate, PrefixCallback &prefix_op) - { - BlockScanT(temp_storage.scan).InclusiveSum(items, items, block_aggregate, prefix_op); - } - - //--------------------------------------------------------------------- - // Constructor - //--------------------------------------------------------------------- - - // Constructor - __device__ __forceinline__ - BlockScanTiles( - TempStorage &temp_storage, ///< Reference to temp_storage - InputIteratorRA d_in, ///< Input data - OutputIteratorRA d_out, ///< Output data - ScanOp scan_op, ///< Binary scan operator - Identity identity) ///< Identity element - : - temp_storage(temp_storage.Alias()), - d_in(d_in), - d_out(d_out), - scan_op(scan_op), - identity(identity) - {} - - - //--------------------------------------------------------------------- - // Domino scan - //--------------------------------------------------------------------- - - /** - * Process a tile of input (domino scan) - */ - template - __device__ __forceinline__ void ConsumeTile( - SizeT num_items, ///< Total number of input items - int tile_idx, ///< Tile index - SizeT block_offset, ///< Tile offset - ScanTileDescriptorT *d_tile_status) ///< Global list of tile status - { - // Load items - T items[ITEMS_PER_THREAD]; - - if (FULL_TILE) - BlockLoadT(temp_storage.load).Load(d_in + block_offset, items); - else - BlockLoadT(temp_storage.load).Load(d_in + block_offset, items, num_items - block_offset); - - __syncthreads(); - - T block_aggregate; - if (tile_idx == 0) - { - ScanBlock(items, scan_op, 
identity, block_aggregate); - - // Update tile status if there are successor tiles - if (FULL_TILE && (threadIdx.x == 0)) - ScanTileDescriptorT::SetPrefix(d_tile_status, block_aggregate); - } - else - { - InterblockPrefixOp prefix_op(d_tile_status, temp_storage.prefix, scan_op, tile_idx); - ScanBlock(items, scan_op, identity, block_aggregate, prefix_op); - } - - __syncthreads(); - - // Store items - if (FULL_TILE) - BlockStoreT(temp_storage.store).Store(d_out + block_offset, items); - else - BlockStoreT(temp_storage.store).Store(d_out + block_offset, items, num_items - block_offset); - } - - /** - * Dequeue and scan tiles of items as part of a domino scan - */ - __device__ __forceinline__ void ConsumeTiles( - int num_items, ///< Total number of input items - GridQueue queue, ///< Queue descriptor for assigning tiles of work to thread blocks - ScanTileDescriptorT *d_tile_status) ///< Global list of tile status - { -#if CUB_PTX_ARCH < 200 - - // No concurrent kernels allowed and blocks are launched in increasing order, so just assign one tile per block (up to 65K blocks) - int tile_idx = blockIdx.x; - SizeT block_offset = SizeT(TILE_ITEMS) * tile_idx; - - if (block_offset + TILE_ITEMS <= num_items) - ConsumeTile(num_items, tile_idx, block_offset, d_tile_status); - else if (block_offset < num_items) - ConsumeTile(num_items, tile_idx, block_offset, d_tile_status); - -#else - - // Get first tile - if (threadIdx.x == 0) - temp_storage.tile_idx = queue.Drain(1); - - __syncthreads(); - - int tile_idx = temp_storage.tile_idx; - SizeT block_offset = SizeT(TILE_ITEMS) * tile_idx; - - while (block_offset + TILE_ITEMS <= num_items) - { - // Consume full tile - ConsumeTile(num_items, tile_idx, block_offset, d_tile_status); - - // Get next tile - if (threadIdx.x == 0) - temp_storage.tile_idx = queue.Drain(1); - - __syncthreads(); - - tile_idx = temp_storage.tile_idx; - block_offset = SizeT(TILE_ITEMS) * tile_idx; - } - - // Consume a partially-full tile - if (block_offset < num_items) - { - ConsumeTile(num_items, tile_idx, block_offset, d_tile_status); - } -#endif - - } - - - //--------------------------------------------------------------------- - // Even-share scan - //--------------------------------------------------------------------- - - /** - * Process a tile of input - */ - template < - bool FULL_TILE, - bool FIRST_TILE> - __device__ __forceinline__ void ConsumeTile( - SizeT block_offset, ///< Tile offset - RunningBlockPrefixOp &prefix_op, ///< Running prefix operator - int valid_items = TILE_ITEMS) ///< Number of valid items in the tile - { - // Load items - T items[ITEMS_PER_THREAD]; - - if (FULL_TILE) - BlockLoadT(temp_storage.load).Load(d_in + block_offset, items); - else - BlockLoadT(temp_storage.load).Load(d_in + block_offset, items, valid_items); - - __syncthreads(); - - // Block scan - T block_aggregate; - if (FIRST_TILE) - { - ScanBlock(items, scan_op, identity, block_aggregate); - prefix_op.running_total = block_aggregate; - } - else - { - ScanBlock(items, scan_op, identity, block_aggregate, prefix_op); - } - - __syncthreads(); - - // Store items - if (FULL_TILE) - BlockStoreT(temp_storage.store).Store(d_out + block_offset, items); - else - BlockStoreT(temp_storage.store).Store(d_out + block_offset, items, valid_items); - } - - - /** - * Scan a consecutive share of input tiles - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT block_offset, ///< [in] Threadblock begin offset (inclusive) - SizeT block_oob) ///< [in] Threadblock end offset (exclusive) - { - RunningBlockPrefixOp 
prefix_op; - - if (block_offset + TILE_ITEMS <= block_oob) - { - // Consume first tile of input (full) - ConsumeTile(block_offset, prefix_op); - block_offset += TILE_ITEMS; - - // Consume subsequent full tiles of input - while (block_offset + TILE_ITEMS <= block_oob) - { - ConsumeTile(block_offset, prefix_op); - block_offset += TILE_ITEMS; - } - - // Consume a partially-full tile - if (block_offset < block_oob) - { - int valid_items = block_oob - block_offset; - ConsumeTile(block_offset, prefix_op, valid_items); - } - } - else - { - // Consume the first tile of input (partially-full) - int valid_items = block_oob - block_offset; - ConsumeTile(block_offset, prefix_op, valid_items); - } - } - - - /** - * Scan a consecutive share of input tiles, seeded with the specified prefix value - */ - __device__ __forceinline__ void ConsumeTiles( - SizeT block_offset, ///< [in] Threadblock begin offset (inclusive) - SizeT block_oob, ///< [in] Threadblock end offset (exclusive) - T prefix) ///< [in] The prefix to apply to the scan segment - { - RunningBlockPrefixOp prefix_op; - prefix_op.running_total = prefix; - - // Consume full tiles of input - while (block_offset + TILE_ITEMS <= block_oob) - { - ConsumeTile(block_offset, prefix_op); - block_offset += TILE_ITEMS; - } - - // Consume a partially-full tile - if (block_offset < block_oob) - { - int valid_items = block_oob - block_offset; - ConsumeTile(block_offset, prefix_op, valid_items); - } - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/scan_tiles_types.cuh b/kokkos/kokkos/TPL/cub/device/block/scan_tiles_types.cuh deleted file mode 100644 index 2b933d0..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/scan_tiles_types.cuh +++ /dev/null @@ -1,318 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -/** - * \file - * Utility types for device-wide scan - */ - -#pragma once - -#include - -#include "../../thread/thread_load.cuh" -#include "../../thread/thread_store.cuh" -#include "../../warp/warp_reduce.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * Enumerations of tile status - */ -enum ScanTileStatus -{ - SCAN_TILE_OOB, // Out-of-bounds (e.g., padding) - SCAN_TILE_INVALID, // Not yet processed - SCAN_TILE_PARTIAL, // Tile aggregate is available - SCAN_TILE_PREFIX, // Inclusive tile prefix is available -}; - - -/** - * Data type of tile status descriptor. - * - * Specialized for scan status and value types that can be combined into the same - * machine word that can be read/written coherently in a single access. - */ -template < - typename T, - bool SINGLE_WORD = (PowerOfTwo::VALUE && (sizeof(T) <= 8))> -struct ScanTileDescriptor -{ - // Status word type - typedef typename If<(sizeof(T) == 8), - long long, - typename If<(sizeof(T) == 4), - int, - typename If<(sizeof(T) == 2), - short, - char>::Type>::Type>::Type StatusWord; - - // Vector word type - typedef typename If<(sizeof(T) == 8), - longlong2, - typename If<(sizeof(T) == 4), - int2, - typename If<(sizeof(T) == 2), - int, - short>::Type>::Type>::Type VectorWord; - - T value; - StatusWord status; - - static __device__ __forceinline__ void SetPrefix(ScanTileDescriptor *ptr, T prefix) - { - ScanTileDescriptor tile_descriptor; - tile_descriptor.status = SCAN_TILE_PREFIX; - tile_descriptor.value = prefix; - - VectorWord alias; - *reinterpret_cast(&alias) = tile_descriptor; - ThreadStore(reinterpret_cast(ptr), alias); - } - - static __device__ __forceinline__ void SetPartial(ScanTileDescriptor *ptr, T partial) - { - ScanTileDescriptor tile_descriptor; - tile_descriptor.status = SCAN_TILE_PARTIAL; - tile_descriptor.value = partial; - - VectorWord alias; - *reinterpret_cast(&alias) = tile_descriptor; - ThreadStore(reinterpret_cast(ptr), alias); - } - - static __device__ __forceinline__ void WaitForValid( - ScanTileDescriptor *ptr, - int &status, - T &value) - { - ScanTileDescriptor tile_descriptor; - while (true) - { - VectorWord alias = ThreadLoad(reinterpret_cast(ptr)); - - tile_descriptor = *reinterpret_cast(&alias); - if (tile_descriptor.status != SCAN_TILE_INVALID) break; - - __threadfence_block(); - } - - status = tile_descriptor.status; - value = tile_descriptor.value; - } - -}; - - -/** - * Data type of tile status descriptor. - * - * Specialized for scan status and value types that cannot fused into - * the same machine word. 
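For value types too wide to pack beside the status flag in one machine word, the descriptor specialization that follows has to rely on ordering instead: the value is stored first, a fence prevents reordering, and only then is the status word flipped, while the reader polls the status before touching the value. A host-side sketch of the same publish/poll protocol using C++ atomics; release/acquire ordering here plays the role of the device-side fences, and the names are illustrative.

#include <atomic>
#include <cstdio>
#include <thread>

// Illustrative only: publish a payload that cannot share a machine word with
// its status flag. The writer stores the value, then sets the status with
// release ordering; the reader polls the status with acquire ordering, so the
// value is guaranteed to be visible once the status says it is valid.
enum Status : int { STATUS_INVALID, STATUS_PREFIX };

struct Descriptor
{
    double           value;                    // too wide to fuse with the status word
    std::atomic<int> status{STATUS_INVALID};
};

int main()
{
    Descriptor tile;

    std::thread writer([&] {
        tile.value = 42.0;                                            // 1. store payload
        tile.status.store(STATUS_PREFIX, std::memory_order_release);  // 2. then flip status
    });

    std::thread reader([&] {
        while (tile.status.load(std::memory_order_acquire) != STATUS_PREFIX)
            ;                                                         // spin until valid
        std::printf("prefix = %f\n", tile.value);                     // safe to read now
    });

    writer.join();
    reader.join();
    return 0;
}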
- */ -template -struct ScanTileDescriptor -{ - T prefix_value; - T partial_value; - - /// Workaround for the fact that win32 doesn't guarantee 16B alignment 16B values of T - union - { - int status; - Uninitialized padding; - }; - - static __device__ __forceinline__ void SetPrefix(ScanTileDescriptor *ptr, T prefix) - { - ThreadStore(&ptr->prefix_value, prefix); - __threadfence_block(); -// __threadfence(); // __threadfence_block seems sufficient on current architectures to prevent reordeing - ThreadStore(&ptr->status, (int) SCAN_TILE_PREFIX); - - } - - static __device__ __forceinline__ void SetPartial(ScanTileDescriptor *ptr, T partial) - { - ThreadStore(&ptr->partial_value, partial); - __threadfence_block(); -// __threadfence(); // __threadfence_block seems sufficient on current architectures to prevent reordeing - ThreadStore(&ptr->status, (int) SCAN_TILE_PARTIAL); - } - - static __device__ __forceinline__ void WaitForValid( - ScanTileDescriptor *ptr, - int &status, - T &value) - { - while (true) - { - status = ThreadLoad(&ptr->status); - if (status != SCAN_TILE_INVALID) break; - - __threadfence_block(); - } - - value = (status == SCAN_TILE_PARTIAL) ? - ThreadLoad(&ptr->partial_value) : - ThreadLoad(&ptr->prefix_value); - } -}; - - -/** - * Stateful prefix functor that provides the the running prefix for - * the current tile by using the callback warp to wait on on - * aggregates/prefixes from predecessor tiles to become available - */ -template < - typename T, - typename ScanOp> -struct DeviceScanBlockPrefixOp -{ - // Parameterized warp reduce - typedef WarpReduce WarpReduceT; - - // Storage type - typedef typename WarpReduceT::TempStorage _TempStorage; - - // Alias wrapper allowing storage to be unioned - typedef Uninitialized<_TempStorage> TempStorage; - - // Tile status descriptor type - typedef ScanTileDescriptor ScanTileDescriptorT; - - // Fields - ScanTileDescriptorT *d_tile_status; ///< Pointer to array of tile status - _TempStorage &temp_storage; ///< Reference to a warp-reduction instance - ScanOp scan_op; ///< Binary scan operator - int tile_idx; ///< The current tile index - T inclusive_prefix; ///< Inclusive prefix for the tile - - // Constructor - __device__ __forceinline__ - DeviceScanBlockPrefixOp( - ScanTileDescriptorT *d_tile_status, - TempStorage &temp_storage, - ScanOp scan_op, - int tile_idx) : - d_tile_status(d_tile_status), - temp_storage(temp_storage.Alias()), - scan_op(scan_op), - tile_idx(tile_idx) {} - - - // Block until all predecessors within the specified window have non-invalid status - __device__ __forceinline__ - void ProcessWindow( - int predecessor_idx, - int &predecessor_status, - T &window_aggregate) - { - T value; - ScanTileDescriptorT::WaitForValid(d_tile_status + predecessor_idx, predecessor_status, value); - - // Perform a segmented reduction to get the prefix for the current window - int flag = (predecessor_status != SCAN_TILE_PARTIAL); - window_aggregate = WarpReduceT(temp_storage).TailSegmentedReduce(value, flag, scan_op); - } - - - // Prefix functor (called by the first warp) - __device__ __forceinline__ - T operator()(T block_aggregate) - { - // Update our status with our tile-aggregate - if (threadIdx.x == 0) - { - ScanTileDescriptorT::SetPartial(d_tile_status + tile_idx, block_aggregate); - } - - // Wait for the window of predecessor tiles to become valid - int predecessor_idx = tile_idx - threadIdx.x - 1; - int predecessor_status; - T window_aggregate; - ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); - - // The 
exclusive tile prefix starts out as the current window aggregate - T exclusive_prefix = window_aggregate; - - // Keep sliding the window back until we come across a tile whose inclusive prefix is known - while (WarpAll(predecessor_status != SCAN_TILE_PREFIX)) - { - predecessor_idx -= PtxArchProps::WARP_THREADS; - - // Update exclusive tile prefix with the window prefix - ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); - exclusive_prefix = scan_op(window_aggregate, exclusive_prefix); - } - - // Compute the inclusive tile prefix and update the status for this tile - if (threadIdx.x == 0) - { - inclusive_prefix = scan_op(exclusive_prefix, block_aggregate); - ScanTileDescriptorT::SetPrefix( - d_tile_status + tile_idx, - inclusive_prefix); - } - - // Return exclusive_prefix - return exclusive_prefix; - } -}; - - -// Running scan prefix callback type for single-block scans. -// Maintains a running prefix that can be applied to consecutive -// scan operations. -template -struct RunningBlockPrefixOp -{ - // Running prefix - T running_total; - - // Callback operator. - __device__ T operator()(T block_aggregate) - { - T old_prefix = running_total; - running_total += block_aggregate; - return old_prefix; - } -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_gatomic.cuh b/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_gatomic.cuh deleted file mode 100644 index 5896dbc..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_gatomic.cuh +++ /dev/null @@ -1,184 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
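The single-word ScanTileDescriptor above fuses the status flag and the tile value into one machine word so that a predecessor's state can be published and observed with a single coherent load or store. The snippet below is a minimal standalone sketch of that packing for a 32-bit value; the names (PackedTile, set_tile, wait_valid) and the use of 64-bit atomics are illustrative assumptions, not CUB's ThreadLoad/ThreadStore machinery.

// Minimal standalone sketch (not CUB's API): a 32-bit status word and a 32-bit
// float value fused into one 64-bit descriptor, so publishing and spin-waiting
// each touch a single machine word.

#include <cuda_runtime.h>

enum TileStatus { TILE_INVALID = 0, TILE_PARTIAL = 1, TILE_PREFIX = 2 };

struct PackedTile
{
    unsigned long long word;   // low 32 bits: status, high 32 bits: bit-cast float value
};

__device__ __forceinline__ void set_tile(PackedTile *tile, int status, float value)
{
    unsigned long long packed =
        (static_cast<unsigned long long>(__float_as_uint(value)) << 32) |
        static_cast<unsigned long long>(static_cast<unsigned int>(status));

    // A single 64-bit store publishes value and status together; no fence is needed
    // between them because they can never be observed separately.
    atomicExch(&tile->word, packed);
}

__device__ __forceinline__ void wait_valid(PackedTile *tile, int &status, float &value)
{
    unsigned long long packed;
    do
    {
        packed = atomicAdd(&tile->word, 0ULL);          // coherent read of the whole descriptor
        status = static_cast<int>(packed & 0xFFFFFFFFull);
    } while (status == TILE_INVALID);

    value = __uint_as_float(static_cast<unsigned int>(packed >> 32));
}

The two-word fallback specialization, by contrast, has to publish the value first, fence, and only then flip the status word, because its two halves could otherwise be observed out of order.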
- * - ******************************************************************************/ - -/** - * \file - * cub::BlockHistogramTilesGlobalAtomic implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram. - */ - -#pragma once - -#include - -#include "../../../util_type.cuh" -#include "../../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - - -/** - * BlockHistogramTilesGlobalAtomic implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram using global atomics - */ -template < - typename BlockHistogramTilesPolicy, ///< Tuning policy - int BINS, ///< Number of histogram bins per channel - int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of active channels being histogrammed) - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). Must have a value type that can be cast as an integer in the range [0..BINS-1] - typename HistoCounter, ///< Integral type for counting sample occurrences per histogram bin - typename SizeT> ///< Integer type for offsets -struct BlockHistogramTilesGlobalAtomic -{ - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - // Sample type - typedef typename std::iterator_traits::value_type SampleT; - - // Constants - enum - { - BLOCK_THREADS = BlockHistogramTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockHistogramTilesPolicy::ITEMS_PER_THREAD, - TILE_CHANNEL_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - TILE_ITEMS = TILE_CHANNEL_ITEMS * CHANNELS, - }; - - // Shared memory type required by this thread block - typedef NullType TempStorage; - - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - /// Reference to output histograms - HistoCounter* (&d_out_histograms)[ACTIVE_CHANNELS]; - - /// Input data to reduce - InputIteratorRA d_in; - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - /** - * Constructor - */ - __device__ __forceinline__ BlockHistogramTilesGlobalAtomic( - TempStorage &temp_storage, ///< Reference to temp_storage - InputIteratorRA d_in, ///< Input data to reduce - HistoCounter* (&d_out_histograms)[ACTIVE_CHANNELS]) ///< Reference to output histograms - : - d_in(d_in), - d_out_histograms(d_out_histograms) - {} - - - /** - * Process a single tile of input - */ - template - __device__ __forceinline__ void ConsumeTile( - SizeT block_offset, ///< The offset the tile to consume - int valid_items = TILE_ITEMS) ///< The number of valid items in the tile - { - if (FULL_TILE) - { - // Full tile of samples to read and composite - SampleT items[ITEMS_PER_THREAD][CHANNELS]; - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - #pragma unroll - for (int CHANNEL = 0; CHANNEL < CHANNELS; ++CHANNEL) - { - if (CHANNEL < ACTIVE_CHANNELS) - { - items[ITEM][CHANNEL] = d_in[block_offset + (ITEM * BLOCK_THREADS * CHANNELS) + (threadIdx.x * CHANNELS) + CHANNEL]; - } - } - } - - __threadfence_block(); - - #pragma unroll - for (int ITEM = 0; ITEM < 
ITEMS_PER_THREAD; ITEM++) - { - #pragma unroll - for (int CHANNEL = 0; CHANNEL < CHANNELS; ++CHANNEL) - { - if (CHANNEL < ACTIVE_CHANNELS) - { - atomicAdd(d_out_histograms[CHANNEL] + items[ITEM][CHANNEL], 1); - } - } - } - } - else - { - // Only a partially-full tile of samples to read and composite - int bounds = valid_items - (threadIdx.x * CHANNELS); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - #pragma unroll - for (int CHANNEL = 0; CHANNEL < CHANNELS; ++CHANNEL) - { - if (((ACTIVE_CHANNELS == CHANNELS) || (CHANNEL < ACTIVE_CHANNELS)) && ((ITEM * BLOCK_THREADS * CHANNELS) + CHANNEL < bounds)) - { - SampleT item = d_in[block_offset + (ITEM * BLOCK_THREADS * CHANNELS) + (threadIdx.x * CHANNELS) + CHANNEL]; - atomicAdd(d_out_histograms[CHANNEL] + item, 1); - } - } - } - - } - } - - - /** - * Aggregate results into output - */ - __device__ __forceinline__ void AggregateOutput() - {} -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_satomic.cuh b/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_satomic.cuh deleted file mode 100644 index c55d789..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_satomic.cuh +++ /dev/null @@ -1,237 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
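BlockHistogramTilesGlobalAtomic composites every sample straight into the output histograms with global atomicAdd, which is why its AggregateOutput is empty and it needs no shared memory. Below is a reduced, single-channel sketch of the same strategy as a grid-stride kernel; the kernel name and launch shape are illustrative and do not reproduce the deleted code's per-tile unrolling.

// Reduced sketch of the global-atomic strategy for one channel: every sample is
// composited directly into the output histogram with a global atomicAdd, so no
// privatized copies or aggregation pass are required. Illustrative, not CUB's kernel.

__global__ void histo_global_atomic(const unsigned char *d_samples,
                                    unsigned int        *d_histogram,   // BINS counters, pre-zeroed
                                    int                  num_samples)
{
    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
         i < num_samples;
         i += gridDim.x * blockDim.x)
    {
        atomicAdd(d_histogram + d_samples[i], 1u);
    }
}

The trade-off noted in the DeviceHistogram documentation applies here: low-diversity input funnels many atomics onto a few hot bins and serializes them, which is what the shared-atomic and sort-based variants are meant to mitigate.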
- * - ******************************************************************************/ - -/** - * \file - * cub::BlockHistogramTilesSharedAtomic implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram using shared atomics - */ - -#pragma once - -#include - -#include "../../../util_type.cuh" -#include "../../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * BlockHistogramTilesSharedAtomic implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram using shared atomics - */ -template < - typename BlockHistogramTilesPolicy, ///< Tuning policy - int BINS, ///< Number of histogram bins - int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of active channels being histogrammed) - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). Must have a value type that can be cast as an integer in the range [0..BINS-1] - typename HistoCounter, ///< Integral type for counting sample occurrences per histogram bin - typename SizeT> ///< Integer type for offsets -struct BlockHistogramTilesSharedAtomic -{ - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - // Sample type - typedef typename std::iterator_traits::value_type SampleT; - - // Constants - enum - { - BLOCK_THREADS = BlockHistogramTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockHistogramTilesPolicy::ITEMS_PER_THREAD, - TILE_CHANNEL_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - TILE_ITEMS = TILE_CHANNEL_ITEMS * CHANNELS, - }; - - /// Shared memory type required by this thread block - struct _TempStorage - { - HistoCounter histograms[ACTIVE_CHANNELS][BINS + 1]; // One word of padding between channel histograms to prevent warps working on different histograms from hammering on the same bank - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - /// Reference to temp_storage - _TempStorage &temp_storage; - - /// Reference to output histograms - HistoCounter* (&d_out_histograms)[ACTIVE_CHANNELS]; - - /// Input data to reduce - InputIteratorRA d_in; - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - /** - * Constructor - */ - __device__ __forceinline__ BlockHistogramTilesSharedAtomic( - TempStorage &temp_storage, ///< Reference to temp_storage - InputIteratorRA d_in, ///< Input data to reduce - HistoCounter* (&d_out_histograms)[ACTIVE_CHANNELS]) ///< Reference to output histograms - : - temp_storage(temp_storage.Alias()), - d_in(d_in), - d_out_histograms(d_out_histograms) - { - // Initialize histogram bin counts to zeros - #pragma unroll - for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) - { - int histo_offset = 0; - - #pragma unroll - for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) - { - this->temp_storage.histograms[CHANNEL][histo_offset + threadIdx.x] = 0; - } - // Finish up with guarded 
initialization if necessary - if ((BINS % BLOCK_THREADS != 0) && (histo_offset + threadIdx.x < BINS)) - { - this->temp_storage.histograms[CHANNEL][histo_offset + threadIdx.x] = 0; - } - } - } - - - /** - * Process a single tile of input - */ - template - __device__ __forceinline__ void ConsumeTile( - SizeT block_offset, ///< The offset the tile to consume - int valid_items = TILE_ITEMS) ///< The number of valid items in the tile - { - if (FULL_TILE) - { - // Full tile of samples to read and composite - SampleT items[ITEMS_PER_THREAD][CHANNELS]; - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - #pragma unroll - for (int CHANNEL = 0; CHANNEL < CHANNELS; ++CHANNEL) - { - if (CHANNEL < ACTIVE_CHANNELS) - { - items[ITEM][CHANNEL] = d_in[block_offset + (ITEM * BLOCK_THREADS * CHANNELS) + (threadIdx.x * CHANNELS) + CHANNEL]; - } - } - } - - __threadfence_block(); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - #pragma unroll - for (int CHANNEL = 0; CHANNEL < CHANNELS; ++CHANNEL) - { - if (CHANNEL < ACTIVE_CHANNELS) - { - atomicAdd(temp_storage.histograms[CHANNEL] + items[ITEM][CHANNEL], 1); - } - } - } - - __threadfence_block(); - } - else - { - // Only a partially-full tile of samples to read and composite - int bounds = valid_items - (threadIdx.x * CHANNELS); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - #pragma unroll - for (int CHANNEL = 0; CHANNEL < CHANNELS; ++CHANNEL) - { - if (((ACTIVE_CHANNELS == CHANNELS) || (CHANNEL < ACTIVE_CHANNELS)) && ((ITEM * BLOCK_THREADS * CHANNELS) + CHANNEL < bounds)) - { - SampleT item = d_in[block_offset + (ITEM * BLOCK_THREADS * CHANNELS) + (threadIdx.x * CHANNELS) + CHANNEL]; - atomicAdd(temp_storage.histograms[CHANNEL] + item, 1); - } - } - } - - } - } - - - /** - * Aggregate results into output - */ - __device__ __forceinline__ void AggregateOutput() - { - // Barrier to ensure shared memory histograms are coherent - __syncthreads(); - - // Copy shared memory histograms to output - #pragma unroll - for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) - { - int channel_offset = (blockIdx.x * BINS); - int histo_offset = 0; - - #pragma unroll - for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) - { - d_out_histograms[CHANNEL][channel_offset + histo_offset + threadIdx.x] = temp_storage.histograms[CHANNEL][histo_offset + threadIdx.x]; - } - // Finish up with guarded initialization if necessary - if ((BINS % BLOCK_THREADS != 0) && (histo_offset + threadIdx.x < BINS)) - { - d_out_histograms[CHANNEL][channel_offset + histo_offset + threadIdx.x] = temp_storage.histograms[CHANNEL][histo_offset + threadIdx.x]; - } - } - } -}; - - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_sort.cuh b/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_sort.cuh deleted file mode 100644 index 0f82130..0000000 --- a/kokkos/kokkos/TPL/cub/device/block/specializations/block_histo_tiles_sort.cuh +++ /dev/null @@ -1,364 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
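BlockHistogramTilesSharedAtomic privatizes one histogram per active channel in shared memory (with one word of padding to keep warps on different channels off the same bank) and copies each block's counts out in AggregateOutput. The sketch below condenses that pattern to a single channel and folds the copy-out into a final atomicAdd; the deleted code instead writes per-block histograms that AggregateHistoKernel reduces later. The kernel name and the 256-bin constant are illustrative assumptions.

// Condensed sketch of shared-memory privatization for one channel: each block builds
// its own histogram in shared memory with cheap shared atomics, then flushes its
// totals to the global histogram once.

#define SKETCH_BINS 256

__global__ void histo_shared_atomic(const unsigned char *d_samples,
                                    unsigned int        *d_histogram,   // SKETCH_BINS counters, pre-zeroed
                                    int                  num_samples)
{
    __shared__ unsigned int block_histo[SKETCH_BINS];

    // Zero the privatized histogram (BINS may exceed blockDim.x, hence the stride loop)
    for (int bin = threadIdx.x; bin < SKETCH_BINS; bin += blockDim.x)
        block_histo[bin] = 0;
    __syncthreads();

    // Composite samples into shared memory
    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
         i < num_samples;
         i += gridDim.x * blockDim.x)
    {
        atomicAdd(&block_histo[d_samples[i]], 1u);
    }
    __syncthreads();

    // One global atomic per (block, bin) instead of one per sample
    for (int bin = threadIdx.x; bin < SKETCH_BINS; bin += blockDim.x)
        atomicAdd(&d_histogram[bin], block_histo[bin]);
}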
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::BlockHistogramTilesSort implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram using local sorting - */ - -#pragma once - -#include - -#include "../../../block/block_radix_sort.cuh" -#include "../../../block/block_discontinuity.cuh" -#include "../../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * BlockHistogramTilesSort implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of device-wide histogram using local sorting - */ -template < - typename BlockHistogramTilesPolicy, ///< Tuning policy - int BINS, ///< Number of histogram bins per channel - int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of active channels being histogrammed) - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). 
Must have a value type that can be cast as an integer in the range [0..BINS-1] - typename HistoCounter, ///< Integral type for counting sample occurrences per histogram bin - typename SizeT> ///< Integer type for offsets -struct BlockHistogramTilesSort -{ - //--------------------------------------------------------------------- - // Types and constants - //--------------------------------------------------------------------- - - // Sample type - typedef typename std::iterator_traits::value_type SampleT; - - // Constants - enum - { - BLOCK_THREADS = BlockHistogramTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockHistogramTilesPolicy::ITEMS_PER_THREAD, - TILE_CHANNEL_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, - TILE_ITEMS = TILE_CHANNEL_ITEMS * CHANNELS, - - STRIPED_COUNTERS_PER_THREAD = (BINS + BLOCK_THREADS - 1) / BLOCK_THREADS, - }; - - // Parameterize BlockRadixSort type for our thread block - typedef BlockRadixSort BlockRadixSortT; - - // Parameterize BlockDiscontinuity type for our thread block - typedef BlockDiscontinuity BlockDiscontinuityT; - - /// Shared memory type required by this thread block - union _TempStorage - { - // Storage for sorting bin values - typename BlockRadixSortT::TempStorage sort; - - struct - { - // Storage for detecting discontinuities in the tile of sorted bin values - typename BlockDiscontinuityT::TempStorage flag; - - // Storage for noting begin/end offsets of bin runs in the tile of sorted bin values - int run_begin[BLOCK_THREADS * STRIPED_COUNTERS_PER_THREAD]; - int run_end[BLOCK_THREADS * STRIPED_COUNTERS_PER_THREAD]; - }; - }; - - - /// Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - // Discontinuity functor - struct DiscontinuityOp - { - // Reference to temp_storage - _TempStorage &temp_storage; - - // Constructor - __device__ __forceinline__ DiscontinuityOp(_TempStorage &temp_storage) : - temp_storage(temp_storage) - {} - - // Discontinuity predicate - __device__ __forceinline__ bool operator()(const SampleT &a, const SampleT &b, int b_index) - { - if (a != b) - { - // Note the begin/end offsets in shared storage - temp_storage.run_begin[b] = b_index; - temp_storage.run_end[a] = b_index; - - return true; - } - else - { - return false; - } - } - }; - - - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - /// Reference to temp_storage - _TempStorage &temp_storage; - - /// Histogram counters striped across threads - HistoCounter thread_counters[ACTIVE_CHANNELS][STRIPED_COUNTERS_PER_THREAD]; - - /// Reference to output histograms - HistoCounter* (&d_out_histograms)[ACTIVE_CHANNELS]; - - /// Input data to reduce - InputIteratorRA d_in; - - - //--------------------------------------------------------------------- - // Interface - //--------------------------------------------------------------------- - - /** - * Constructor - */ - __device__ __forceinline__ BlockHistogramTilesSort( - TempStorage &temp_storage, ///< Reference to temp_storage - InputIteratorRA d_in, ///< Input data to reduce - HistoCounter* (&d_out_histograms)[ACTIVE_CHANNELS]) ///< Reference to output histograms - : - temp_storage(temp_storage.Alias()), - d_in(d_in), - d_out_histograms(d_out_histograms) - { - // Initialize histogram counters striped across threads - #pragma unroll - for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) - { - #pragma unroll - for (int COUNTER = 0; COUNTER < 
STRIPED_COUNTERS_PER_THREAD; ++COUNTER) - { - thread_counters[CHANNEL][COUNTER] = 0; - } - } - } - - - /** - * Composite a tile of input items - */ - __device__ __forceinline__ void Composite( - SampleT (&items)[ITEMS_PER_THREAD], ///< Tile of samples - HistoCounter thread_counters[STRIPED_COUNTERS_PER_THREAD]) ///< Histogram counters striped across threads - { - // Sort bytes in blocked arrangement - BlockRadixSortT(temp_storage.sort).Sort(items); - - __syncthreads(); - - // Initialize the shared memory's run_begin and run_end for each bin - #pragma unroll - for (int COUNTER = 0; COUNTER < STRIPED_COUNTERS_PER_THREAD; ++COUNTER) - { - temp_storage.run_begin[(COUNTER * BLOCK_THREADS) + threadIdx.x] = TILE_CHANNEL_ITEMS; - temp_storage.run_end[(COUNTER * BLOCK_THREADS) + threadIdx.x] = TILE_CHANNEL_ITEMS; - } - - __syncthreads(); - - // Note the begin/end run offsets of bin runs in the sorted tile - int flags[ITEMS_PER_THREAD]; // unused - DiscontinuityOp flag_op(temp_storage); - BlockDiscontinuityT(temp_storage.flag).FlagHeads(flags, items, flag_op); - - // Update begin for first item - if (threadIdx.x == 0) temp_storage.run_begin[items[0]] = 0; - - __syncthreads(); - - // Composite into histogram - // Initialize the shared memory's run_begin and run_end for each bin - #pragma unroll - for (int COUNTER = 0; COUNTER < STRIPED_COUNTERS_PER_THREAD; ++COUNTER) - { - int bin = (COUNTER * BLOCK_THREADS) + threadIdx.x; - HistoCounter run_length = temp_storage.run_end[bin] - temp_storage.run_begin[bin]; - - thread_counters[COUNTER] += run_length; - } - } - - - /** - * Process one channel within a tile. - */ - template - __device__ __forceinline__ void ConsumeTileChannel( - int channel, - SizeT block_offset, - int valid_items) - { - // Load items in striped fashion - if (FULL_TILE) - { - // Full tile of samples to read and composite - SampleT items[ITEMS_PER_THREAD]; - - // Unguarded loads - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = d_in[channel + block_offset + (ITEM * BLOCK_THREADS * CHANNELS) + (threadIdx.x * CHANNELS)]; - } - - // Composite our histogram data - Composite(items, thread_counters[channel]); - } - else - { - // Only a partially-full tile of samples to read and composite - SampleT items[ITEMS_PER_THREAD]; - - // Assign our tid as the bin for out-of-bounds items (to give an even distribution), and keep track of how oob items to subtract out later - int bounds = (valid_items - (threadIdx.x * CHANNELS)); - - #pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) - { - items[ITEM] = ((ITEM * BLOCK_THREADS * CHANNELS) < bounds) ? - d_in[channel + block_offset + (ITEM * BLOCK_THREADS * CHANNELS) + (threadIdx.x * CHANNELS)] : - 0; - } - - // Composite our histogram data - Composite(items, thread_counters[channel]); - - __syncthreads(); - - // Correct the overcounting in the zero-bin from invalid (out-of-bounds) items - if (threadIdx.x == 0) - { - int extra = (TILE_ITEMS - valid_items) / CHANNELS; - thread_counters[channel][0] -= extra; - } - } - } - - - /** - * Template iteration over channels (to silence not-unrolled warnings for SM10-13). Inductive step. - */ - template - struct IterateChannels - { - /** - * Process one channel within a tile. 
- */ - static __device__ __forceinline__ void ConsumeTileChannel( - BlockHistogramTilesSort *cta, - SizeT block_offset, - int valid_items) - { - __syncthreads(); - - cta->ConsumeTileChannel(CHANNEL, block_offset, valid_items); - - IterateChannels::ConsumeTileChannel(cta, block_offset, valid_items); - } - }; - - - /** - * Template iteration over channels (to silence not-unrolled warnings for SM10-13). Base step. - */ - template - struct IterateChannels - { - static __device__ __forceinline__ void ConsumeTileChannel(BlockHistogramTilesSort *cta, SizeT block_offset, int valid_items) {} - }; - - - /** - * Process a single tile of input - */ - template - __device__ __forceinline__ void ConsumeTile( - SizeT block_offset, ///< The offset the tile to consume - int valid_items = TILE_ITEMS) ///< The number of valid items in the tile - { - // First channel - ConsumeTileChannel(0, block_offset, valid_items); - - // Iterate through remaining channels - IterateChannels::ConsumeTileChannel(this, block_offset, valid_items); - } - - - /** - * Aggregate results into output - */ - __device__ __forceinline__ void AggregateOutput() - { - // Copy counters striped across threads into the histogram output - #pragma unroll - for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) - { - int channel_offset = (blockIdx.x * BINS); - - #pragma unroll - for (int COUNTER = 0; COUNTER < STRIPED_COUNTERS_PER_THREAD; ++COUNTER) - { - int bin = (COUNTER * BLOCK_THREADS) + threadIdx.x; - - if ((STRIPED_COUNTERS_PER_THREAD * BLOCK_THREADS == BINS) || (bin < BINS)) - { - d_out_histograms[CHANNEL][channel_offset + bin] = thread_counters[CHANNEL][COUNTER]; - } - } - } - } -}; - - - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/device/device_histogram.cuh b/kokkos/kokkos/TPL/cub/device/device_histogram.cuh deleted file mode 100644 index 6f5a74d..0000000 --- a/kokkos/kokkos/TPL/cub/device/device_histogram.cuh +++ /dev/null @@ -1,1062 +0,0 @@ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
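BlockHistogramTilesSort avoids atomics altogether: each tile is radix-sorted within the block, BlockDiscontinuity flags where runs of equal bin values begin and end, and a bin's count is simply its run length; out-of-bounds samples in a partial tile are parked in bin 0 and subtracted back out afterwards. The function below is a host-side analogue of that counting-by-runs idea, written only to illustrate the technique; it is not part of the deleted kernels.

// Host-side analogue (not CUB code) of the sort-then-count-runs idea used by
// BlockHistogramTilesSort: after sorting a tile, each bin's count is the length
// of that bin's contiguous run, so no atomic collisions can occur.

#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<int> histogram_by_runs(std::vector<unsigned char> tile, int bins)
{
    std::sort(tile.begin(), tile.end());            // BlockRadixSort plays this role on the GPU

    std::vector<int> histo(bins, 0);
    std::size_t run_begin = 0;
    for (std::size_t i = 1; i <= tile.size(); ++i)
    {
        // A discontinuity (or the end of the tile) closes the current run
        if (i == tile.size() || tile[i] != tile[run_begin])
        {
            histo[tile[run_begin]] += static_cast<int>(i - run_begin);
            run_begin = i;
        }
    }
    return histo;
}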
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from samples data residing within global memory. - */ - -#pragma once - -#include -#include - -#include "block/block_histo_tiles.cuh" -#include "../grid/grid_even_share.cuh" -#include "../grid/grid_queue.cuh" -#include "../util_debug.cuh" -#include "../util_device.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Kernel entry points - *****************************************************************************/ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - -/** - * Initialization pass kernel entry point (multi-block). Prepares queue descriptors zeroes global counters. - */ -template < - int BINS, ///< Number of histogram bins per channel - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename SizeT, ///< Integer type used for global array indexing - typename HistoCounter> ///< Integral type for counting sample occurrences per histogram bin -__launch_bounds__ (BINS, 1) -__global__ void InitHistoKernel( - GridQueue grid_queue, ///< [in] Descriptor for performing dynamic mapping of tile data to thread blocks - ArrayWrapper d_out_histograms, ///< [out] Histogram counter data having logical dimensions HistoCounter[ACTIVE_CHANNELS][BINS] - SizeT num_samples) ///< [in] Total number of samples \p d_samples for all channels -{ - d_out_histograms.array[blockIdx.x][threadIdx.x] = 0; - if (threadIdx.x == 0) grid_queue.ResetDrain(num_samples); -} - - -/** - * Histogram pass kernel entry point (multi-block). Computes privatized histograms, one per thread block. - */ -template < - typename BlockHistogramTilesPolicy, ///< Tuning policy for cub::BlockHistogramTiles abstraction - int BINS, ///< Number of histogram bins per channel - int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). Must have a value type that is assignable to unsigned char - typename HistoCounter, ///< Integral type for counting sample occurrences per histogram bin - typename SizeT> ///< Integer type used for global array indexing -__launch_bounds__ (int(BlockHistogramTilesPolicy::BLOCK_THREADS), BlockHistogramTilesPolicy::SM_OCCUPANCY) -__global__ void MultiBlockHistogramKernel( - InputIteratorRA d_samples, ///< [in] Array of sample data. The samples from different channels are assumed to be interleaved (e.g., an array of 32b pixels where each pixel consists of four RGBA 8b samples). 
- ArrayWrapper d_out_histograms, ///< [out] Histogram counter data having logical dimensions HistoCounter[ACTIVE_CHANNELS][gridDim.x][BINS] - SizeT num_samples, ///< [in] Total number of samples \p d_samples for all channels - GridEvenShare even_share, ///< [in] Descriptor for how to map an even-share of tiles across thread blocks - GridQueue queue) ///< [in] Descriptor for performing dynamic mapping of tile data to thread blocks -{ - // Constants - enum - { - BLOCK_THREADS = BlockHistogramTilesPolicy::BLOCK_THREADS, - ITEMS_PER_THREAD = BlockHistogramTilesPolicy::ITEMS_PER_THREAD, - TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD, - }; - - // Thread block type for compositing input tiles - typedef BlockHistogramTiles BlockHistogramTilesT; - - // Shared memory for BlockHistogramTiles - __shared__ typename BlockHistogramTilesT::TempStorage temp_storage; - - // Consume input tiles - BlockHistogramTilesT(temp_storage, d_samples, d_out_histograms.array).ConsumeTiles( - num_samples, - even_share, - queue, - Int2Type()); -} - - -/** - * Block-aggregation pass kernel entry point (single-block). Aggregates privatized threadblock histograms from a previous multi-block histogram pass. - */ -template < - int BINS, ///< Number of histogram bins per channel - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename HistoCounter> ///< Integral type for counting sample occurrences per histogram bin -__launch_bounds__ (BINS, 1) -__global__ void AggregateHistoKernel( - HistoCounter* d_block_histograms, ///< [in] Histogram counter data having logical dimensions HistoCounter[ACTIVE_CHANNELS][num_threadblocks][BINS] - ArrayWrapper d_out_histograms, ///< [out] Histogram counter data having logical dimensions HistoCounter[ACTIVE_CHANNELS][BINS] - int num_threadblocks) ///< [in] Number of threadblock histograms per channel in \p d_block_histograms -{ - // Accumulate threadblock-histograms from the channel - HistoCounter bin_aggregate = 0; - - int block_offset = blockIdx.x * (num_threadblocks * BINS); - int block_oob = block_offset + (num_threadblocks * BINS); - -#if CUB_PTX_ARCH >= 200 - #pragma unroll 32 -#endif - while (block_offset < block_oob) - { - bin_aggregate += d_block_histograms[block_offset + threadIdx.x]; - block_offset += BINS; - } - - // Output - d_out_histograms.array[blockIdx.x][threadIdx.x] = bin_aggregate; -} - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - -/****************************************************************************** - * DeviceHistogram - *****************************************************************************/ - -/** - * \brief DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from samples data residing within global memory. ![](histogram_logo.png) - * \ingroup DeviceModule - * - * \par Overview - * A histogram - * counts the number of observations that fall into each of the disjoint categories (known as bins). - * - * \par Usage Considerations - * \cdp_class{DeviceHistogram} - * - * \par Performance - * - * \image html histo_perf.png - * - */ -struct DeviceHistogram -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /// Generic structure for encapsulating dispatch properties. Mirrors the constants within BlockHistogramTilesPolicy. 
- struct KernelDispachParams - { - // Policy fields - int block_threads; - int items_per_thread; - BlockHistogramTilesAlgorithm block_algorithm; - GridMappingStrategy grid_mapping; - int subscription_factor; - - // Derived fields - int channel_tile_size; - - template - __host__ __device__ __forceinline__ - void Init(int subscription_factor = 1) - { - block_threads = BlockHistogramTilesPolicy::BLOCK_THREADS; - items_per_thread = BlockHistogramTilesPolicy::ITEMS_PER_THREAD; - block_algorithm = BlockHistogramTilesPolicy::GRID_ALGORITHM; - grid_mapping = BlockHistogramTilesPolicy::GRID_MAPPING; - this->subscription_factor = subscription_factor; - - channel_tile_size = block_threads * items_per_thread; - } - - __host__ __device__ __forceinline__ - void Print() - { - printf("%d, %d, %d, %d, %d", - block_threads, - items_per_thread, - block_algorithm, - grid_mapping, - subscription_factor); - } - - }; - - - /****************************************************************************** - * Tuning policies - ******************************************************************************/ - - /// Specializations of tuned policy types for different PTX architectures - template < - int CHANNELS, - int ACTIVE_CHANNELS, - BlockHistogramTilesAlgorithm GRID_ALGORITHM, - int ARCH> - struct TunedPolicies; - - /// SM35 tune - template - struct TunedPolicies - { - typedef BlockHistogramTilesPolicy< - (GRID_ALGORITHM == GRID_HISTO_SORT) ? 128 : 256, - (GRID_ALGORITHM == GRID_HISTO_SORT) ? 12 : (30 / ACTIVE_CHANNELS), - GRID_ALGORITHM, - (GRID_ALGORITHM == GRID_HISTO_SORT) ? GRID_MAPPING_DYNAMIC : GRID_MAPPING_EVEN_SHARE, - (GRID_ALGORITHM == GRID_HISTO_SORT) ? 8 : 1> MultiBlockPolicy; - enum { SUBSCRIPTION_FACTOR = 7 }; - }; - - /// SM30 tune - template - struct TunedPolicies - { - typedef BlockHistogramTilesPolicy< - 128, - (GRID_ALGORITHM == GRID_HISTO_SORT) ? 20 : (22 / ACTIVE_CHANNELS), - GRID_ALGORITHM, - (GRID_ALGORITHM == GRID_HISTO_SORT) ? GRID_MAPPING_DYNAMIC : GRID_MAPPING_EVEN_SHARE, - 1> MultiBlockPolicy; - enum { SUBSCRIPTION_FACTOR = 1 }; - }; - - /// SM20 tune - template - struct TunedPolicies - { - typedef BlockHistogramTilesPolicy< - 128, - (GRID_ALGORITHM == GRID_HISTO_SORT) ? 21 : (23 / ACTIVE_CHANNELS), - GRID_ALGORITHM, - GRID_MAPPING_DYNAMIC, - 1> MultiBlockPolicy; - enum { SUBSCRIPTION_FACTOR = 1 }; - }; - - /// SM10 tune - template - struct TunedPolicies - { - typedef BlockHistogramTilesPolicy< - 128, - 7, - GRID_HISTO_SORT, // (use sort regardless because atomics are perf-useless) - GRID_MAPPING_EVEN_SHARE, - 1> MultiBlockPolicy; - enum { SUBSCRIPTION_FACTOR = 1 }; - }; - - - /// Tuning policy for the PTX architecture that DeviceHistogram operations will get dispatched to - template < - int CHANNELS, - int ACTIVE_CHANNELS, - BlockHistogramTilesAlgorithm GRID_ALGORITHM> - struct PtxDefaultPolicies - { - static const int PTX_TUNE_ARCH = (CUB_PTX_ARCH >= 350) ? - 350 : - (CUB_PTX_ARCH >= 300) ? - 300 : - (CUB_PTX_ARCH >= 200) ? 
- 200 : - 100; - - // Tuned policy set for the current PTX compiler pass - typedef TunedPolicies PtxTunedPolicies; - - // Subscription factor for the current PTX compiler pass - static const int SUBSCRIPTION_FACTOR = PtxTunedPolicies::SUBSCRIPTION_FACTOR; - - // MultiBlockPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct MultiBlockPolicy : PtxTunedPolicies::MultiBlockPolicy {}; - - /** - * Initialize dispatch params with the policies corresponding to the PTX assembly we will use - */ - static void InitDispatchParams(int ptx_version, KernelDispachParams &multi_block_dispatch_params) - { - if (ptx_version >= 350) - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - } - else if (ptx_version >= 300) - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - } - else if (ptx_version >= 200) - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - } - else - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - } - } - }; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /** - * Internal dispatch routine for invoking device-wide, multi-channel, histogram - */ - template < - int BINS, ///< Number of histogram bins per channel - int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename InitHistoKernelPtr, ///< Function type of cub::InitHistoKernel - typename MultiBlockHistogramKernelPtr, ///< Function type of cub::MultiBlockHistogramKernel - typename AggregateHistoKernelPtr, ///< Function type of cub::AggregateHistoKernel - typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). Must have a value type that is assignable to unsigned char - typename HistoCounter, ///< Integral type for counting sample occurrences per histogram bin - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InitHistoKernelPtr init_kernel, ///< [in] Kernel function pointer to parameterization of cub::InitHistoKernel - MultiBlockHistogramKernelPtr multi_block_kernel, ///< [in] Kernel function pointer to parameterization of cub::MultiBlockHistogramKernel - AggregateHistoKernelPtr aggregate_kernel, ///< [in] Kernel function pointer to parameterization of cub::AggregateHistoKernel - KernelDispachParams &multi_block_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p multi_block_kernel was compiled for - InputIteratorRA d_samples, ///< [in] Input samples to histogram - HistoCounter *d_histograms[ACTIVE_CHANNELS], ///< [out] Array of channel histograms, each having BINS counters of integral type \p HistoCounter. 
- SizeT num_samples, ///< [in] Number of samples to process - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. - { -#ifndef CUB_RUNTIME_ENABLED - - // Kernel launch not supported from this device - return CubDebug(cudaErrorNotSupported); - -#else - - cudaError error = cudaSuccess; - do - { - // Get device ordinal - int device_ordinal; - if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; - - // Get SM count - int sm_count; - if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; - - // Get a rough estimate of multi_block_kernel SM occupancy based upon the maximum SM occupancy of the targeted PTX architecture - int multi_block_sm_occupancy = CUB_MIN( - ArchProps::MAX_SM_THREADBLOCKS, - ArchProps::MAX_SM_THREADS / multi_block_dispatch_params.block_threads); - -#ifndef __CUDA_ARCH__ - // We're on the host, so come up with a more accurate estimate of multi_block_kernel SM occupancy from actual device properties - Device device_props; - if (CubDebug(error = device_props.Init(device_ordinal))) break; - - if (CubDebug(error = device_props.MaxSmOccupancy( - multi_block_sm_occupancy, - multi_block_kernel, - multi_block_dispatch_params.block_threads))) break; -#endif - - // Get device occupancy for multi_block_kernel - int multi_block_occupancy = multi_block_sm_occupancy * sm_count; - - // Even-share work distribution - GridEvenShare even_share; - - // Get tile size for multi_block_kernel - int multi_block_tile_size = multi_block_dispatch_params.channel_tile_size * CHANNELS; - - // Get grid size for multi_block_kernel - int multi_block_grid_size; - switch (multi_block_dispatch_params.grid_mapping) - { - case GRID_MAPPING_EVEN_SHARE: - - // Work is distributed evenly - even_share.GridInit( - num_samples, - multi_block_occupancy * multi_block_dispatch_params.subscription_factor, - multi_block_tile_size); - multi_block_grid_size = even_share.grid_size; - break; - - case GRID_MAPPING_DYNAMIC: - - // Work is distributed dynamically - int num_tiles = (num_samples + multi_block_tile_size - 1) / multi_block_tile_size; - multi_block_grid_size = (num_tiles < multi_block_occupancy) ? 
- num_tiles : // Not enough to fill the device with threadblocks - multi_block_occupancy; // Fill the device with threadblocks - break; - }; - - // Temporary storage allocation requirements - void* allocations[2]; - size_t allocation_sizes[2] = - { - ACTIVE_CHANNELS * multi_block_grid_size * sizeof(HistoCounter) * BINS, // bytes needed for privatized histograms - GridQueue::AllocationSize() // bytes needed for grid queue descriptor - }; - - if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; - - // Return if the caller is simply requesting the size of the storage allocation - if (d_temp_storage == NULL) - return cudaSuccess; - - // Privatized per-block reductions - HistoCounter *d_block_histograms = (HistoCounter*) allocations[0]; - - // Grid queue descriptor - GridQueue queue(allocations[1]); - - // Setup array wrapper for histogram channel output (because we can't pass static arrays as kernel parameters) - ArrayWrapper d_histo_wrapper; - for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) - d_histo_wrapper.array[CHANNEL] = d_histograms[CHANNEL]; - - // Setup array wrapper for temporary histogram channel output (because we can't pass static arrays as kernel parameters) - ArrayWrapper d_temp_histo_wrapper; - for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) - d_temp_histo_wrapper.array[CHANNEL] = d_block_histograms + (CHANNEL * multi_block_grid_size * BINS); - - // Log init_kernel configuration - if (stream_synchronous) CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", ACTIVE_CHANNELS, BINS, (long long) stream); - - // Invoke init_kernel to initialize counters and queue descriptor - init_kernel<<>>(queue, d_histo_wrapper, num_samples); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - // Whether we need privatized histograms (i.e., non-global atomics and multi-block) - bool privatized_temporaries = (multi_block_grid_size > 1) && (multi_block_dispatch_params.block_algorithm != GRID_HISTO_GLOBAL_ATOMIC); - - // Log multi_block_kernel configuration - if (stream_synchronous) CubLog("Invoking multi_block_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", - multi_block_grid_size, multi_block_dispatch_params.block_threads, (long long) stream, multi_block_dispatch_params.items_per_thread, multi_block_sm_occupancy); - - // Invoke multi_block_kernel - multi_block_kernel<<>>( - d_samples, - (privatized_temporaries) ? 
- d_temp_histo_wrapper : - d_histo_wrapper, - num_samples, - even_share, - queue); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - // Aggregate privatized block histograms if necessary - if (privatized_temporaries) - { - // Log aggregate_kernel configuration - if (stream_synchronous) CubLog("Invoking aggregate_kernel<<<%d, %d, 0, %lld>>>()\n", - ACTIVE_CHANNELS, BINS, (long long) stream); - - // Invoke aggregate_kernel - aggregate_kernel<<>>( - d_block_histograms, - d_histo_wrapper, - multi_block_grid_size); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - } - } - while (0); - - return error; -#endif // CUB_RUNTIME_ENABLED - } - - - /** - * \brief Computes a device-wide histogram - * - * \tparam GRID_ALGORITHM cub::BlockHistogramTilesAlgorithm enumerator specifying the underlying algorithm to use - * \tparam CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) - * \tparam ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) Must have a value type that is assignable to unsigned char - * \tparam HistoCounter [inferred] Integral type for counting sample occurrences per histogram bin - */ - template < - BlockHistogramTilesAlgorithm GRID_ALGORITHM, - int BINS, ///< Number of histogram bins per channel - int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) - int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed - typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). Must have a value type that is assignable to unsigned char - typename HistoCounter> ///< Integral type for counting sample occurrences per histogram bin - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_samples, ///< [in] Input samples to histogram - HistoCounter *d_histograms[ACTIVE_CHANNELS], ///< [out] Array of channel histograms, each having BINS counters of integral type \p HistoCounter. - int num_samples, ///< [in] Number of samples to process - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. 
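The Dispatch routine above sizes and carves a single caller-provided d_temp_storage blob into the privatized histograms and the GridQueue descriptor via AliasTemporaries, and treats a NULL blob as a size-only query. The helper below is a hedged sketch of that convention for exactly two sub-allocations, with an assumed 256-byte alignment; it is not the real AliasTemporaries utility.

// Hedged sketch of the temporary-storage convention used by Dispatch: one device
// blob is partitioned into the sub-allocations the kernels need, and a NULL blob
// means "just report the required size".

#include <cstddef>
#include <cuda_runtime.h>

inline cudaError_t carve_temporaries(void         *d_temp_storage,
                                     size_t       &temp_storage_bytes,
                                     void         *(&allocations)[2],
                                     const size_t (&allocation_sizes)[2])
{
    const size_t ALIGN = 256;                       // illustrative alignment
    size_t offsets[2];
    size_t total = 0;
    for (int i = 0; i < 2; ++i)
    {
        offsets[i] = total;
        total += (allocation_sizes[i] + ALIGN - 1) / ALIGN * ALIGN;
    }

    if (d_temp_storage == NULL)                     // size-query call
    {
        temp_storage_bytes = total;
        return cudaSuccess;
    }
    if (temp_storage_bytes < total)                 // caller under-allocated
        return cudaErrorInvalidValue;

    for (int i = 0; i < 2; ++i)
        allocations[i] = static_cast<char*>(d_temp_storage) + offsets[i];
    return cudaSuccess;
}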
- { - // Type used for array indexing - typedef int SizeT; - - // Tuning polices for the PTX architecture that will get dispatched to - typedef PtxDefaultPolicies PtxDefaultPolicies; - typedef typename PtxDefaultPolicies::MultiBlockPolicy MultiBlockPolicy; - - cudaError error = cudaSuccess; - do - { - // Declare dispatch parameters - KernelDispachParams multi_block_dispatch_params; - - #ifdef __CUDA_ARCH__ - - // We're on the device, so initialize the dispatch parameters with the PtxDefaultPolicies directly - multi_block_dispatch_params.Init(PtxDefaultPolicies::SUBSCRIPTION_FACTOR); - - #else - - // We're on the host, so lookup and initialize the dispatch parameters with the policies that match the device's PTX version - int ptx_version; - if (CubDebug(error = PtxVersion(ptx_version))) break; - PtxDefaultPolicies::InitDispatchParams(ptx_version, multi_block_dispatch_params); - - #endif - - Dispatch( - d_temp_storage, - temp_storage_bytes, - InitHistoKernel, - MultiBlockHistogramKernel, - AggregateHistoKernel, - multi_block_dispatch_params, - d_samples, - d_histograms, - num_samples, - stream, - stream_synchronous); - - if (CubDebug(error)) break; - } - while (0); - - return error; - } - - #endif // DOXYGEN_SHOULD_SKIP_THIS - - - /******************************************************************//** - * \name Single-channel samples - *********************************************************************/ - //@{ - - - /** - * \brief Computes a device-wide histogram. Uses fast block-sorting to compute the histogram. Delivers consistent throughput regardless of sample diversity, but occupancy may be limited by histogram bin count. - * - * However, because histograms are privatized in shared memory, a large - * number of bins (e.g., thousands) may adversely affect occupancy and - * performance (or even the ability to launch). - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the computation of a 256-bin histogram of - * single-channel unsigned char samples. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input samples and 256-bin output histogram - * unsigned char *d_samples; - * unsigned int *d_histogram; - * int num_items = ... - * ... 
- * - * // Wrap d_samples device pointer in a random-access texture iterator - * cub::TexIteratorRA d_samples_tex_itr; - * d_samples_tex_itr.BindTexture(d_samples, num_items * sizeof(unsigned char)); - * - * // Determine temporary device storage requirements for histogram computation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceHistogram::SingleChannelSorting<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histogram, num_items); - * - * // Allocate temporary storage for histogram computation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Compute histogram - * cub::DeviceHistogram::SingleChannelSorting<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histogram, num_items); - * - * // Unbind texture iterator - * d_samples_tex_itr.UnbindTexture(); - * - * \endcode - * - * \tparam BINS Number of histogram bins per channel - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) Must have a value type that can be cast as an integer in the range [0..BINS-1] - * \tparam HistoCounter [inferred] Integral type for counting sample occurrences per histogram bin - */ - template < - int BINS, - typename InputIteratorRA, - typename HistoCounter> - __host__ __device__ __forceinline__ - static cudaError_t SingleChannelSorting( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_samples, ///< [in] Input samples - HistoCounter* d_histogram, ///< [out] Array of BINS counters of integral type \p HistoCounter. - int num_samples, ///< [in] Number of samples to process - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. - { - return Dispatch( - d_temp_storage, temp_storage_bytes, d_samples, &d_histogram, num_samples, stream, stream_synchronous); - } - - - /** - * \brief Computes a device-wide histogram. Uses shared-memory atomic read-modify-write operations to compute the histogram. Input samples having lower diversity can cause performance to be degraded, and occupancy may be limited by histogram bin count. - * - * However, because histograms are privatized in shared memory, a large - * number of bins (e.g., thousands) may adversely affect occupancy and - * performance (or even the ability to launch). - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the computation of a 256-bin histogram of - * single-channel unsigned char samples. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input samples and 256-bin output histogram - * unsigned char *d_samples; - * unsigned int *d_histogram; - * int num_items = ... - * ... 
- * - * // Wrap d_samples device pointer in a random-access texture iterator - * cub::TexIteratorRA d_samples_tex_itr; - * d_samples_tex_itr.BindTexture(d_samples, num_items * sizeof(unsigned char)); - * - * // Determine temporary device storage requirements for histogram computation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceHistogram::SingleChannelSorting<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histogram, num_items); - * - * // Allocate temporary storage for histogram computation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Compute histogram - * cub::DeviceHistogram::SingleChannelSharedAtomic<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histogram, num_items); - * - * // Unbind texture iterator - * d_samples_tex_itr.UnbindTexture(); - * - * \endcode - * - * \tparam BINS Number of histogram bins per channel - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) Must have a value type that can be cast as an integer in the range [0..BINS-1] - * \tparam HistoCounter [inferred] Integral type for counting sample occurrences per histogram bin - */ - template < - int BINS, - typename InputIteratorRA, - typename HistoCounter> - __host__ __device__ __forceinline__ - static cudaError_t SingleChannelSharedAtomic( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_samples, ///< [in] Input samples - HistoCounter* d_histogram, ///< [out] Array of BINS counters of integral type \p HistoCounter. - int num_samples, ///< [in] Number of samples to process - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch( - d_temp_storage, temp_storage_bytes, d_samples, &d_histogram, num_samples, stream, stream_synchronous); - } - - - /** - * \brief Computes a device-wide histogram. Uses global-memory atomic read-modify-write operations to compute the histogram. Input samples having lower diversity can cause performance to be degraded. - * - * Performance is not significantly impacted when computing histograms having large - * numbers of bins (e.g., thousands). - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the computation of a 256-bin histogram of - * single-channel unsigned char samples. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input samples and 256-bin output histogram - * unsigned char *d_samples; - * unsigned int *d_histogram; - * int num_items = ... - * ... 
- * - * // Wrap d_samples device pointer in a random-access texture iterator - * cub::TexIteratorRA d_samples_tex_itr; - * d_samples_tex_itr.BindTexture(d_samples, num_items * sizeof(unsigned char)); - * - * // Determine temporary device storage requirements for histogram computation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceHistogram::SingleChannelGlobalAtomic<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histogram, num_items); - * - * // Allocate temporary storage for histogram computation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Compute histogram - * cub::DeviceHistogram::SingleChannelGlobalAtomic<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histogram, num_items); - * - * // Unbind texture iterator - * d_samples_tex_itr.UnbindTexture(); - * - * \endcode - * - * \tparam BINS Number of histogram bins per channel - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) Must have a value type that can be cast as an integer in the range [0..BINS-1] - * \tparam HistoCounter [inferred] Integral type for counting sample occurrences per histogram bin - */ - template < - int BINS, - typename InputIteratorRA, - typename HistoCounter> - __host__ __device__ __forceinline__ - static cudaError_t SingleChannelGlobalAtomic( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_samples, ///< [in] Input samples - HistoCounter* d_histogram, ///< [out] Array of BINS counters of integral type \p HistoCounter. - int num_samples, ///< [in] Number of samples to process - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch( - d_temp_storage, temp_storage_bytes, d_samples, &d_histogram, num_samples, stream, stream_synchronous); - } - - - //@} end member group - /******************************************************************//** - * \name Interleaved multi-channel samples - *********************************************************************/ - //@{ - - - /** - * \brief Computes a device-wide histogram from multi-channel data. Uses fast block-sorting to compute the histogram. Delivers consistent throughput regardless of sample diversity, but occupancy may be limited by histogram bin count. - * - * However, because histograms are privatized in shared memory, a large - * number of bins (e.g., thousands) may adversely affect occupancy and - * performance (or even the ability to launch). - * - * The total number of samples across all channels (\p num_samples) must be a whole multiple of \p CHANNELS. - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the computation of three 256-bin histograms from - * interleaved quad-channel unsigned char samples (e.g., RGB histograms from RGBA samples). - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input samples and - * // three 256-bin output histograms - * unsigned char *d_samples; - * unsigned int *d_histograms[3]; - * int num_items = ...
- * ... - * - * // Wrap d_samples device pointer in a random-access texture iterator - * cub::TexIteratorRA d_samples_tex_itr; - * d_samples_tex_itr.BindTexture(d_samples, num_items * sizeof(unsigned char)); - * - * // Determine temporary device storage requirements for histogram computation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceHistogram::MultiChannelSorting<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histograms, num_items); - * - * // Allocate temporary storage for histogram computation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Compute histograms - * cub::DeviceHistogram::MultiChannelSorting<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histograms, num_items); - * - * // Unbind texture iterator - * d_samples_tex_itr.UnbindTexture(); - * - * \endcode - * - * \tparam BINS Number of histogram bins per channel - * \tparam CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) - * \tparam ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) Must have a value type that can be cast as an integer in the range [0..BINS-1] - * \tparam HistoCounter [inferred] Integral type for counting sample occurrences per histogram bin - */ - template < - int BINS, - int CHANNELS, - int ACTIVE_CHANNELS, - typename InputIteratorRA, - typename HistoCounter> - __host__ __device__ __forceinline__ - static cudaError_t MultiChannelSorting( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_samples, ///< [in] Input samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32b pixels where each pixel consists of four RGBA 8b samples). - HistoCounter *d_histograms[ACTIVE_CHANNELS], ///< [out] Array of channel histogram counter arrays, each having BINS counters of integral type \p HistoCounter. - int num_samples, ///< [in] Total number of samples to process in all channels, including non-active channels - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch( - d_temp_storage, temp_storage_bytes, d_samples, d_histograms, num_samples, stream, stream_synchronous); - } - - - /** - * \brief Computes a device-wide histogram from multi-channel data. Uses shared-memory atomic read-modify-write operations to compute the histogram. Input samples having lower diversity can cause performance to be degraded, and occupancy may be limited by histogram bin count. - * - * However, because histograms are privatized in shared memory, a large - * number of bins (e.g., thousands) may adversely affect occupancy and - * performance (or even the ability to launch). - * - * The total number of samples across all channels (\p num_samples) must be a whole multiple of \p CHANNELS. 
- * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the computation of three 256-bin histograms from - * interleaved quad-channel unsigned char samples (e.g., RGB histograms from RGBA samples). - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input samples and - * // three 256-bin output histograms - * unsigned char *d_samples; - * unsigned int *d_histograms[3]; - * int num_items = ... - * ... - * - * // Wrap d_samples device pointer in a random-access texture iterator - * cub::TexIteratorRA d_samples_tex_itr; - * d_samples_tex_itr.BindTexture(d_samples, num_items * sizeof(unsigned char)); - * - * // Determine temporary device storage requirements for histogram computation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceHistogram::MultiChannelSharedAtomic<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histograms, num_items); - * - * // Allocate temporary storage for histogram computation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Compute histograms - * cub::DeviceHistogram::MultiChannelSharedAtomic<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histograms, num_items); - * - * // Unbind texture iterator - * d_samples_tex_itr.UnbindTexture(); - * - * \endcode - * - * \tparam BINS Number of histogram bins per channel - * \tparam CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) - * \tparam ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) Must have a value type that can be cast as an integer in the range [0..BINS-1] - * \tparam HistoCounter [inferred] Integral type for counting sample occurrences per histogram bin - */ - template < - int BINS, - int CHANNELS, - int ACTIVE_CHANNELS, - typename InputIteratorRA, - typename HistoCounter> - __host__ __device__ __forceinline__ - static cudaError_t MultiChannelSharedAtomic( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_samples, ///< [in] Input samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32b pixels where each pixel consists of four RGBA 8b samples). - HistoCounter *d_histograms[ACTIVE_CHANNELS], ///< [out] Array of channel histogram counter arrays, each having BINS counters of integral type \p HistoCounter. - int num_samples, ///< [in] Total number of samples to process in all channels, including non-active channels - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch( - d_temp_storage, temp_storage_bytes, d_samples, d_histograms, num_samples, stream, stream_synchronous); - } - - - /** - * \brief Computes a device-wide histogram from multi-channel data. Uses global-memory atomic read-modify-write operations to compute the histogram. Input samples having lower diversity can cause performance to be degraded. 
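The global-atomic strategy amounts to each thread issuing an atomic increment directly on its sample's output bin, whereas the shared-atomic variants above first accumulate into a privatized __shared__ histogram and flush it to global memory once per block. A minimal single-channel CUDA sketch of the global-atomic idea (illustrative only, not CUB's tuned kernel; the kernel name is made up):

    __global__ void GlobalAtomicHistogramSketch(
        const unsigned char *d_samples,     // one 8-bit sample per element
        unsigned int        *d_histogram,   // 256 counters, zero-initialized by the caller
        int                  num_samples)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < num_samples)
            atomicAdd(d_histogram + d_samples[i], 1u);   // read-modify-write straight to global memory
    }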
- * - * Performance is not significantly impacted when computing histograms having large - * numbers of bins (e.g., thousands). - * - * The total number of samples across all channels (\p num_samples) must be a whole multiple of \p CHANNELS. - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * Performance is often improved when referencing input samples through a texture-caching iterator, e.g., cub::TexIteratorRA or cub::TexTransformIteratorRA. - * - * \par - * The code snippet below illustrates the computation of three 256-bin histograms from - * interleaved quad-channel unsigned char samples (e.g., RGB histograms from RGBA samples). - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input samples and - * // three 256-bin output histograms - * unsigned char *d_samples; - * unsigned int *d_histograms[3]; - * int num_items = ... - * ... - * - * // Wrap d_samples device pointer in a random-access texture iterator - * cub::TexIteratorRA d_samples_tex_itr; - * d_samples_tex_itr.BindTexture(d_samples, num_items * sizeof(unsigned char)); - * - * // Determine temporary device storage requirements for histogram computation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceHistogram::MultiChannelGlobalAtomic<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histograms, num_items); - * - * // Allocate temporary storage for histogram computation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Compute histograms - * cub::DeviceHistogram::MultiChannelGlobalAtomic<256>(d_temp_storage, temp_storage_bytes, d_samples_tex_itr, d_histograms, num_items); - * - * // Unbind texture iterator - * d_samples_tex_itr.UnbindTexture(); - * - * \endcode - * - * \tparam BINS Number of histogram bins per channel - * \tparam CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) - * \tparam ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) Must have a value type that can be cast as an integer in the range [0..BINS-1] - * \tparam HistoCounter [inferred] Integral type for counting sample occurrences per histogram bin - */ - template < - int BINS, - int CHANNELS, - int ACTIVE_CHANNELS, - typename InputIteratorRA, - typename HistoCounter> - __host__ __device__ __forceinline__ - static cudaError_t MultiChannelGlobalAtomic( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_samples, ///< [in] Input samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32b pixels where each pixel consists of four RGBA 8b samples). - HistoCounter *d_histograms[ACTIVE_CHANNELS], ///< [out] Array of channel histogram counter arrays, each having BINS counters of integral type \p HistoCounter. - int num_samples, ///< [in] Total number of samples to process in all channels, including non-active channels - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. 
May cause significant slowdown. Default is \p false. - { - return Dispatch( - d_temp_storage, temp_storage_bytes, d_samples, d_histograms, num_samples, stream, stream_synchronous); - } - - //@} end member group - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/device/device_radix_sort.cuh b/kokkos/kokkos/TPL/cub/device/device_radix_sort.cuh deleted file mode 100644 index 087d546..0000000 --- a/kokkos/kokkos/TPL/cub/device/device_radix_sort.cuh +++ /dev/null @@ -1,890 +0,0 @@ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::DeviceRadixSort provides operations for computing a device-wide, parallel reduction across data items residing within global memory. - */ - -#pragma once - -#include -#include - -#include "block/block_radix_sort_upsweep_tiles.cuh" -#include "block/block_radix_sort_downsweep_tiles.cuh" -#include "block/block_scan_tiles.cuh" -#include "../grid/grid_even_share.cuh" -#include "../util_debug.cuh" -#include "../util_device.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - - - -/****************************************************************************** - * Kernel entry points - *****************************************************************************/ - -/** - * Upsweep pass kernel entry point (multi-block). Computes privatized digit histograms, one per block. 
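Before the individual kernel entry points, it may help to keep the overall shape of a least-significant-digit pass in mind. The sketch below is illustrative only; the dispatch logic further down also handles the alternate bit granularity, shared-memory bank configuration, and temp-storage aliasing:

    // Illustrative only: the shape of the per-digit pipeline run by the dispatch code below.
    inline void RadixSortPassStructureSketch(int begin_bit, int end_bit, int radix_bits)
    {
        for (int current_bit = begin_bit; current_bit < end_bit; current_bit += radix_bits)
        {
            // 1) Upsweep: each thread block histograms the current digit of its tiles and writes
            //    the counts to d_spine in a striped layout (all blocks' 0-counts, then all
            //    blocks' 1-counts, and so on).
            // 2) Spine scan: a single block runs an exclusive prefix sum over d_spine, turning
            //    the striped digit counts into global scatter offsets.
            // 3) Downsweep: each block re-reads its tiles and scatters keys (and values) to
            //    their digit bins at the scanned offsets, writing into the "pong" buffers.
            // The key/value DoubleBuffer selectors are then flipped for the next pass.
        }
    }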
- */ -template < - typename BlockRadixSortUpsweepTilesPolicy, ///< Tuning policy for cub::BlockRadixSortUpsweepTiles abstraction - typename Key, ///< Key type - typename SizeT> ///< Integer type used for global array indexing -__launch_bounds__ (int(BlockRadixSortUpsweepTilesPolicy::BLOCK_THREADS), 1) -__global__ void RadixSortUpsweepKernel( - Key *d_keys, ///< [in] Input keys buffer - SizeT *d_spine, ///< [out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) - SizeT num_items, ///< [in] Total number of input data items - int current_bit, ///< [in] Bit position of current radix digit - bool use_primary_bit_granularity, ///< [in] Whether nor not to use the primary policy (or the embedded alternate policy for smaller bit granularity) - bool first_pass, ///< [in] Whether this is the first digit pass - GridEvenShare even_share) ///< [in] Descriptor for how to map an even-share of tiles across thread blocks -{ - - // Alternate policy for when fewer bits remain - typedef typename BlockRadixSortUpsweepTilesPolicy::AltPolicy AltPolicy; - - // Parameterize two versions of BlockRadixSortUpsweepTiles type for the current configuration - typedef BlockRadixSortUpsweepTiles BlockRadixSortUpsweepTilesT; // Primary - typedef BlockRadixSortUpsweepTiles AltBlockRadixSortUpsweepTilesT; // Alternate (smaller bit granularity) - - // Shared memory storage - __shared__ union - { - typename BlockRadixSortUpsweepTilesT::TempStorage pass_storage; - typename AltBlockRadixSortUpsweepTilesT::TempStorage alt_pass_storage; - } temp_storage; - - // Initialize even-share descriptor for this thread block - even_share.BlockInit(); - - // Process input tiles (each of the first RADIX_DIGITS threads will compute a count for that digit) - if (use_primary_bit_granularity) - { - // Primary granularity - SizeT bin_count; - BlockRadixSortUpsweepTilesT(temp_storage.pass_storage, d_keys, current_bit).ProcessTiles( - even_share.block_offset, - even_share.block_oob, - bin_count); - - // Write out digit counts (striped) - if (threadIdx.x < BlockRadixSortUpsweepTilesT::RADIX_DIGITS) - { - d_spine[(gridDim.x * threadIdx.x) + blockIdx.x] = bin_count; - } - } - else - { - // Alternate granularity - // Process input tiles (each of the first RADIX_DIGITS threads will compute a count for that digit) - SizeT bin_count; - AltBlockRadixSortUpsweepTilesT(temp_storage.alt_pass_storage, d_keys, current_bit).ProcessTiles( - even_share.block_offset, - even_share.block_oob, - bin_count); - - // Write out digit counts (striped) - if (threadIdx.x < AltBlockRadixSortUpsweepTilesT::RADIX_DIGITS) - { - d_spine[(gridDim.x * threadIdx.x) + blockIdx.x] = bin_count; - } - } -} - - -/** - * Spine scan kernel entry point (single-block). Computes an exclusive prefix sum over the privatized digit histograms - */ -template < - typename BlockScanTilesPolicy, ///< Tuning policy for cub::BlockScanTiles abstraction - typename SizeT> ///< Integer type used for global array indexing -__launch_bounds__ (int(BlockScanTilesPolicy::BLOCK_THREADS), 1) -__global__ void RadixSortScanKernel( - SizeT *d_spine, ///< [in,out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) 
- int num_counts) ///< [in] Total number of bin-counts -{ - // Parameterize the BlockScanTiles type for the current configuration - typedef BlockScanTiles BlockScanTilesT; - - // Shared memory storage - __shared__ typename BlockScanTilesT::TempStorage temp_storage; - - // Block scan instance - BlockScanTilesT block_scan(temp_storage, d_spine, d_spine, cub::Sum(), SizeT(0)) ; - - // Process full input tiles - int block_offset = 0; - RunningBlockPrefixOp prefix_op; - prefix_op.running_total = 0; - while (block_offset < num_counts) - { - block_scan.ConsumeTile(block_offset, prefix_op); - block_offset += BlockScanTilesT::TILE_ITEMS; - } -} - - -/** - * Downsweep pass kernel entry point (multi-block). Scatters keys (and values) into corresponding bins for the current digit place. - */ -template < - typename BlockRadixSortDownsweepTilesPolicy, ///< Tuning policy for cub::BlockRadixSortUpsweepTiles abstraction - typename Key, ///< Key type - typename Value, ///< Value type - typename SizeT> ///< Integer type used for global array indexing -__launch_bounds__ (int(BlockRadixSortDownsweepTilesPolicy::BLOCK_THREADS)) -__global__ void RadixSortDownsweepKernel( - Key *d_keys_in, ///< [in] Input keys ping buffer - Key *d_keys_out, ///< [in] Output keys pong buffer - Value *d_values_in, ///< [in] Input values ping buffer - Value *d_values_out, ///< [in] Output values pong buffer - SizeT *d_spine, ///< [in] Scan of privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) - SizeT num_items, ///< [in] Total number of input data items - int current_bit, ///< [in] Bit position of current radix digit - bool use_primary_bit_granularity, ///< [in] Whether nor not to use the primary policy (or the embedded alternate policy for smaller bit granularity) - bool first_pass, ///< [in] Whether this is the first digit pass - bool last_pass, ///< [in] Whether this is the last digit pass - GridEvenShare even_share) ///< [in] Descriptor for how to map an even-share of tiles across thread blocks -{ - - // Alternate policy for when fewer bits remain - typedef typename BlockRadixSortDownsweepTilesPolicy::AltPolicy AltPolicy; - - // Parameterize two versions of BlockRadixSortDownsweepTiles type for the current configuration - typedef BlockRadixSortDownsweepTiles BlockRadixSortDownsweepTilesT; - typedef BlockRadixSortDownsweepTiles AltBlockRadixSortDownsweepTilesT; - - // Shared memory storage - __shared__ union - { - typename BlockRadixSortDownsweepTilesT::TempStorage pass_storage; - typename AltBlockRadixSortDownsweepTilesT::TempStorage alt_pass_storage; - - } temp_storage; - - // Initialize even-share descriptor for this thread block - even_share.BlockInit(); - - if (use_primary_bit_granularity) - { - // Process input tiles - BlockRadixSortDownsweepTilesT(temp_storage.pass_storage, num_items, d_spine, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit).ProcessTiles( - even_share.block_offset, - even_share.block_oob); - } - else - { - // Process input tiles - AltBlockRadixSortDownsweepTilesT(temp_storage.alt_pass_storage, num_items, d_spine, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit).ProcessTiles( - even_share.block_offset, - even_share.block_oob); - } -} - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - - - -/****************************************************************************** - * DeviceRadixSort - *****************************************************************************/ - -/** - * \brief DeviceRadixSort provides 
operations for computing a device-wide, parallel radix sort across data items residing within global memory. ![](sorting_logo.png) - * \ingroup DeviceModule - * - * \par Overview - * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges - * items into ascending order. It relies upon a positional representation for - * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, - * characters, etc.) specified from least-significant to most-significant. For a - * given input sequence of keys and a set of rules specifying a total ordering - * of the symbolic alphabet, the radix sorting method produces a lexicographic - * ordering of those keys. - * - * \par - * DeviceRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: - * unsigned char, \p int, \p double, etc. Although the direct radix sorting - * method can only be applied to unsigned integral types, BlockRadixSort - * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. - * - * \par Usage Considerations - * \cdp_class{DeviceRadixSort} - * - * \par Performance - * - * \image html lsd_sort_perf.png - * - */ -struct DeviceRadixSort -{ - #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /// Generic structure for encapsulating dispatch properties codified in block policy. - struct KernelDispachParams - { - int block_threads; - int items_per_thread; - cudaSharedMemConfig smem_config; - int radix_bits; - int alt_radix_bits; - int subscription_factor; - int tile_size; - - template - __host__ __device__ __forceinline__ - void InitUpsweepPolicy(int subscription_factor = 1) - { - block_threads = SortBlockPolicy::BLOCK_THREADS; - items_per_thread = SortBlockPolicy::ITEMS_PER_THREAD; - radix_bits = SortBlockPolicy::RADIX_BITS; - alt_radix_bits = SortBlockPolicy::AltPolicy::RADIX_BITS; - smem_config = cudaSharedMemBankSizeFourByte; - this->subscription_factor = subscription_factor; - tile_size = block_threads * items_per_thread; - } - - template - __host__ __device__ __forceinline__ - void InitScanPolicy() - { - block_threads = ScanBlockPolicy::BLOCK_THREADS; - items_per_thread = ScanBlockPolicy::ITEMS_PER_THREAD; - radix_bits = 0; - alt_radix_bits = 0; - smem_config = cudaSharedMemBankSizeFourByte; - subscription_factor = 0; - tile_size = block_threads * items_per_thread; - } - - template - __host__ __device__ __forceinline__ - void InitDownsweepPolicy(int subscription_factor = 1) - { - block_threads = SortBlockPolicy::BLOCK_THREADS; - items_per_thread = SortBlockPolicy::ITEMS_PER_THREAD; - radix_bits = SortBlockPolicy::RADIX_BITS; - alt_radix_bits = SortBlockPolicy::AltPolicy::RADIX_BITS; - smem_config = SortBlockPolicy::SMEM_CONFIG; - this->subscription_factor = subscription_factor; - tile_size = block_threads * items_per_thread; - } - }; - - - - /****************************************************************************** - * Tuning policies - ******************************************************************************/ - - /// Specializations of tuned policy types for different PTX architectures - template - struct TunedPolicies; - - /// SM35 tune - template - struct TunedPolicies - { - enum { - KEYS_ONLY = (Equals::VALUE), - SCALE_FACTOR = (CUB_MAX(sizeof(Key), sizeof(Value)) + 3) / 4, - RADIX_BITS = 5, - 
}; - - // UpsweepPolicy - typedef BlockRadixSortUpsweepTilesPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR), LOAD_LDG, RADIX_BITS> UpsweepPolicyKeys; - typedef BlockRadixSortUpsweepTilesPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR), LOAD_LDG, RADIX_BITS> UpsweepPolicyPairs; - typedef typename If::Type UpsweepPolicy; -/* - // 4bit - typedef BlockRadixSortUpsweepTilesPolicy <128, 15, LOAD_LDG, RADIX_BITS> UpsweepPolicyKeys; - typedef BlockRadixSortUpsweepTilesPolicy <256, 13, LOAD_LDG, RADIX_BITS> UpsweepPolicyPairs; -*/ - // ScanPolicy - typedef BlockScanTilesPolicy <1024, 4, BLOCK_LOAD_VECTORIZE, false, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, false, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; - - // DownsweepPolicy - typedef BlockRadixSortDownsweepTilesPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR), BLOCK_LOAD_DIRECT, LOAD_LDG, false, true, BLOCK_SCAN_WARP_SCANS, RADIX_SORT_SCATTER_TWO_PHASE, cudaSharedMemBankSizeEightByte, RADIX_BITS> DownsweepPolicyKeys; - typedef BlockRadixSortDownsweepTilesPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR), BLOCK_LOAD_DIRECT, LOAD_LDG, false, true, BLOCK_SCAN_WARP_SCANS, RADIX_SORT_SCATTER_TWO_PHASE, cudaSharedMemBankSizeEightByte, RADIX_BITS> DownsweepPolicyPairs; - typedef typename If::Type DownsweepPolicy; - -/* - // 4bit - typedef BlockRadixSortDownsweepTilesPolicy <128, 15, BLOCK_LOAD_DIRECT, LOAD_LDG, false, true, BLOCK_SCAN_WARP_SCANS, RADIX_SORT_SCATTER_TWO_PHASE, cudaSharedMemBankSizeEightByte, RADIX_BITS> DownsweepPolicyKeys; - typedef BlockRadixSortDownsweepTilesPolicy <256, 13, BLOCK_LOAD_DIRECT, LOAD_LDG, false, true, BLOCK_SCAN_WARP_SCANS, RADIX_SORT_SCATTER_TWO_PHASE, cudaSharedMemBankSizeEightByte, RADIX_BITS> DownsweepPolicyPairs; -*/ - enum { SUBSCRIPTION_FACTOR = 7 }; - }; - - - /// SM20 tune - template - struct TunedPolicies - { - enum { - KEYS_ONLY = (Equals::VALUE), - SCALE_FACTOR = (CUB_MAX(sizeof(Key), sizeof(Value)) + 3) / 4, - RADIX_BITS = 5, - }; - - // UpsweepPolicy - typedef BlockRadixSortUpsweepTilesPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR), LOAD_DEFAULT, RADIX_BITS> UpsweepPolicyKeys; - typedef BlockRadixSortUpsweepTilesPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR), LOAD_DEFAULT, RADIX_BITS> UpsweepPolicyPairs; - typedef typename If::Type UpsweepPolicy; - - // ScanPolicy - typedef BlockScanTilesPolicy <512, 4, BLOCK_LOAD_VECTORIZE, false, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, false, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; - - // DownsweepPolicy - typedef BlockRadixSortDownsweepTilesPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, false, false, BLOCK_SCAN_WARP_SCANS, RADIX_SORT_SCATTER_TWO_PHASE, cudaSharedMemBankSizeFourByte, RADIX_BITS> DownsweepPolicyKeys; - typedef BlockRadixSortDownsweepTilesPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, false, false, BLOCK_SCAN_WARP_SCANS, RADIX_SORT_SCATTER_TWO_PHASE, cudaSharedMemBankSizeFourByte, RADIX_BITS> DownsweepPolicyPairs; - typedef typename If::Type DownsweepPolicy; - - enum { SUBSCRIPTION_FACTOR = 3 }; - }; - - - /// SM10 tune - template - struct TunedPolicies - { - enum { - RADIX_BITS = 4, - }; - - // UpsweepPolicy - typedef BlockRadixSortUpsweepTilesPolicy <64, 9, LOAD_DEFAULT, RADIX_BITS> UpsweepPolicy; - - // ScanPolicy - typedef BlockScanTilesPolicy <256, 4, BLOCK_LOAD_VECTORIZE, false, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, false, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; - - // DownsweepPolicy - typedef BlockRadixSortDownsweepTilesPolicy <64, 9, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, false, false, BLOCK_SCAN_WARP_SCANS, 
RADIX_SORT_SCATTER_TWO_PHASE, cudaSharedMemBankSizeFourByte, RADIX_BITS> DownsweepPolicy; - - enum { SUBSCRIPTION_FACTOR = 3 }; - }; - - - - /****************************************************************************** - * Default policy initializer - ******************************************************************************/ - - /// Tuning policy for the PTX architecture that DeviceRadixSort operations will get dispatched to - template - struct PtxDefaultPolicies - { - - static const int PTX_TUNE_ARCH = (CUB_PTX_ARCH >= 350) ? - 350 : - (CUB_PTX_ARCH >= 200) ? - 200 : - 100; - - // Tuned policy set for the current PTX compiler pass - typedef TunedPolicies PtxTunedPolicies; - - // UpsweepPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct UpsweepPolicy : PtxTunedPolicies::UpsweepPolicy {}; - - // ScanPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct ScanPolicy : PtxTunedPolicies::ScanPolicy {}; - - // DownsweepPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct DownsweepPolicy : PtxTunedPolicies::DownsweepPolicy {}; - - // Subscription factor for the current PTX compiler pass - enum { SUBSCRIPTION_FACTOR = PtxTunedPolicies::SUBSCRIPTION_FACTOR }; - - - /** - * Initialize dispatch params with the policies corresponding to the PTX assembly we will use - */ - static void InitDispatchParams( - int ptx_version, - KernelDispachParams &upsweep_dispatch_params, - KernelDispachParams &scan_dispatch_params, - KernelDispachParams &downsweep_dispatch_params) - { - if (ptx_version >= 350) - { - typedef TunedPolicies TunedPolicies; - upsweep_dispatch_params.InitUpsweepPolicy(TunedPolicies::SUBSCRIPTION_FACTOR); - scan_dispatch_params.InitScanPolicy(); - downsweep_dispatch_params.InitDownsweepPolicy(TunedPolicies::SUBSCRIPTION_FACTOR); - } - else if (ptx_version >= 200) - { - typedef TunedPolicies TunedPolicies; - upsweep_dispatch_params.InitUpsweepPolicy(TunedPolicies::SUBSCRIPTION_FACTOR); - scan_dispatch_params.InitScanPolicy(); - downsweep_dispatch_params.InitDownsweepPolicy(TunedPolicies::SUBSCRIPTION_FACTOR); - } - else - { - typedef TunedPolicies TunedPolicies; - upsweep_dispatch_params.InitUpsweepPolicy(TunedPolicies::SUBSCRIPTION_FACTOR); - scan_dispatch_params.InitScanPolicy(); - downsweep_dispatch_params.InitDownsweepPolicy(TunedPolicies::SUBSCRIPTION_FACTOR); - } - } - }; - - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /** - * Internal dispatch routine for computing a device-wide reduction using a two-stages of kernel invocations. - */ - template < - typename UpsweepKernelPtr, ///< Function type of cub::RadixSortUpsweepKernel - typename SpineKernelPtr, ///< Function type of cub::SpineScanKernel - typename DownsweepKernelPtr, ///< Function type of cub::RadixSortUpsweepKernel - typename Key, ///< Key type - typename Value, ///< Value type - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. 
- UpsweepKernelPtr upsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::RadixSortUpsweepKernel - SpineKernelPtr scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::SpineScanKernel - DownsweepKernelPtr downsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::RadixSortUpsweepKernel - KernelDispachParams &upsweep_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p upsweep_kernel was compiled for - KernelDispachParams &scan_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p scan_kernel was compiled for - KernelDispachParams &downsweep_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p downsweep_kernel was compiled for - DoubleBuffer &d_keys, ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys - DoubleBuffer &d_values, ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values - SizeT num_items, ///< [in] Number of items to reduce - int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison - int end_bit = sizeof(Key) * 8, ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. - { -#ifndef CUB_RUNTIME_ENABLED - - // Kernel launch not supported from this device - return CubDebug(cudaErrorNotSupported ); - -#else - - cudaError error = cudaSuccess; - do - { - // Get device ordinal - int device_ordinal; - if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; - - // Get SM count - int sm_count; - if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; - - // Get a rough estimate of downsweep_kernel SM occupancy based upon the maximum SM occupancy of the targeted PTX architecture - int downsweep_sm_occupancy = CUB_MIN( - ArchProps::MAX_SM_THREADBLOCKS, - ArchProps::MAX_SM_THREADS / downsweep_dispatch_params.block_threads); - int upsweep_sm_occupancy = downsweep_sm_occupancy; - -#ifndef __CUDA_ARCH__ - // We're on the host, so come up with more accurate estimates of SM occupancy from actual device properties - Device device_props; - if (CubDebug(error = device_props.Init(device_ordinal))) break; - - if (CubDebug(error = device_props.MaxSmOccupancy( - downsweep_sm_occupancy, - downsweep_kernel, - downsweep_dispatch_params.block_threads))) break; - - if (CubDebug(error = device_props.MaxSmOccupancy( - upsweep_sm_occupancy, - upsweep_kernel, - upsweep_dispatch_params.block_threads))) break; -#endif - // Get device occupancies - int downsweep_occupancy = downsweep_sm_occupancy * sm_count; - - // Get even-share work distribution descriptor - GridEvenShare even_share; - int max_downsweep_grid_size = downsweep_occupancy * downsweep_dispatch_params.subscription_factor; - int downsweep_grid_size; - even_share.GridInit(num_items, max_downsweep_grid_size, downsweep_dispatch_params.tile_size); - downsweep_grid_size = even_share.grid_size; - - // Get number of spine elements (round up to nearest spine scan kernel tile size) - int bins = 1 << 
downsweep_dispatch_params.radix_bits; - int spine_size = downsweep_grid_size * bins; - int spine_tiles = (spine_size + scan_dispatch_params.tile_size - 1) / scan_dispatch_params.tile_size; - spine_size = spine_tiles * scan_dispatch_params.tile_size; - - int alt_bins = 1 << downsweep_dispatch_params.alt_radix_bits; - int alt_spine_size = downsweep_grid_size * alt_bins; - int alt_spine_tiles = (alt_spine_size + scan_dispatch_params.tile_size - 1) / scan_dispatch_params.tile_size; - alt_spine_size = alt_spine_tiles * scan_dispatch_params.tile_size; - - // Temporary storage allocation requirements - void* allocations[1]; - size_t allocation_sizes[1] = - { - spine_size * sizeof(SizeT), // bytes needed for privatized block digit histograms - }; - - // Alias temporaries (or set the necessary size of the storage allocation) - if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; - - // Return if the caller is simply requesting the size of the storage allocation - if (d_temp_storage == NULL) - return cudaSuccess; - - // Privatized per-block digit histograms - SizeT *d_spine = (SizeT*) allocations[0]; - -#ifndef __CUDA_ARCH__ - // Get current smem bank configuration - cudaSharedMemConfig original_smem_config; - if (CubDebug(error = cudaDeviceGetSharedMemConfig(&original_smem_config))) break; - cudaSharedMemConfig current_smem_config = original_smem_config; -#endif - // Iterate over digit places - int current_bit = begin_bit; - while (current_bit < end_bit) - { - // Use primary bit granularity if bits remaining is a whole multiple of bit primary granularity - int bits_remaining = end_bit - current_bit; - bool use_primary_bit_granularity = (bits_remaining % downsweep_dispatch_params.radix_bits == 0); - int radix_bits = (use_primary_bit_granularity) ? - downsweep_dispatch_params.radix_bits : - downsweep_dispatch_params.alt_radix_bits; - -#ifndef __CUDA_ARCH__ - // Update smem config if necessary - if (current_smem_config != upsweep_dispatch_params.smem_config) - { - if (CubDebug(error = cudaDeviceSetSharedMemConfig(upsweep_dispatch_params.smem_config))) break; - current_smem_config = upsweep_dispatch_params.smem_config; - } -#endif - - // Log upsweep_kernel configuration - if (stream_synchronous) - CubLog("Invoking upsweep_kernel<<<%d, %d, 0, %lld>>>(), %d smem config, %d items per thread, %d SM occupancy, selector %d, current bit %d, bit_grain %d\n", - downsweep_grid_size, upsweep_dispatch_params.block_threads, (long long) stream, upsweep_dispatch_params.smem_config, upsweep_dispatch_params.items_per_thread, upsweep_sm_occupancy, d_keys.selector, current_bit, radix_bits); - - // Invoke upsweep_kernel with same grid size as downsweep_kernel - upsweep_kernel<<>>( - d_keys.d_buffers[d_keys.selector], - d_spine, - num_items, - current_bit, - use_primary_bit_granularity, - (current_bit == begin_bit), - even_share); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - // Log scan_kernel configuration - if (stream_synchronous) CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n", - 1, scan_dispatch_params.block_threads, (long long) stream, scan_dispatch_params.items_per_thread); - - // Invoke scan_kernel - scan_kernel<<<1, scan_dispatch_params.block_threads, 0, stream>>>( - d_spine, - (use_primary_bit_granularity) ? 
spine_size : alt_spine_size); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - -#ifndef __CUDA_ARCH__ - // Update smem config if necessary - if (current_smem_config != downsweep_dispatch_params.smem_config) - { - if (CubDebug(error = cudaDeviceSetSharedMemConfig(downsweep_dispatch_params.smem_config))) break; - current_smem_config = downsweep_dispatch_params.smem_config; - } -#endif - - // Log downsweep_kernel configuration - if (stream_synchronous) CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d smem config, %d items per thread, %d SM occupancy\n", - downsweep_grid_size, downsweep_dispatch_params.block_threads, (long long) stream, downsweep_dispatch_params.smem_config, downsweep_dispatch_params.items_per_thread, downsweep_sm_occupancy); - - // Invoke downsweep_kernel - downsweep_kernel<<>>( - d_keys.d_buffers[d_keys.selector], - d_keys.d_buffers[d_keys.selector ^ 1], - d_values.d_buffers[d_values.selector], - d_values.d_buffers[d_values.selector ^ 1], - d_spine, - num_items, - current_bit, - use_primary_bit_granularity, - (current_bit == begin_bit), - (current_bit + downsweep_dispatch_params.radix_bits >= end_bit), - even_share); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - // Invert selectors - d_keys.selector ^= 1; - d_values.selector ^= 1; - - // Update current bit position - current_bit += radix_bits; - } - -#ifndef __CUDA_ARCH__ - // Reset smem config if necessary - if (current_smem_config != original_smem_config) - { - if (CubDebug(error = cudaDeviceSetSharedMemConfig(original_smem_config))) break; - } -#endif - - } - while (0); - - return error; - -#endif // CUB_RUNTIME_ENABLED - } - - - - #endif // DOXYGEN_SHOULD_SKIP_THIS - - /****************************************************************************** - * Interface - ******************************************************************************/ - - - /** - * \brief Sorts key-value pairs. - * - * \par - * The sorting operation requires a pair of key buffers and a pair of value - * buffers. Each pair is wrapped in a DoubleBuffer structure whose member - * DoubleBuffer::Current() references the active buffer. The currently-active - * buffer may be changed by the sorting operation. - * - * \devicestorage - * - * \cdp - * - * \par - * The code snippet below illustrates the sorting of a device vector of \p int keys - * with associated vector of \p int values. - * \par - * \code - * #include - * ... - * - * // Create a set of DoubleBuffers to wrap pairs of device pointers for - * // sorting data (keys, values, and equivalently-sized alternate buffers) - * int num_items = ... 
- * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); - * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); - * - * // Determine temporary device storage requirements for sorting operation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); - * - * // Allocate temporary storage for sorting operation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run sorting operation - * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); - * - * // Sorted keys and values are referenced by d_keys.Current() and d_values.Current() - * - * \endcode - * - * \tparam Key [inferred] Key type - * \tparam Value [inferred] Value type - */ - template < - typename Key, - typename Value> - __host__ __device__ __forceinline__ - static cudaError_t SortPairs( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - DoubleBuffer &d_keys, ///< [in,out] Double-buffer of keys whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys - DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values - int num_items, ///< [in] Number of items to sort - int begin_bit = 0, ///< [in] [optional] The first (least-significant) bit index needed for key comparison - int end_bit = sizeof(Key) * 8, ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false.
- { - // Type used for array indexing - typedef int SizeT; - - // Tuning polices - typedef PtxDefaultPolicies PtxDefaultPolicies; // Wrapper of default kernel policies - typedef typename PtxDefaultPolicies::UpsweepPolicy UpsweepPolicy; // Upsweep kernel policy - typedef typename PtxDefaultPolicies::ScanPolicy ScanPolicy; // Scan kernel policy - typedef typename PtxDefaultPolicies::DownsweepPolicy DownsweepPolicy; // Downsweep kernel policy - - cudaError error = cudaSuccess; - do - { - // Declare dispatch parameters - KernelDispachParams upsweep_dispatch_params; - KernelDispachParams scan_dispatch_params; - KernelDispachParams downsweep_dispatch_params; - -#ifdef __CUDA_ARCH__ - // We're on the device, so initialize the dispatch parameters with the PtxDefaultPolicies directly - upsweep_dispatch_params.InitUpsweepPolicy(PtxDefaultPolicies::SUBSCRIPTION_FACTOR); - scan_dispatch_params.InitScanPolicy(); - downsweep_dispatch_params.InitDownsweepPolicy(PtxDefaultPolicies::SUBSCRIPTION_FACTOR); -#else - // We're on the host, so lookup and initialize the dispatch parameters with the policies that match the device's PTX version - int ptx_version; - if (CubDebug(error = PtxVersion(ptx_version))) break; - PtxDefaultPolicies::InitDispatchParams( - ptx_version, - upsweep_dispatch_params, - scan_dispatch_params, - downsweep_dispatch_params); -#endif - // Dispatch - if (CubDebug(error = Dispatch( - d_temp_storage, - temp_storage_bytes, - RadixSortUpsweepKernel, - RadixSortScanKernel, - RadixSortDownsweepKernel, - upsweep_dispatch_params, - scan_dispatch_params, - downsweep_dispatch_params, - d_keys, - d_values, - num_items, - begin_bit, - end_bit, - stream, - stream_synchronous))) break; - } - while (0); - - return error; - } - - - /** - * \brief Sorts keys - * - * \par - * The sorting operation requires a pair of key buffers. The pair is - * wrapped in a DoubleBuffer structure whose member DoubleBuffer::Current() - * references the active buffer. The currently-active buffer may be changed - * by the sorting operation. - * - * \devicestorage - * - * \cdp - * - * \par - * The code snippet below illustrates the sorting of a device vector of \p int keys. - * \par - * \code - * #include - * ... - * - * // Create a set of DoubleBuffers to wrap pairs of device pointers for - * // sorting data (keys and equivalently-sized alternate buffer) - * int num_items = ... - * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); - * - * // Determine temporary device storage requirements for sorting operation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); - * - * // Allocate temporary storage for sorting operation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run sorting operation - * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); - * - * // Sorted keys are referenced by d_keys.Current() - * - * \endcode - * - * \tparam Key [inferred] Key type - */ - template - __host__ __device__ __forceinline__ - static cudaError_t SortKeys( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. 
- DoubleBuffer &d_keys, ///< [in,out] Double-buffer of keys whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys - int num_items, ///< [in] Number of items to reduce - int begin_bit = 0, ///< [in] [optional] The first (least-significant) bit index needed for key comparison - int end_bit = sizeof(Key) * 8, ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. - { - DoubleBuffer d_values; - return SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, stream, stream_synchronous); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/device/device_reduce.cuh b/kokkos/kokkos/TPL/cub/device/device_reduce.cuh deleted file mode 100644 index 069af8c..0000000 --- a/kokkos/kokkos/TPL/cub/device/device_reduce.cuh +++ /dev/null @@ -1,775 +0,0 @@ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::DeviceReduce provides operations for computing a device-wide, parallel reduction across data items residing within global memory. 
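As with the other device-wide primitives in this library, callers query the temporary-storage requirement with a NULL allocation and then call again to do the work. A sketch of that convention applied to a sum reduction, assuming the umbrella header is <cub/cub.cuh> and that DeviceReduce::Reduce takes the argument order used throughout these examples (buffer names are placeholders):

    #include <cub/cub.cuh>   // assumed umbrella include; adjust to the actual header path

    cudaError_t ReduceSumSketch(int *d_in, int *d_out, int num_items)
    {
        void  *d_temp_storage     = NULL;
        size_t temp_storage_bytes = 0;

        // First call: d_temp_storage is NULL, so only the required allocation size is reported.
        cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Sum());
        cudaMalloc(&d_temp_storage, temp_storage_bytes);

        // Second call: the reduction actually runs; the single aggregate lands in d_out[0].
        cudaError_t error = cub::DeviceReduce::Reduce(
            d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Sum());

        cudaFree(d_temp_storage);
        return error;
    }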
- */ - -#pragma once - -#include -#include - -#include "block/block_reduce_tiles.cuh" -#include "../thread/thread_operators.cuh" -#include "../grid/grid_even_share.cuh" -#include "../grid/grid_queue.cuh" -#include "../util_debug.cuh" -#include "../util_device.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - - - - - -/****************************************************************************** - * Kernel entry points - *****************************************************************************/ - -/** - * Reduction pass kernel entry point (multi-block). Computes privatized reductions, one per thread block. - */ -template < - typename BlockReduceTilesPolicy, ///< Tuning policy for cub::BlockReduceTiles abstraction - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename SizeT, ///< Integer type used for global array indexing - typename ReductionOp> ///< Binary reduction operator type having member T operator()(const T &a, const T &b) -__launch_bounds__ (int(BlockReduceTilesPolicy::BLOCK_THREADS), 1) -__global__ void ReducePrivatizedKernel( - InputIteratorRA d_in, ///< [in] Input data to reduce - OutputIteratorRA d_out, ///< [out] Output location for result - SizeT num_items, ///< [in] Total number of input data items - GridEvenShare even_share, ///< [in] Descriptor for how to map an even-share of tiles across thread blocks - GridQueue queue, ///< [in] Descriptor for performing dynamic mapping of tile data to thread blocks - ReductionOp reduction_op) ///< [in] Binary reduction operator -{ - // Data type - typedef typename std::iterator_traits::value_type T; - - // Thread block type for reducing input tiles - typedef BlockReduceTiles BlockReduceTilesT; - - // Block-wide aggregate - T block_aggregate; - - // Shared memory storage - __shared__ typename BlockReduceTilesT::TempStorage temp_storage; - - // Consume input tiles - BlockReduceTilesT(temp_storage, d_in, reduction_op).ConsumeTiles( - num_items, - even_share, - queue, - block_aggregate, - Int2Type()); - - // Output result - if (threadIdx.x == 0) - { - d_out[blockIdx.x] = block_aggregate; - } -} - - -/** - * Reduction pass kernel entry point (single-block). Aggregates privatized threadblock reductions from a previous multi-block reduction pass. 
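The two kernels here split the reduction into a multi-block pass that leaves one partial aggregate per thread block, followed by a single-block pass over those partials. A stripped-down CUDA sketch of that same two-pass idea (not CUB's tuned implementation; it assumes a power-of-two block size of 256):

    __global__ void PartialSumSketchKernel(const int *d_in, int *d_partials, int num_items)
    {
        __shared__ int smem[256];                       // assumes blockDim.x == 256
        int tid = threadIdx.x;
        int sum = 0;
        for (int i = blockIdx.x * blockDim.x + tid; i < num_items; i += gridDim.x * blockDim.x)
            sum += d_in[i];                             // grid-stride accumulation
        smem[tid] = sum;
        __syncthreads();
        for (int offset = blockDim.x / 2; offset > 0; offset /= 2)
        {
            if (tid < offset)
                smem[tid] += smem[tid + offset];        // shared-memory tree reduction
            __syncthreads();
        }
        if (tid == 0)
            d_partials[blockIdx.x] = smem[0];           // one aggregate per thread block
    }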
- */ -template < - typename BlockReduceTilesPolicy, ///< Tuning policy for cub::BlockReduceTiles abstraction - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename SizeT, ///< Integer type used for global array indexing - typename ReductionOp> ///< Binary reduction operator type having member T operator()(const T &a, const T &b) -__launch_bounds__ (int(BlockReduceTilesPolicy::BLOCK_THREADS), 1) -__global__ void ReduceSingleKernel( - InputIteratorRA d_in, ///< [in] Input data to reduce - OutputIteratorRA d_out, ///< [out] Output location for result - SizeT num_items, ///< [in] Total number of input data items - ReductionOp reduction_op) ///< [in] Binary reduction operator -{ - // Data type - typedef typename std::iterator_traits::value_type T; - - // Thread block type for reducing input tiles - typedef BlockReduceTiles BlockReduceTilesT; - - // Block-wide aggregate - T block_aggregate; - - // Shared memory storage - __shared__ typename BlockReduceTilesT::TempStorage temp_storage; - - // Consume input tiles - BlockReduceTilesT(temp_storage, d_in, reduction_op).ConsumeTiles( - SizeT(0), - SizeT(num_items), - block_aggregate); - - // Output result - if (threadIdx.x == 0) - { - d_out[blockIdx.x] = block_aggregate; - } -} - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/****************************************************************************** - * DeviceReduce - *****************************************************************************/ - -/** - * \brief DeviceReduce provides operations for computing a device-wide, parallel reduction across data items residing within global memory. ![](reduce_logo.png) - * \ingroup DeviceModule - * - * \par Overview - * A reduction (or fold) - * uses a binary combining operator to compute a single aggregate from a list of input elements. - * - * \par Usage Considerations - * \cdp_class{DeviceReduce} - * - * \par Performance - * - * \image html reduction_perf.png - * - */ -struct DeviceReduce -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /// Generic structure for encapsulating dispatch properties codified in block policy. 
- struct KernelDispachParams - { - int block_threads; - int items_per_thread; - int vector_load_length; - BlockReduceAlgorithm block_algorithm; - PtxLoadModifier load_modifier; - GridMappingStrategy grid_mapping; - int subscription_factor; - int tile_size; - - template - __host__ __device__ __forceinline__ - void Init(int subscription_factor = 1) - { - block_threads = BlockPolicy::BLOCK_THREADS; - items_per_thread = BlockPolicy::ITEMS_PER_THREAD; - vector_load_length = BlockPolicy::VECTOR_LOAD_LENGTH; - block_algorithm = BlockPolicy::BLOCK_ALGORITHM; - load_modifier = BlockPolicy::LOAD_MODIFIER; - grid_mapping = BlockPolicy::GRID_MAPPING; - this->subscription_factor = subscription_factor; - tile_size = block_threads * items_per_thread; - } - - __host__ __device__ __forceinline__ - void Print() - { - printf("%d threads, %d per thread, %d veclen, %d algo, %d loadmod, %d mapping, %d subscription", - block_threads, - items_per_thread, - vector_load_length, - block_algorithm, - load_modifier, - grid_mapping, - subscription_factor); - } - - }; - - - /****************************************************************************** - * Tuning policies - ******************************************************************************/ - - /// Specializations of tuned policy types for different PTX architectures - template < - typename T, - typename SizeT, - int ARCH> - struct TunedPolicies; - - /// SM35 tune - template - struct TunedPolicies - { - // PrivatizedPolicy (1B): GTX Titan: 206.0 GB/s @ 192M 1B items - typedef BlockReduceTilesPolicy<128, 12, 1, BLOCK_REDUCE_RAKING, LOAD_LDG, GRID_MAPPING_DYNAMIC> PrivatizedPolicy1B; - - // PrivatizedPolicy (4B): GTX Titan: 254.2 GB/s @ 48M 4B items - typedef BlockReduceTilesPolicy<512, 20, 1, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> PrivatizedPolicy4B; - - // PrivatizedPolicy - typedef typename If<(sizeof(T) < 4), - PrivatizedPolicy1B, - PrivatizedPolicy4B>::Type PrivatizedPolicy; - - // SinglePolicy - typedef BlockReduceTilesPolicy<256, 8, 1, BLOCK_REDUCE_WARP_REDUCTIONS, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> SinglePolicy; - - enum { SUBSCRIPTION_FACTOR = 7 }; - - }; - - /// SM30 tune - template - struct TunedPolicies - { - // PrivatizedPolicy: GTX670: 154.0 @ 48M 32-bit T - typedef BlockReduceTilesPolicy<256, 2, 1, BLOCK_REDUCE_WARP_REDUCTIONS, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> PrivatizedPolicy; - - // SinglePolicy - typedef BlockReduceTilesPolicy<256, 24, 4, BLOCK_REDUCE_WARP_REDUCTIONS, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> SinglePolicy; - - enum { SUBSCRIPTION_FACTOR = 1 }; - }; - - /// SM20 tune - template - struct TunedPolicies - { - // PrivatizedPolicy (1B): GTX 580: 158.1 GB/s @ 192M 1B items - typedef BlockReduceTilesPolicy<192, 24, 4, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> PrivatizedPolicy1B; - - // PrivatizedPolicy (4B): GTX 580: 178.9 GB/s @ 48M 4B items - typedef BlockReduceTilesPolicy<128, 8, 4, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_DYNAMIC> PrivatizedPolicy4B; - - // PrivatizedPolicy - typedef typename If<(sizeof(T) < 4), - PrivatizedPolicy1B, - PrivatizedPolicy4B>::Type PrivatizedPolicy; - - // SinglePolicy - typedef BlockReduceTilesPolicy<192, 7, 1, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> SinglePolicy; - - enum { SUBSCRIPTION_FACTOR = 2 }; - }; - - /// SM13 tune - template - struct TunedPolicies - { - // PrivatizedPolicy - typedef BlockReduceTilesPolicy<128, 8, 2, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> PrivatizedPolicy; - - // SinglePolicy - 
typedef BlockReduceTilesPolicy<32, 4, 4, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> SinglePolicy; - - enum { SUBSCRIPTION_FACTOR = 1 }; - }; - - /// SM10 tune - template - struct TunedPolicies - { - // PrivatizedPolicy - typedef BlockReduceTilesPolicy<128, 8, 2, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> PrivatizedPolicy; - - // SinglePolicy - typedef BlockReduceTilesPolicy<32, 4, 4, BLOCK_REDUCE_RAKING, LOAD_DEFAULT, GRID_MAPPING_EVEN_SHARE> SinglePolicy; - - enum { SUBSCRIPTION_FACTOR = 1 }; - }; - - - - /****************************************************************************** - * Default policy initializer - ******************************************************************************/ - - /// Tuning policy for the PTX architecture that DeviceReduce operations will get dispatched to - template - struct PtxDefaultPolicies - { - static const int PTX_TUNE_ARCH = (CUB_PTX_ARCH >= 350) ? - 350 : - (CUB_PTX_ARCH >= 300) ? - 300 : - (CUB_PTX_ARCH >= 200) ? - 200 : - (CUB_PTX_ARCH >= 130) ? - 130 : - 100; - - // Tuned policy set for the current PTX compiler pass - typedef TunedPolicies PtxTunedPolicies; - - // Subscription factor for the current PTX compiler pass - static const int SUBSCRIPTION_FACTOR = PtxTunedPolicies::SUBSCRIPTION_FACTOR; - - // PrivatizedPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct PrivatizedPolicy : PtxTunedPolicies::PrivatizedPolicy {}; - - // SinglePolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct SinglePolicy : PtxTunedPolicies::SinglePolicy {}; - - - /** - * Initialize dispatch params with the policies corresponding to the PTX assembly we will use - */ - static void InitDispatchParams( - int ptx_version, - KernelDispachParams &privatized_dispatch_params, - KernelDispachParams &single_dispatch_params) - { - if (ptx_version >= 350) - { - typedef TunedPolicies TunedPolicies; - privatized_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - single_dispatch_params.Init(); - } - else if (ptx_version >= 300) - { - typedef TunedPolicies TunedPolicies; - privatized_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - single_dispatch_params.Init(); - } - else if (ptx_version >= 200) - { - typedef TunedPolicies TunedPolicies; - privatized_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - single_dispatch_params.Init(); - } - else if (ptx_version >= 130) - { - typedef TunedPolicies TunedPolicies; - privatized_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - single_dispatch_params.Init(); - } - else - { - typedef TunedPolicies TunedPolicies; - privatized_dispatch_params.Init(TunedPolicies::SUBSCRIPTION_FACTOR); - single_dispatch_params.Init(); - } - } - }; - - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /** - * Internal dispatch routine for computing a device-wide reduction using a two-stages of kernel invocations. 
- */ - template < - typename ReducePrivatizedKernelPtr, ///< Function type of cub::ReducePrivatizedKernel - typename ReduceSingleKernelPtr, ///< Function type of cub::ReduceSingleKernel - typename ResetDrainKernelPtr, ///< Function type of cub::ResetDrainKernel - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename SizeT, ///< Integer type used for global array indexing - typename ReductionOp> ///< Binary reduction operator type having member T operator()(const T &a, const T &b) - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - ReducePrivatizedKernelPtr privatized_kernel, ///< [in] Kernel function pointer to parameterization of cub::ReducePrivatizedKernel - ReduceSingleKernelPtr single_kernel, ///< [in] Kernel function pointer to parameterization of cub::ReduceSingleKernel - ResetDrainKernelPtr prepare_drain_kernel, ///< [in] Kernel function pointer to parameterization of cub::ResetDrainKernel - KernelDispachParams &privatized_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p privatized_kernel_ptr was compiled for - KernelDispachParams &single_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p single_kernel was compiled for - InputIteratorRA d_in, ///< [in] Input data to reduce - OutputIteratorRA d_out, ///< [out] Output location for result - SizeT num_items, ///< [in] Number of items to reduce - ReductionOp reduction_op, ///< [in] Binary reduction operator - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. 
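// A rough sketch of the grid-sizing arithmetic performed by the dispatch body below for the
// privatized (multi-block) pass. The helper name and parameter list are hypothetical; CUB's
// GridEvenShare computes the exact even-share partitioning from device properties.

inline int PrivatizedGridSize(
    int  num_items,            // total number of input items
    int  tile_size,            // block_threads * items_per_thread
    int  device_occupancy,     // resident thread blocks per device (SM occupancy * SM count)
    int  subscription_factor,  // oversubscription multiplier used by even-share mapping
    bool dynamic_mapping)      // true: GRID_MAPPING_DYNAMIC, false: GRID_MAPPING_EVEN_SHARE
{
    int num_tiles = (num_items + tile_size - 1) / tile_size;

    if (dynamic_mapping)
    {
        // Launch just enough blocks to fill the device; blocks then drain tiles from a queue
        return (num_tiles < device_occupancy) ? num_tiles : device_occupancy;
    }

    // Even-share mapping: oversubscribe the device, then statically split tiles among blocks
    int max_blocks = device_occupancy * subscription_factor;
    return (num_tiles < max_blocks) ? num_tiles : max_blocks;
}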
- { -#ifndef CUB_RUNTIME_ENABLED - - // Kernel launch not supported from this device - return CubDebug(cudaErrorNotSupported ); - -#else - - // Data type of input iterator - typedef typename std::iterator_traits::value_type T; - - cudaError error = cudaSuccess; - do - { - if ((privatized_kernel == NULL) || (num_items <= (single_dispatch_params.tile_size))) - { - // Dispatch a single-block reduction kernel - - // Return if the caller is simply requesting the size of the storage allocation - if (d_temp_storage == NULL) - { - temp_storage_bytes = 1; - return cudaSuccess; - } - - // Log single_kernel configuration - if (stream_synchronous) CubLog("Invoking ReduceSingle<<<1, %d, 0, %lld>>>(), %d items per thread\n", - single_dispatch_params.block_threads, (long long) stream, single_dispatch_params.items_per_thread); - - // Invoke single_kernel - single_kernel<<<1, single_dispatch_params.block_threads>>>( - d_in, - d_out, - num_items, - reduction_op); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - } - else - { - // Dispatch two kernels: a multi-block kernel to compute - // privatized per-block reductions, and then a single-block - // to reduce those - - // Get device ordinal - int device_ordinal; - if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; - - // Get SM count - int sm_count; - if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; - - // Get a rough estimate of privatized_kernel SM occupancy based upon the maximum SM occupancy of the targeted PTX architecture - int privatized_sm_occupancy = CUB_MIN( - ArchProps::MAX_SM_THREADBLOCKS, - ArchProps::MAX_SM_THREADS / privatized_dispatch_params.block_threads); - -#ifndef __CUDA_ARCH__ - // We're on the host, so come up with a more accurate estimate of privatized_kernel SM occupancy from actual device properties - Device device_props; - if (CubDebug(error = device_props.Init(device_ordinal))) break; - - if (CubDebug(error = device_props.MaxSmOccupancy( - privatized_sm_occupancy, - privatized_kernel, - privatized_dispatch_params.block_threads))) break; -#endif - - // Get device occupancy for privatized_kernel - int privatized_occupancy = privatized_sm_occupancy * sm_count; - - // Even-share work distribution - GridEvenShare even_share; - - // Get grid size for privatized_kernel - int privatized_grid_size; - switch (privatized_dispatch_params.grid_mapping) - { - case GRID_MAPPING_EVEN_SHARE: - - // Work is distributed evenly - even_share.GridInit( - num_items, - privatized_occupancy * privatized_dispatch_params.subscription_factor, - privatized_dispatch_params.tile_size); - privatized_grid_size = even_share.grid_size; - break; - - case GRID_MAPPING_DYNAMIC: - - // Work is distributed dynamically - int num_tiles = (num_items + privatized_dispatch_params.tile_size - 1) / privatized_dispatch_params.tile_size; - privatized_grid_size = (num_tiles < privatized_occupancy) ? 
- num_tiles : // Not enough to fill the device with threadblocks - privatized_occupancy; // Fill the device with threadblocks - break; - }; - - // Temporary storage allocation requirements - void* allocations[2]; - size_t allocation_sizes[2] = - { - privatized_grid_size * sizeof(T), // bytes needed for privatized block reductions - GridQueue::AllocationSize() // bytes needed for grid queue descriptor - }; - - // Alias temporaries (or set the necessary size of the storage allocation) - if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; - - // Return if the caller is simply requesting the size of the storage allocation - if (d_temp_storage == NULL) - return cudaSuccess; - - // Privatized per-block reductions - T *d_block_reductions = (T*) allocations[0]; - - // Grid queue descriptor - GridQueue queue(allocations[1]); - - // Prepare the dynamic queue descriptor if necessary - if (privatized_dispatch_params.grid_mapping == GRID_MAPPING_DYNAMIC) - { - // Prepare queue using a kernel so we know it gets prepared once per operation - if (stream_synchronous) CubLog("Invoking prepare_drain_kernel<<<1, 1, 0, %lld>>>()\n", (long long) stream); - - // Invoke prepare_drain_kernel - prepare_drain_kernel<<<1, 1, 0, stream>>>(queue, num_items); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - } - - // Log privatized_kernel configuration - if (stream_synchronous) CubLog("Invoking privatized_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", - privatized_grid_size, privatized_dispatch_params.block_threads, (long long) stream, privatized_dispatch_params.items_per_thread, privatized_sm_occupancy); - - // Invoke privatized_kernel - privatized_kernel<<>>( - d_in, - d_block_reductions, - num_items, - even_share, - queue, - reduction_op); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - // Log single_kernel configuration - if (stream_synchronous) CubLog("Invoking single_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n", - 1, single_dispatch_params.block_threads, (long long) stream, single_dispatch_params.items_per_thread); - - // Invoke single_kernel - single_kernel<<<1, single_dispatch_params.block_threads, 0, stream>>>( - d_block_reductions, - d_out, - privatized_grid_size, - reduction_op); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - } - } - while (0); - - return error; - -#endif // CUB_RUNTIME_ENABLED - } - - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - /****************************************************************************** - * Interface - ******************************************************************************/ - - /** - * \brief Computes a device-wide reduction using the specified binary \p reduction_op functor. - * - * \par - * Does not support non-commutative reduction operators. - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the max reduction of a device vector of \p int items. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input and output - * int *d_reduce_input, *d_aggregate; - * int num_items = ... - * ... 
- * - * // Determine temporary device storage requirements for reduction - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_reduce_input, d_aggregate, num_items, cub::Max()); - * - * // Allocate temporary storage for reduction - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run reduction (max) - * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_reduce_input, d_aggregate, num_items, cub::Max()); - * - * \endcode - * - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) - * \tparam OutputIteratorRA [inferred] Random-access iterator type for output (may be a simple pointer type) - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template < - typename InputIteratorRA, - typename OutputIteratorRA, - typename ReductionOp> - __host__ __device__ __forceinline__ - static cudaError_t Reduce( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Input data to reduce - OutputIteratorRA d_out, ///< [out] Output location for result - int num_items, ///< [in] Number of items to reduce - ReductionOp reduction_op, ///< [in] Binary reduction operator - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. - { - // Type used for array indexing - typedef int SizeT; - - // Data type of input iterator - typedef typename std::iterator_traits::value_type T; - - // Tuning polices - typedef PtxDefaultPolicies PtxDefaultPolicies; // Wrapper of default kernel policies - typedef typename PtxDefaultPolicies::PrivatizedPolicy PrivatizedPolicy; // Multi-block kernel policy - typedef typename PtxDefaultPolicies::SinglePolicy SinglePolicy; // Single-block kernel policy - - cudaError error = cudaSuccess; - do - { - // Declare dispatch parameters - KernelDispachParams privatized_dispatch_params; - KernelDispachParams single_dispatch_params; - -#ifdef __CUDA_ARCH__ - // We're on the device, so initialize the dispatch parameters with the PtxDefaultPolicies directly - privatized_dispatch_params.Init(PtxDefaultPolicies::SUBSCRIPTION_FACTOR); - single_dispatch_params.Init(); -#else - // We're on the host, so lookup and initialize the dispatch parameters with the policies that match the device's PTX version - int ptx_version; - if (CubDebug(error = PtxVersion(ptx_version))) break; - PtxDefaultPolicies::InitDispatchParams(ptx_version, privatized_dispatch_params, single_dispatch_params); -#endif - - // Dispatch - if (CubDebug(error = Dispatch( - d_temp_storage, - temp_storage_bytes, - ReducePrivatizedKernel, - ReduceSingleKernel, - ResetDrainKernel, - privatized_dispatch_params, - single_dispatch_params, - d_in, - d_out, - num_items, - reduction_op, - stream, - stream_synchronous))) break; - } - while (0); - - return error; - } - - - /** - * \brief Computes a device-wide sum using the addition ('+') operator. - * - * \par - * Does not support non-commutative reduction operators. 
- * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the sum reduction of a device vector of \p int items. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input and output - * int *d_reduce_input, *d_aggregate; - * int num_items = ... - * ... - * - * // Determine temporary device storage requirements for summation - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_reduce_input, d_aggregate, num_items); - * - * // Allocate temporary storage for summation - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run reduction summation - * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_reduce_input, d_aggregate, num_items); - * - * \endcode - * - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) - * \tparam OutputIteratorRA [inferred] Random-access iterator type for output (may be a simple pointer type) - */ - template < - typename InputIteratorRA, - typename OutputIteratorRA> - __host__ __device__ __forceinline__ - static cudaError_t Sum( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Input data to reduce - OutputIteratorRA d_out, ///< [out] Output location for result - int num_items, ///< [in] Number of items to reduce - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. - { - return Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Sum(), stream, stream_synchronous); - } - - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/device/device_reduce_by_key.cuh b/kokkos/kokkos/TPL/cub/device/device_reduce_by_key.cuh deleted file mode 100644 index f05f751..0000000 --- a/kokkos/kokkos/TPL/cub/device/device_reduce_by_key.cuh +++ /dev/null @@ -1,633 +0,0 @@ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::DeviceReduceByKey provides operations for computing a device-wide, parallel prefix scan across data items residing within global memory. - */ - -#pragma once - -#include -#include - -#include "block/block_reduce_by_key_tiles.cuh" -#include "device_scan.cuh" -#include "../thread/thread_operators.cuh" -#include "../grid/grid_queue.cuh" -#include "../util_iterator.cuh" -#include "../util_debug.cuh" -#include "../util_device.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Kernel entry points - *****************************************************************************/ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - -/** - * Reduce-by-key kernel entry point (multi-block) - */ -template < - typename BlockReduceByKeyilesPolicy, ///< Tuning policy for cub::BlockReduceByKeyiles abstraction - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename T, ///< The scan data type - typename ReductionOp, ///< Binary scan operator type having member T operator()(const T &a, const T &b) - typename Identity, ///< Identity value type (cub::NullType for inclusive scans) - typename SizeT> ///< Integer type used for global array indexing -__launch_bounds__ (int(BlockSweepScanPolicy::BLOCK_THREADS)) -__global__ void MultiBlockScanKernel( - InputIteratorRA d_in, ///< Input data - OutputIteratorRA d_out, ///< Output data - ScanTileDescriptor *d_tile_status, ///< Global list of tile status - ReductionOp reduction_op, ///< Binary scan operator - Identity identity, ///< Identity element - SizeT num_items, ///< Total number of scan items for the entire problem - GridQueue queue) ///< Descriptor for performing dynamic mapping of tile data to thread blocks -{ - enum - { - TILE_STATUS_PADDING = PtxArchProps::WARP_THREADS, - }; - - // Thread block type for scanning input tiles - typedef BlockSweepScan< - BlockSweepScanPolicy, - InputIteratorRA, - OutputIteratorRA, - ReductionOp, - Identity, - SizeT> BlockSweepScanT; - - // Shared memory for BlockSweepScan - __shared__ typename BlockSweepScanT::TempStorage temp_storage; - - // Process tiles - BlockSweepScanT(temp_storage, d_in, d_out, reduction_op, identity).ConsumeTiles( - num_items, - queue, - d_tile_status + TILE_STATUS_PADDING); -} - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - -/****************************************************************************** - * 
DeviceReduceByKey - *****************************************************************************/ - -/** - * \addtogroup DeviceModule - * @{ - */ - -/** - * \brief DeviceReduceByKey provides operations for computing a device-wide, parallel prefix scan across data items residing within global memory. ![](scan_logo.png) - */ -struct DeviceReduceByKey -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /// Generic structure for encapsulating dispatch properties. Mirrors the constants within BlockSweepScanPolicy. - struct KernelDispachParams - { - // Policy fields - int block_threads; - int items_per_thread; - BlockLoadAlgorithm load_policy; - BlockStoreAlgorithm store_policy; - BlockScanAlgorithm scan_algorithm; - - // Other misc - int tile_size; - - template - __host__ __device__ __forceinline__ - void Init() - { - block_threads = BlockSweepScanPolicy::BLOCK_THREADS; - items_per_thread = BlockSweepScanPolicy::ITEMS_PER_THREAD; - load_policy = BlockSweepScanPolicy::LOAD_ALGORITHM; - store_policy = BlockSweepScanPolicy::STORE_ALGORITHM; - scan_algorithm = BlockSweepScanPolicy::SCAN_ALGORITHM; - - tile_size = block_threads * items_per_thread; - } - - __host__ __device__ __forceinline__ - void Print() - { - printf("%d, %d, %d, %d, %d", - block_threads, - items_per_thread, - load_policy, - store_policy, - scan_algorithm); - } - - }; - - - /****************************************************************************** - * Tuning policies - ******************************************************************************/ - - - /// Specializations of tuned policy types for different PTX architectures - template < - typename T, - typename SizeT, - int ARCH> - struct TunedPolicies; - - /// SM35 tune - template - struct TunedPolicies - { - typedef BlockSweepScanPolicy<128, 16, BLOCK_LOAD_DIRECT, false, LOAD_LDG, BLOCK_STORE_WARP_TRANSPOSE, true, BLOCK_SCAN_RAKING_MEMOIZE> MultiBlockPolicy; - }; - - /// SM30 tune - template - struct TunedPolicies - { - typedef BlockSweepScanPolicy<256, 9, BLOCK_LOAD_WARP_TRANSPOSE, false, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, false, BLOCK_SCAN_RAKING_MEMOIZE> MultiBlockPolicy; - }; - - /// SM20 tune - template - struct TunedPolicies - { - typedef BlockSweepScanPolicy<128, 15, BLOCK_LOAD_WARP_TRANSPOSE, false, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, false, BLOCK_SCAN_RAKING_MEMOIZE> MultiBlockPolicy; - }; - - /// SM10 tune - template - struct TunedPolicies - { - typedef BlockSweepScanPolicy<128, 7, BLOCK_LOAD_TRANSPOSE, false, LOAD_DEFAULT, BLOCK_STORE_TRANSPOSE, false, BLOCK_SCAN_RAKING> MultiBlockPolicy; - }; - - - /// Tuning policy for the PTX architecture that DeviceReduceByKey operations will get dispatched to - template - struct PtxDefaultPolicies - { - static const int PTX_TUNE_ARCH = (CUB_PTX_ARCH >= 350) ? - 350 : - (CUB_PTX_ARCH >= 300) ? - 300 : - (CUB_PTX_ARCH >= 200) ? 
- 200 : - 100; - - // Tuned policy set for the current PTX compiler pass - typedef TunedPolicies PtxTunedPolicies; - - // MultiBlockPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct MultiBlockPolicy : PtxTunedPolicies::MultiBlockPolicy {}; - - /** - * Initialize dispatch params with the policies corresponding to the PTX assembly we will use - */ - static void InitDispatchParams(int ptx_version, KernelDispachParams &multi_block_dispatch_params) - { - if (ptx_version >= 350) - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(); - } - else if (ptx_version >= 300) - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(); - } - else if (ptx_version >= 200) - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(); - } - else - { - typedef TunedPolicies TunedPolicies; - multi_block_dispatch_params.Init(); - } - } - }; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /** - * Internal dispatch routine - */ - template < - typename InitScanKernelPtr, ///< Function type of cub::InitScanKernel - typename MultiBlockScanKernelPtr, ///< Function type of cub::MultiBlockScanKernel - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename ReductionOp, ///< Binary scan operator type having member T operator()(const T &a, const T &b) - typename Identity, ///< Identity value type (cub::NullType for inclusive scans) - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InitScanKernelPtr init_kernel, ///< [in] Kernel function pointer to parameterization of cub::InitScanKernel - MultiBlockScanKernelPtr multi_block_kernel, ///< [in] Kernel function pointer to parameterization of cub::MultiBlockScanKernel - KernelDispachParams &multi_block_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p multi_block_kernel was compiled for - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - ReductionOp reduction_op, ///< [in] Binary scan operator - Identity identity, ///< [in] Identity element - SizeT num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. 
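// A minimal sketch of the temporary-storage protocol that the dispatch body below relies on:
// a NULL d_temp_storage means "report the required size and do no work"; otherwise the single
// user allocation is carved into the individual sub-allocations. CarveTemporaries is a
// hypothetical stand-in for cub::AliasTemporaries, with a simple 256-byte alignment assumed.

#include <cuda_runtime.h>
#include <cstddef>

inline cudaError_t CarveTemporaries(
    void    *d_temp_storage,        // NULL, or a single device allocation
    size_t  &temp_storage_bytes,    // in/out: size of that allocation
    void   *(&allocations)[2],      // out: base pointer of each sub-allocation
    size_t  (&allocation_sizes)[2]) // in: bytes needed by each sub-allocation
{
    const size_t ALIGN = 256;
    size_t offsets[2];
    size_t total = 0;

    for (int i = 0; i < 2; ++i)
    {
        offsets[i] = total;
        total += (allocation_sizes[i] + ALIGN - 1) / ALIGN * ALIGN;
    }

    if (d_temp_storage == NULL)
    {
        // Size-query pass: report the requirement, perform no work
        temp_storage_bytes = total;
        return cudaSuccess;
    }

    if (temp_storage_bytes < total)
        return cudaErrorInvalidValue;

    for (int i = 0; i < 2; ++i)
        allocations[i] = static_cast<char*>(d_temp_storage) + offsets[i];

    return cudaSuccess;
}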
- { - -#ifndef CUB_RUNTIME_ENABLED - - // Kernel launch not supported from this device - return CubDebug(cudaErrorNotSupported ); - -#else - - enum - { - TILE_STATUS_PADDING = 32, - }; - - // Data type - typedef typename std::iterator_traits::value_type T; - - cudaError error = cudaSuccess; - do - { - // Number of input tiles - int num_tiles = (num_items + multi_block_dispatch_params.tile_size - 1) / multi_block_dispatch_params.tile_size; - - // Temporary storage allocation requirements - void* allocations[2]; - size_t allocation_sizes[2] = - { - (num_tiles + TILE_STATUS_PADDING) * sizeof(ScanTileDescriptor), // bytes needed for tile status descriptors - GridQueue::AllocationSize() // bytes needed for grid queue descriptor - }; - - // Alias temporaries (or set the necessary size of the storage allocation) - if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; - - // Return if the caller is simply requesting the size of the storage allocation - if (d_temp_storage == NULL) - return cudaSuccess; - - // Global list of tile status - ScanTileDescriptor *d_tile_status = (ScanTileDescriptor*) allocations[0]; - - // Grid queue descriptor - GridQueue queue(allocations[1]); - - // Get GPU id - int device_ordinal; - if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; - - // Get SM count - int sm_count; - if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; - - // Log init_kernel configuration - int init_kernel_threads = 128; - int init_grid_size = (num_tiles + init_kernel_threads - 1) / init_kernel_threads; - if (stream_synchronous) CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, init_kernel_threads, (long long) stream); - - // Invoke init_kernel to initialize tile descriptors and queue descriptors - init_kernel<<>>( - queue, - d_tile_status, - num_tiles); - - // Sync the stream if specified -#ifndef __CUDA_ARCH__ - if (stream_synchronous && CubDebug(error = cudaStreamSynchronize(stream))) break; -#else - if (stream_synchronous && CubDebug(error = cudaDeviceSynchronize())) break; -#endif - - // Get a rough estimate of multi_block_kernel SM occupancy based upon the maximum SM occupancy of the targeted PTX architecture - int multi_sm_occupancy = CUB_MIN( - ArchProps::MAX_SM_THREADBLOCKS, - ArchProps::MAX_SM_THREADS / multi_block_dispatch_params.block_threads); - -#ifndef __CUDA_ARCH__ - - // We're on the host, so come up with a more accurate estimate of multi_block_kernel SM occupancy from actual device properties - Device device_props; - if (CubDebug(error = device_props.Init(device_ordinal))) break; - - if (CubDebug(error = device_props.MaxSmOccupancy( - multi_sm_occupancy, - multi_block_kernel, - multi_block_dispatch_params.block_threads))) break; - -#endif - // Get device occupancy for multi_block_kernel - int multi_block_occupancy = multi_sm_occupancy * sm_count; - - // Get grid size for multi_block_kernel - int multi_block_grid_size = (num_tiles < multi_block_occupancy) ? 
- num_tiles : // Not enough to fill the device with threadblocks - multi_block_occupancy; // Fill the device with threadblocks - - // Log multi_block_kernel configuration - if (stream_synchronous) CubLog("Invoking multi_block_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", - multi_block_grid_size, multi_block_dispatch_params.block_threads, (long long) stream, multi_block_dispatch_params.items_per_thread, multi_sm_occupancy); - - // Invoke multi_block_kernel - multi_block_kernel<<>>( - d_in, - d_out, - d_tile_status, - reduction_op, - identity, - num_items, - queue); - - // Sync the stream if specified -#ifndef __CUDA_ARCH__ - if (stream_synchronous && CubDebug(error = cudaStreamSynchronize(stream))) break; -#else - if (stream_synchronous && CubDebug(error = cudaDeviceSynchronize())) break; -#endif - } - while (0); - - return error; - -#endif // CUB_RUNTIME_ENABLED - } - - - - /** - * Internal scan dispatch routine for using default tuning policies - */ - template < - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename ReductionOp, ///< Binary scan operator type having member T operator()(const T &a, const T &b) - typename Identity, ///< Identity value type (cub::NullType for inclusive scans) - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - ReductionOp reduction_op, ///< [in] Binary scan operator - Identity identity, ///< [in] Identity element - SizeT num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. 
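// A simplified sketch of the dynamic tile-draining idea behind the GridQueue descriptor used
// by the multi-block launch above: a fixed-size grid of blocks repeatedly claims tile indices
// from a global atomic counter until every tile has been consumed. The kernel below is a
// hypothetical stand-in (not cub::GridQueue); d_tile_counter is assumed to be zero-initialized
// before launch (e.g., with cudaMemset or a tiny init kernel).

__global__ void DrainTilesKernel(int *d_tile_counter, int num_tiles)
{
    __shared__ volatile int tile_idx;

    while (true)
    {
        // Thread 0 claims the next unprocessed tile on behalf of the whole block
        if (threadIdx.x == 0)
            tile_idx = atomicAdd(d_tile_counter, 1);
        __syncthreads();

        if (tile_idx >= num_tiles)
            break;

        // ... process the tile covering items [tile_idx * tile_size, (tile_idx + 1) * tile_size) ...

        __syncthreads();   // ensure all threads are done before claiming the next tile
    }
}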
- { - // Data type - typedef typename std::iterator_traits::value_type T; - - // Tuning polices for the PTX architecture that will get dispatched to - typedef PtxDefaultPolicies PtxDefaultPolicies; - typedef typename PtxDefaultPolicies::MultiBlockPolicy MultiBlockPolicy; - - cudaError error = cudaSuccess; - do - { - // Declare dispatch parameters - KernelDispachParams multi_block_dispatch_params; - -#ifdef __CUDA_ARCH__ - // We're on the device, so initialize the dispatch parameters with the PtxDefaultPolicies directly - multi_block_dispatch_params.Init(); -#else - // We're on the host, so lookup and initialize the dispatch parameters with the policies that match the device's PTX version - int ptx_version; - if (CubDebug(error = PtxVersion(ptx_version))) break; - PtxDefaultPolicies::InitDispatchParams(ptx_version, multi_block_dispatch_params); -#endif - - Dispatch( - d_temp_storage, - temp_storage_bytes, - InitScanKernel, - MultiBlockScanKernel, - multi_block_dispatch_params, - d_in, - d_out, - reduction_op, - identity, - num_items, - stream, - stream_synchronous); - - if (CubDebug(error)) break; - } - while (0); - - return error; - } - - #endif // DOXYGEN_SHOULD_SKIP_THIS - - - /******************************************************************//** - * Interface - *********************************************************************/ - - - /** - * \brief Computes device-wide reductions of consecutive values whose corresponding keys are equal. - * - * The resulting output lists of value-aggregates and their corresponding keys are compacted. - * - * \devicestorage - * - * \tparam KeyInputIteratorRA [inferred] Random-access input iterator type for keys input (may be a simple pointer type) - * \tparam KeyOutputIteratorRA [inferred] Random-access output iterator type for keys output (may be a simple pointer type) - * \tparam ValueInputIteratorRA [inferred] Random-access input iterator type for values input (may be a simple pointer type) - * \tparam ValueOutputIteratorRA [inferred] Random-access output iterator type for values output (may be a simple pointer type) - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b), where \p T is the value type of \p ValueInputIteratorRA - */ - template < - typename KeyInputIteratorRA, - typename KeyOutputIteratorRA, - typename ValueInputIteratorRA, - typename ValueOutputIteratorRA, - typename ReductionOp> - __host__ __device__ __forceinline__ - static cudaError_t ReduceValues( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - KeyInputIteratorRA d_keys_in, ///< [in] Key input data - KeyOutputIteratorRA d_keys_out, ///< [out] Key output data (compacted) - ValueInputIteratorRA d_values_in, ///< [in] Value input data - ValueOutputIteratorRA d_values_out, ///< [out] Value output data (compacted) - int num_items, ///< [in] Total number of input pairs - ReductionOp reduction_op, ///< [in] Binary value reduction operator - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
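// A sequential host-side reference for the semantics of the ReduceValues interface above:
// consecutive values whose keys compare equal are folded together with reduction_op, and the
// compacted (key, aggregate) pairs are written out. This is only a specification sketch under
// that reading of the documentation; it is not how the device implementation operates.

template <typename Key, typename Value, typename ReductionOp>
int ReduceValuesReference(
    const Key   *keys_in,
    Key         *keys_out,
    const Value *values_in,
    Value       *values_out,
    int          num_items,
    ReductionOp  reduction_op)
{
    if (num_items == 0) return 0;

    int   num_segments = 0;
    Key   run_key      = keys_in[0];
    Value run_value    = values_in[0];

    for (int i = 1; i < num_items; ++i)
    {
        if (keys_in[i] == run_key)
        {
            // Same segment: fold the value into the running aggregate
            run_value = reduction_op(run_value, values_in[i]);
        }
        else
        {
            // Segment boundary: emit the compacted (key, aggregate) pair
            keys_out[num_segments]   = run_key;
            values_out[num_segments] = run_value;
            ++num_segments;
            run_key   = keys_in[i];
            run_value = values_in[i];
        }
    }

    // Emit the final segment
    keys_out[num_segments]   = run_key;
    values_out[num_segments] = run_value;
    return num_segments + 1;
}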
- { - return Dispatch(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, reduction_op, num_items, stream, stream_synchronous); - } - - - /** - * \brief Computes device-wide sums of consecutive values whose corresponding keys are equal. - * - * The resulting output lists of value-aggregates and their corresponding keys are compacted. - * - * \devicestorage - * - * \tparam KeyInputIteratorRA [inferred] Random-access input iterator type for keys input (may be a simple pointer type) - * \tparam KeyOutputIteratorRA [inferred] Random-access output iterator type for keys output (may be a simple pointer type) - * \tparam ValueInputIteratorRA [inferred] Random-access input iterator type for values input (may be a simple pointer type) - * \tparam ValueOutputIteratorRA [inferred] Random-access output iterator type for values output (may be a simple pointer type) - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b), where \p T is the value type of \p ValueInputIteratorRA - */ - template < - typename KeyInputIteratorRA, - typename KeyOutputIteratorRA, - typename ValueInputIteratorRA, - typename ValueOutputIteratorRA> - __host__ __device__ __forceinline__ - static cudaError_t SumValues( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - KeyInputIteratorRA d_keys_in, ///< [in] Key input data - KeyOutputIteratorRA d_keys_out, ///< [in] Key output data (compacted) - ValueInputIteratorRA d_values_in, ///< [in] Value input data - ValueOutputIteratorRA d_values_out, ///< [in] Value output data (compacted) - int num_items, ///< [in] Total number of input pairs - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return ReduceValues(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, cub::Sum(), num_items, stream, stream_synchronous); - } - - - /** - * \brief Computes the "run-length" of each group of consecutive, equal-valued keys. - * - * The resulting output lists of run-length counts and their corresponding keys are compacted. - * - * \devicestorage - * - * \tparam KeyInputIteratorRA [inferred] Random-access input iterator type for keys input (may be a simple pointer type) - * \tparam KeyOutputIteratorRA [inferred] Random-access output iterator type for keys output (may be a simple pointer type) - * \tparam CountOutputIteratorRA [inferred] Random-access output iterator type for output of key-counts whose value type must be convertible to an integer type (may be a simple pointer type) - */ - template < - typename KeyInputIteratorRA, - typename KeyOutputIteratorRA, - typename CountOutputIteratorRA> - __host__ __device__ __forceinline__ - static cudaError_t RunLengths( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. 
- KeyInputIteratorRA d_keys_in, ///< [in] Key input data - KeyOutputIteratorRA d_keys_out, ///< [in] Key output data (compacted) - CountOutputIteratorRA d_counts_out, ///< [in] Run-length counts output data (compacted) - int num_items, ///< [in] Total number of keys - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - typedef typename std::iterator_traits::value_type CountT; - return SumValues(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, ConstantIteratorRA(1), d_counts_out, num_items, stream, stream_synchronous); - } - - - /** - * \brief Removes duplicates within each group of consecutive, equal-valued keys. Only the first key from each group (and corresponding value) is kept. - * - * The resulting keys are compacted. - * - * \devicestorage - * - * \tparam KeyInputIteratorRA [inferred] Random-access input iterator type for keys input (may be a simple pointer type) - * \tparam KeyOutputIteratorRA [inferred] Random-access output iterator type for keys output (may be a simple pointer type) - * \tparam ValueInputIteratorRA [inferred] Random-access input iterator type for values input (may be a simple pointer type) - * \tparam ValueOutputIteratorRA [inferred] Random-access output iterator type for values output (may be a simple pointer type) - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b), where \p T is the value type of \p ValueInputIteratorRA - */ - template < - typename KeyInputIteratorRA, - typename KeyOutputIteratorRA, - typename ValueInputIteratorRA, - typename ValueOutputIteratorRA, - typename ReductionOp> - __host__ __device__ __forceinline__ - static cudaError_t Unique( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - KeyInputIteratorRA d_keys_in, ///< [in] Key input data - KeyOutputIteratorRA d_keys_out, ///< [out] Key output data (compacted) - ValueInputIteratorRA d_values_in, ///< [in] Value input data - ValueOutputIteratorRA d_values_out, ///< [out] Value output data (compacted) - int num_items, ///< [in] Total number of input pairs - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, reduction_op, num_items, stream, stream_synchronous); - } - - - -}; - - -/** @} */ // DeviceModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/device/device_reorder.cuh b/kokkos/kokkos/TPL/cub/device/device_reorder.cuh deleted file mode 100644 index cba3bb4..0000000 --- a/kokkos/kokkos/TPL/cub/device/device_reorder.cuh +++ /dev/null @@ -1,550 +0,0 @@ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::DeviceReorder provides device-wide operations for partitioning and filtering lists of items residing within global memory. - */ - -#pragma once - -#include -#include - -#include "device_scan.cuh" -#include "block/block_partition_tiles.cuh" -#include "../grid/grid_queue.cuh" -#include "../util_debug.cuh" -#include "../util_device.cuh" -#include "../util_vector.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Kernel entry points - *****************************************************************************/ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -/** - * Partition kernel entry point (multi-block) - */ -template < - typename BlockPartitionTilesPolicy, ///< Tuning policy for cub::BlockPartitionTiles abstraction - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename LengthOutputIterator, ///< Output iterator type for recording the length of the first partition (may be a simple pointer type) - typename PredicateOp, ///< Unary predicate operator indicating membership in the first partition type having member bool operator()(const T &val) - typename SizeT> ///< Integer type used for global array indexing -__launch_bounds__ (int(BlockPartitionTilesPolicy::BLOCK_THREADS)) -__global__ void PartitionKernel( - InputIteratorRA d_in, ///< Input data - OutputIteratorRA d_out, ///< Output data - LengthOutputIterator d_partition_length, ///< Number of items in the first partition - ScanTileDescriptor > *d_tile_status, ///< Global list of tile status - PredicateOp pred_op, ///< Unary predicate operator indicating membership in the first partition - SizeT num_items, ///< 
Total number of input items for the entire problem - int num_tiles, ///< Totla number of intut tiles for the entire problem - GridQueue queue) ///< Descriptor for performing dynamic mapping of tile data to thread blocks -{ - enum - { - TILE_STATUS_PADDING = PtxArchProps::WARP_THREADS, - }; - - typedef PartitionScanTuple PartitionScanTuple; - - // Thread block type for scanning input tiles - typedef BlockPartitionTiles< - BlockPartitionTilesPolicy, - InputIteratorRA, - OutputIteratorRA, - PredicateOp, - SizeT> BlockPartitionTilesT; - - // Shared memory for BlockPartitionTiles - __shared__ typename BlockPartitionTilesT::TempStorage temp_storage; - - // Process tiles - PartitionScanTuple partition_ends; // Ending offsets for partitions (one-after) - bool is_last_tile; // Whether or not this block handled the last tile (i.e., partition_ends is valid for the entire input) - BlockPartitionTilesT(temp_storage, d_in, d_out, d_tile_status + TILE_STATUS_PADDING, pred_op, num_items).ConsumeTiles( - queue, - num_tiles, - partition_ends, - is_last_tile); - - // Record the length of the first partition - if (is_last_tile && (threadIdx.x == 0)) - { - *d_partition_length = partition_ends.x; - } -} - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - -/****************************************************************************** - * DeviceReorder - *****************************************************************************/ - -/** - * \addtogroup DeviceModule - * @{ - */ - -/** - * \brief DeviceReorder provides device-wide operations for partitioning and filtering lists of items residing within global memory - */ -struct DeviceReorder -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /// Generic structure for encapsulating dispatch properties. Mirrors the constants within BlockPartitionTilesPolicy. 
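// A sequential host-side reference for the two-way partitioning computed by PartitionKernel
// above: items for which pred_op returns true are packed at the front of the output, the
// remaining items follow, and the length of the first partition is returned. Packing the
// second partition from the back (and hence reversing its order) is an assumption of this
// sketch; the device implementation may order the second partition differently.

template <typename T, typename PredicateOp>
int PartitionReference(
    const T     *in,
    T           *out,
    int          num_items,
    PredicateOp  pred_op)
{
    int first_end  = 0;            // next free slot in the first partition
    int second_end = num_items;    // one past the last filled slot of the second partition

    for (int i = 0; i < num_items; ++i)
    {
        if (pred_op(in[i]))
            out[first_end++] = in[i];     // selected items are packed at the front
        else
            out[--second_end] = in[i];    // rejected items fill in from the back
    }

    return first_end;   // length of the first partition (the pivot offset)
}

// Example functor with the PredicateOp interface expected above,
// i.e., bool operator()(const T &val) indicating membership in the first partition:
struct LessThanPivot
{
    int pivot;
    __host__ __device__ bool operator()(const int &val) const { return val < pivot; }
};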
- struct KernelDispachParams - { - int block_threads; - int items_per_thread; - BlockScanAlgorithm scan_algorithm; - int tile_size; - - template - __host__ __device__ __forceinline__ - void Init() - { - block_threads = BlockPartitionTilesPolicy::BLOCK_THREADS; - items_per_thread = BlockPartitionTilesPolicy::ITEMS_PER_THREAD; - scan_algorithm = BlockPartitionTilesPolicy::SCAN_ALGORITHM; - tile_size = block_threads * items_per_thread; - } - }; - - - /****************************************************************************** - * Tuning policies - ******************************************************************************/ - - - /// Specializations of tuned policy types for different PTX architectures - template < - int PARTITIONS, - typename T, - typename SizeT, - int ARCH> - struct TunedPolicies; - - /// SM35 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 16, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef BlockPartitionTilesPolicy PartitionPolicy; - }; - - /// SM30 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef BlockPartitionTilesPolicy PartitionPolicy; - }; - - /// SM20 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 15, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef BlockPartitionTilesPolicy PartitionPolicy; - }; - - /// SM10 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 7, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - typedef BlockPartitionTilesPolicy PartitionPolicy; - }; - - - /// Tuning policy for the PTX architecture that DevicePartition operations will get dispatched to - template - struct PtxDefaultPolicies - { - static const int PTX_TUNE_ARCH = (CUB_PTX_ARCH >= 350) ? - 350 : - (CUB_PTX_ARCH >= 300) ? - 300 : - (CUB_PTX_ARCH >= 200) ? 
- 200 : - 100; - - // Tuned policy set for the current PTX compiler pass - typedef TunedPolicies PtxTunedPolicies; - - // PartitionPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct PartitionPolicy : PtxTunedPolicies::PartitionPolicy {}; - - /** - * Initialize dispatch params with the policies corresponding to the PTX assembly we will use - */ - static void InitDispatchParams(int ptx_version, KernelDispachParams &scan_dispatch_params) - { - if (ptx_version >= 350) - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - else if (ptx_version >= 300) - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - else if (ptx_version >= 200) - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - else - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - } - }; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /** - * Internal dispatch routine - */ - template < - typename ScanInitKernelPtr, ///< Function type of cub::ScanInitKernel - typename PartitionKernelPtr, ///< Function type of cub::PartitionKernel - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename LengthOutputIterator, ///< Output iterator type for recording the length of the first partition (may be a simple pointer type) - typename PredicateOp, ///< Unary predicate operator indicating membership in the first partition type having member bool operator()(const T &val) - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - int ptx_version, ///< [in] PTX version - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - ScanInitKernelPtr init_kernel, ///< [in] Kernel function pointer to parameterization of cub::PartitionInitKernel - PartitionKernelPtr partition_kernel, ///< [in] Kernel function pointer to parameterization of cub::PartitionKernel - KernelDispachParams &scan_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p partition_kernel was compiled for - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - LengthOutputIterator d_partition_length, ///< [out] Output iterator referencing the location where the pivot offset (i.e., the length of the first partition) is to be recorded - PredicateOp pred_op, ///< [in] Unary predicate operator indicating membership in the first partition - SizeT num_items, ///< [in] Total number of items to partition - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. 
- { - -#ifndef CUB_RUNTIME_ENABLED - - // Kernel launch not supported from this device - return CubDebug(cudaErrorNotSupported); - -#else - - enum - { - TILE_STATUS_PADDING = 32, - }; - - // Data type - typedef typename std::iterator_traits::value_type T; - - // Scan tuple type and tile status descriptor type - typedef typename VectorHelper::Type ScanTuple; - typedef ScanTileDescriptor ScanTileDescriptorT; - - cudaError error = cudaSuccess; - do - { - // Number of input tiles - int num_tiles = (num_items + scan_dispatch_params.tile_size - 1) / scan_dispatch_params.tile_size; - - // Temporary storage allocation requirements - void* allocations[2]; - size_t allocation_sizes[2] = - { - (num_tiles + TILE_STATUS_PADDING) * sizeof(ScanTileDescriptorT), // bytes needed for tile status descriptors - GridQueue::AllocationSize() // bytes needed for grid queue descriptor - }; - - // Alias temporaries (or set the necessary size of the storage allocation) - if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; - - // Return if the caller is simply requesting the size of the storage allocation - if (d_temp_storage == NULL) - return cudaSuccess; - - // Global list of tile status - ScanTileDescriptorT *d_tile_status = (ScanTileDescriptorT*) allocations[0]; - - // Grid queue descriptor - GridQueue queue(allocations[1]); - - // Log init_kernel configuration - int init_kernel_threads = 128; - int init_grid_size = (num_tiles + init_kernel_threads - 1) / init_kernel_threads; - if (stream_synchronous) CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, init_kernel_threads, (long long) stream); - - // Invoke init_kernel to initialize tile descriptors and queue descriptors - init_kernel<<>>( - queue, - d_tile_status, - num_tiles); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - // Get grid size for multi-block kernel - int scan_grid_size; - int multi_sm_occupancy = -1; - if (ptx_version < 200) - { - // We don't have atomics (or don't have fast ones), so just assign one - // block per tile (limited to 65K tiles) - scan_grid_size = num_tiles; - } - else - { - // We have atomics and can thus reuse blocks across multiple tiles using a queue descriptor. - // Get GPU id - int device_ordinal; - if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; - - // Get SM count - int sm_count; - if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; - - // Get a rough estimate of partition_kernel SM occupancy based upon the maximum SM occupancy of the targeted PTX architecture - multi_sm_occupancy = CUB_MIN( - ArchProps::MAX_SM_THREADBLOCKS, - ArchProps::MAX_SM_THREADS / scan_dispatch_params.block_threads); - -#ifndef __CUDA_ARCH__ - // We're on the host, so come up with a - Device device_props; - if (CubDebug(error = device_props.Init(device_ordinal))) break; - - if (CubDebug(error = device_props.MaxSmOccupancy( - multi_sm_occupancy, - partition_kernel, - scan_dispatch_params.block_threads))) break; -#endif - // Get device occupancy for partition_kernel - int scan_occupancy = multi_sm_occupancy * sm_count; - - // Get grid size for partition_kernel - scan_grid_size = (num_tiles < scan_occupancy) ? 
- num_tiles : // Not enough to fill the device with threadblocks - scan_occupancy; // Fill the device with threadblocks - } - - // Log partition_kernel configuration - if (stream_synchronous) CubLog("Invoking partition_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", - scan_grid_size, scan_dispatch_params.block_threads, (long long) stream, scan_dispatch_params.items_per_thread, multi_sm_occupancy); - - // Invoke partition_kernel - partition_kernel<<>>( - d_in, - d_out, - d_partition_length, - d_tile_status, - pred_op, - num_items, - num_tiles, - queue); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - } - while (0); - - return error; - -#endif // CUB_RUNTIME_ENABLED - } - - - - /** - * Internal partition dispatch routine for using default tuning policies - */ - template < - typename PARTITIONS, ///< Number of partitions we are keeping - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename LengthOutputIterator, ///< Output iterator type for recording the length of the first partition (may be a simple pointer type) - typename PredicateOp, ///< Unary predicate operator indicating membership in the first partition type having member bool operator()(const T &val) - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to input items - OutputIteratorRA d_out, ///< [in] Iterator pointing to output items - LengthOutputIterator d_partition_length, ///< [out] Output iterator referencing the location where the pivot offset (i.e., the length of the first partition) is to be recorded - PredicateOp pred_op, ///< [in] Unary predicate operator indicating membership in the first partition - SizeT num_items, ///< [in] Total number of items to partition - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. 
- { - // Data type - typedef typename std::iterator_traits::value_type T; - - // Tuning polices - typedef PtxDefaultPolicies PtxDefaultPolicies; // Wrapper of default kernel policies - typedef typename PtxDefaultPolicies::PartitionPolicy PartitionPolicy; // Partition kernel policy - - cudaError error = cudaSuccess; - do - { - // Declare dispatch parameters - KernelDispachParams scan_dispatch_params; - - int ptx_version; -#ifdef __CUDA_ARCH__ - // We're on the device, so initialize the dispatch parameters with the PtxDefaultPolicies directly - scan_dispatch_params.Init(); - ptx_version = CUB_PTX_ARCH; -#else - // We're on the host, so lookup and initialize the dispatch parameters with the policies that match the device's PTX version - if (CubDebug(error = PtxVersion(ptx_version))) break; - PtxDefaultPolicies::InitDispatchParams(ptx_version, scan_dispatch_params); -#endif - - Dispatch( - ptx_version, - d_temp_storage, - temp_storage_bytes, - ScanInitKernel, - PartitionKernel, - scan_dispatch_params, - d_in, - d_out, - d_partition_length, - pred_op, - num_items, - stream, - stream_synchronous); - - if (CubDebug(error)) break; - } - while (0); - - return error; - } - - #endif // DOXYGEN_SHOULD_SKIP_THIS - - - /** - * \brief Splits a list of input items into two partitions within the given output list using the specified predicate. The relative ordering of inputs is not necessarily preserved. - * - * An item \p val is placed in the first partition if pred_op(val) == true, otherwise - * it is placed in the second partition. The offset of the partitioning pivot (equivalent to - * the total length of the first partition as well as the starting offset of the second), is - * recorded to \p d_partition_length. - * - * The length of the output referenced by \p d_out is assumed to be the same as that of \p d_in. - * - * \devicestorage - * - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) - * \tparam OutputIteratorRA [inferred] Random-access iterator type for output (may be a simple pointer type) - * \tparam LengthOutputIterator [inferred] Random-access iterator type for output (may be a simple pointer type) - * \tparam PredicateOp [inferred] Unary predicate operator indicating membership in the first partition type having member bool operator()(const T &val) - */ - template < - typename InputIteratorRA, - typename OutputIteratorRA, - typename LengthOutputIterator, - typename PredicateOp> - __host__ __device__ __forceinline__ - static cudaError_t Partition( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to input items - OutputIteratorRA d_out, ///< [in] Iterator pointing to output items - LengthOutputIterator d_pivot_offset, ///< [out] Output iterator referencing the location where the pivot offset is to be recorded - PredicateOp pred_op, ///< [in] Unary predicate operator indicating membership in the first partition - int num_items, ///< [in] Total number of items to partition - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
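-     // Illustrative two-phase call sequence for the temp-storage convention above (a sketch
-     // only: d_in, d_out, d_pivot_offset, pred, and num_items are assumed to be prepared by
-     // the caller, and error checking is omitted):
-     //
-     //   void   *d_temp_storage     = NULL;
-     //   size_t  temp_storage_bytes = 0;
-     //   // First call with d_temp_storage == NULL only reports the required allocation size
-     //   cub::DevicePartition::Partition(d_temp_storage, temp_storage_bytes, d_in, d_out, d_pivot_offset, pred, num_items);
-     //   cudaMalloc(&d_temp_storage, temp_storage_bytes);
-     //   // Second call performs the partition and records the pivot offset
-     //   cub::DevicePartition::Partition(d_temp_storage, temp_storage_bytes, d_in, d_out, d_pivot_offset, pred, num_items);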
- { - typedef typename std::iterator_traits::value_type T; - return Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_out, Sum(), T(), num_items, stream, stream_synchronous); - } - - -}; - - -/** @} */ // DeviceModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/device/device_scan.cuh b/kokkos/kokkos/TPL/cub/device/device_scan.cuh deleted file mode 100644 index c0640c8..0000000 --- a/kokkos/kokkos/TPL/cub/device/device_scan.cuh +++ /dev/null @@ -1,812 +0,0 @@ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::DeviceScan provides operations for computing a device-wide, parallel prefix scan across data items residing within global memory. 
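- *
- * For example, an exclusive prefix sum over the input sequence [8, 6, 7, 5, 3]
- * produces [0, 8, 14, 21, 26], whereas an inclusive prefix sum produces
- * [8, 14, 21, 26, 29].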
- */ - -#pragma once - -#include -#include - -#include "block/block_scan_tiles.cuh" -#include "../thread/thread_operators.cuh" -#include "../grid/grid_queue.cuh" -#include "../util_debug.cuh" -#include "../util_device.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Kernel entry points - *****************************************************************************/ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - -/** - * Initialization kernel for tile status initialization (multi-block) - */ -template < - typename T, ///< Scan value type - typename SizeT> ///< Integer type used for global array indexing -__global__ void ScanInitKernel( - GridQueue grid_queue, ///< [in] Descriptor for performing dynamic mapping of input tiles to thread blocks - ScanTileDescriptor *d_tile_status, ///< [out] Tile status words - int num_tiles) ///< [in] Number of tiles -{ - typedef ScanTileDescriptor ScanTileDescriptorT; - - enum - { - TILE_STATUS_PADDING = PtxArchProps::WARP_THREADS, - }; - - // Reset queue descriptor - if ((blockIdx.x == 0) && (threadIdx.x == 0)) grid_queue.ResetDrain(num_tiles); - - // Initialize tile status - int tile_offset = (blockIdx.x * blockDim.x) + threadIdx.x; - if (tile_offset < num_tiles) - { - // Not-yet-set - d_tile_status[TILE_STATUS_PADDING + tile_offset].status = SCAN_TILE_INVALID; - } - - if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) - { - // Padding - d_tile_status[threadIdx.x].status = SCAN_TILE_OOB; - } -} - - -/** - * Scan kernel entry point (multi-block) - */ -template < - typename BlockScanTilesPolicy, ///< Tuning policy for cub::BlockScanTiles abstraction - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename T, ///< The scan data type - typename ScanOp, ///< Binary scan operator type having member T operator()(const T &a, const T &b) - typename Identity, ///< Identity value type (cub::NullType for inclusive scans) - typename SizeT> ///< Integer type used for global array indexing -__launch_bounds__ (int(BlockScanTilesPolicy::BLOCK_THREADS)) -__global__ void ScanKernel( - InputIteratorRA d_in, ///< Input data - OutputIteratorRA d_out, ///< Output data - ScanTileDescriptor *d_tile_status, ///< Global list of tile status - ScanOp scan_op, ///< Binary scan operator - Identity identity, ///< Identity element - SizeT num_items, ///< Total number of scan items for the entire problem - GridQueue queue) ///< Descriptor for performing dynamic mapping of tile data to thread blocks -{ - enum - { - TILE_STATUS_PADDING = PtxArchProps::WARP_THREADS, - }; - - // Thread block type for scanning input tiles - typedef BlockScanTiles< - BlockScanTilesPolicy, - InputIteratorRA, - OutputIteratorRA, - ScanOp, - Identity, - SizeT> BlockScanTilesT; - - // Shared memory for BlockScanTiles - __shared__ typename BlockScanTilesT::TempStorage temp_storage; - - // Process tiles - BlockScanTilesT(temp_storage, d_in, d_out, scan_op, identity).ConsumeTiles( - num_items, - queue, - d_tile_status + TILE_STATUS_PADDING); -} - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - -/****************************************************************************** - * DeviceScan - *****************************************************************************/ - -/** - * 
\brief DeviceScan provides operations for computing a device-wide, parallel prefix scan across data items residing within global memory. ![](device_scan.png) - * \ingroup DeviceModule - * - * \par Overview - * Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) - * produces an output list where each element is computed to be the reduction - * of the elements occurring earlier in the input list. Prefix sum - * connotes a prefix scan with the addition operator. The term \em inclusive indicates - * that the ith output reduction incorporates the ith input. - * The term \em exclusive indicates the ith input is not incorporated into - * the ith output reduction. - * - * \par Usage Considerations - * \cdp_class{DeviceScan} - * - * \par Performance - * - * \image html scan_perf.png - * - */ -struct DeviceScan -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - /// Generic structure for encapsulating dispatch properties. Mirrors the constants within BlockScanTilesPolicy. - struct KernelDispachParams - { - // Policy fields - int block_threads; - int items_per_thread; - BlockLoadAlgorithm load_policy; - BlockStoreAlgorithm store_policy; - BlockScanAlgorithm scan_algorithm; - - // Other misc - int tile_size; - - template - __host__ __device__ __forceinline__ - void Init() - { - block_threads = BlockScanTilesPolicy::BLOCK_THREADS; - items_per_thread = BlockScanTilesPolicy::ITEMS_PER_THREAD; - load_policy = BlockScanTilesPolicy::LOAD_ALGORITHM; - store_policy = BlockScanTilesPolicy::STORE_ALGORITHM; - scan_algorithm = BlockScanTilesPolicy::SCAN_ALGORITHM; - - tile_size = block_threads * items_per_thread; - } - - __host__ __device__ __forceinline__ - void Print() - { - printf("%d, %d, %d, %d, %d", - block_threads, - items_per_thread, - load_policy, - store_policy, - scan_algorithm); - } - - }; - - - /****************************************************************************** - * Tuning policies - ******************************************************************************/ - - - /// Specializations of tuned policy types for different PTX architectures - template < - typename T, - typename SizeT, - int ARCH> - struct TunedPolicies; - - /// SM35 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 16, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - // ScanPolicy: GTX Titan: 29.1B items/s (232.4 GB/s) @ 48M 32-bit T - typedef BlockScanTilesPolicy<128, ITEMS_PER_THREAD, BLOCK_LOAD_DIRECT, false, LOAD_LDG, BLOCK_STORE_WARP_TRANSPOSE, true, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; - }; - - /// SM30 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef BlockScanTilesPolicy<256, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, false, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, false, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; - }; - - /// SM20 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 15, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - // ScanPolicy: GTX 580: 
20.3B items/s (162.3 GB/s) @ 48M 32-bit T - typedef BlockScanTilesPolicy<128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, false, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, false, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; - }; - - /// SM10 tune - template - struct TunedPolicies - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 7, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - typedef BlockScanTilesPolicy<128, ITEMS_PER_THREAD, BLOCK_LOAD_TRANSPOSE, false, LOAD_DEFAULT, BLOCK_STORE_TRANSPOSE, false, BLOCK_SCAN_RAKING> ScanPolicy; - }; - - - /// Tuning policy for the PTX architecture that DeviceScan operations will get dispatched to - template - struct PtxDefaultPolicies - { - static const int PTX_TUNE_ARCH = (CUB_PTX_ARCH >= 350) ? - 350 : - (CUB_PTX_ARCH >= 300) ? - 300 : - (CUB_PTX_ARCH >= 200) ? - 200 : - 100; - - // Tuned policy set for the current PTX compiler pass - typedef TunedPolicies PtxTunedPolicies; - - // ScanPolicy that opaquely derives from the specialization corresponding to the current PTX compiler pass - struct ScanPolicy : PtxTunedPolicies::ScanPolicy {}; - - /** - * Initialize dispatch params with the policies corresponding to the PTX assembly we will use - */ - static void InitDispatchParams(int ptx_version, KernelDispachParams &scan_dispatch_params) - { - if (ptx_version >= 350) - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - else if (ptx_version >= 300) - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - else if (ptx_version >= 200) - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - else - { - typedef TunedPolicies TunedPolicies; - scan_dispatch_params.Init(); - } - } - }; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /** - * Internal dispatch routine - */ - template < - typename ScanInitKernelPtr, ///< Function type of cub::ScanInitKernel - typename ScanKernelPtr, ///< Function type of cub::ScanKernel - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename ScanOp, ///< Binary scan operator type having member T operator()(const T &a, const T &b) - typename Identity, ///< Identity value type (cub::NullType for inclusive scans) - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - int ptx_version, ///< [in] PTX version - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. 
- ScanInitKernelPtr init_kernel, ///< [in] Kernel function pointer to parameterization of cub::ScanInitKernel - ScanKernelPtr scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::ScanKernel - KernelDispachParams &scan_dispatch_params, ///< [in] Dispatch parameters that match the policy that \p scan_kernel was compiled for - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - ScanOp scan_op, ///< [in] Binary scan operator - Identity identity, ///< [in] Identity element - SizeT num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. - { - -#ifndef CUB_RUNTIME_ENABLED - - // Kernel launch not supported from this device - return CubDebug(cudaErrorNotSupported); - -#else - - enum - { - TILE_STATUS_PADDING = 32, - INIT_KERNEL_THREADS = 128 - }; - - // Data type - typedef typename std::iterator_traits::value_type T; - - // Tile status descriptor type - typedef ScanTileDescriptor ScanTileDescriptorT; - - cudaError error = cudaSuccess; - do - { - // Number of input tiles - int num_tiles = (num_items + scan_dispatch_params.tile_size - 1) / scan_dispatch_params.tile_size; - - // Temporary storage allocation requirements - void* allocations[2]; - size_t allocation_sizes[2] = - { - (num_tiles + TILE_STATUS_PADDING) * sizeof(ScanTileDescriptorT), // bytes needed for tile status descriptors - GridQueue::AllocationSize() // bytes needed for grid queue descriptor - }; - - // Alias temporaries (or set the necessary size of the storage allocation) - if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; - - // Return if the caller is simply requesting the size of the storage allocation - if (d_temp_storage == NULL) - return cudaSuccess; - - // Global list of tile status - ScanTileDescriptorT *d_tile_status = (ScanTileDescriptorT*) allocations[0]; - - // Grid queue descriptor - GridQueue queue(allocations[1]); - - // Log init_kernel configuration - int init_grid_size = (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS; - if (stream_synchronous) CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); - - // Invoke init_kernel to initialize tile descriptors and queue descriptors - init_kernel<<>>( - queue, - d_tile_status, - num_tiles); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - - // Get grid size for multi-block kernel - int scan_grid_size; - int multi_sm_occupancy = -1; - if (ptx_version < 200) - { - // We don't have atomics (or don't have fast ones), so just assign one - // block per tile (limited to 65K tiles) - scan_grid_size = num_tiles; - } - else - { - // We have atomics and can thus reuse blocks across multiple tiles using a queue descriptor. 
- // Get GPU id - int device_ordinal; - if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; - - // Get SM count - int sm_count; - if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; - - // Get a rough estimate of scan_kernel SM occupancy based upon the maximum SM occupancy of the targeted PTX architecture - multi_sm_occupancy = CUB_MIN( - ArchProps::MAX_SM_THREADBLOCKS, - ArchProps::MAX_SM_THREADS / scan_dispatch_params.block_threads); - -#ifndef __CUDA_ARCH__ - // We're on the host, so come up with a - Device device_props; - if (CubDebug(error = device_props.Init(device_ordinal))) break; - - if (CubDebug(error = device_props.MaxSmOccupancy( - multi_sm_occupancy, - scan_kernel, - scan_dispatch_params.block_threads))) break; -#endif - // Get device occupancy for scan_kernel - int scan_occupancy = multi_sm_occupancy * sm_count; - - // Get grid size for scan_kernel - scan_grid_size = (num_tiles < scan_occupancy) ? - num_tiles : // Not enough to fill the device with threadblocks - scan_occupancy; // Fill the device with threadblocks - } - - // Log scan_kernel configuration - if (stream_synchronous) CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", - scan_grid_size, scan_dispatch_params.block_threads, (long long) stream, scan_dispatch_params.items_per_thread, multi_sm_occupancy); - - // Invoke scan_kernel - scan_kernel<<>>( - d_in, - d_out, - d_tile_status, - scan_op, - identity, - num_items, - queue); - - // Sync the stream if specified - if (stream_synchronous && (CubDebug(error = SyncStream(stream)))) break; - } - while (0); - - return error; - -#endif // CUB_RUNTIME_ENABLED - } - - - - /** - * Internal scan dispatch routine for using default tuning policies - */ - template < - typename InputIteratorRA, ///< Random-access iterator type for input (may be a simple pointer type) - typename OutputIteratorRA, ///< Random-access iterator type for output (may be a simple pointer type) - typename ScanOp, ///< Binary scan operator type having member T operator()(const T &a, const T &b) - typename Identity, ///< Identity value type (cub::NullType for inclusive scans) - typename SizeT> ///< Integer type used for global array indexing - __host__ __device__ __forceinline__ - static cudaError_t Dispatch( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - ScanOp scan_op, ///< [in] Binary scan operator - Identity identity, ///< [in] Identity element - SizeT num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Default is \p false. 
- { - // Data type - typedef typename std::iterator_traits::value_type T; - - // Tuning polices - typedef PtxDefaultPolicies PtxDefaultPolicies; // Wrapper of default kernel policies - typedef typename PtxDefaultPolicies::ScanPolicy ScanPolicy; // Scan kernel policy - - cudaError error = cudaSuccess; - do - { - // Declare dispatch parameters - KernelDispachParams scan_dispatch_params; - - int ptx_version; -#ifdef __CUDA_ARCH__ - // We're on the device, so initialize the dispatch parameters with the PtxDefaultPolicies directly - scan_dispatch_params.Init(); - ptx_version = CUB_PTX_ARCH; -#else - // We're on the host, so lookup and initialize the dispatch parameters with the policies that match the device's PTX version - if (CubDebug(error = PtxVersion(ptx_version))) break; - PtxDefaultPolicies::InitDispatchParams(ptx_version, scan_dispatch_params); -#endif - - Dispatch( - ptx_version, - d_temp_storage, - temp_storage_bytes, - ScanInitKernel, - ScanKernel, - scan_dispatch_params, - d_in, - d_out, - scan_op, - identity, - num_items, - stream, - stream_synchronous); - - if (CubDebug(error)) break; - } - while (0); - - return error; - } - - #endif // DOXYGEN_SHOULD_SKIP_THIS - - - /******************************************************************//** - * \name Exclusive scans - *********************************************************************/ - //@{ - - /** - * \brief Computes a device-wide exclusive prefix sum. - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the exclusive prefix sum of a device vector of \p int items. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input and output - * int *d_scan_input, *d_scan_output; - * int num_items = ... - * - * ... - * - * // Determine temporary device storage requirements for exclusive prefix sum - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, num_items); - * - * // Allocate temporary storage for exclusive prefix sum - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run exclusive prefix sum - * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, num_items); - * - * \endcode - * - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) - * \tparam OutputIteratorRA [inferred] Random-access iterator type for output (may be a simple pointer type) - */ - template < - typename InputIteratorRA, - typename OutputIteratorRA> - __host__ __device__ __forceinline__ - static cudaError_t ExclusiveSum( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - int num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
- { - typedef typename std::iterator_traits::value_type T; - return Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_out, Sum(), T(), num_items, stream, stream_synchronous); - } - - - /** - * \brief Computes a device-wide exclusive prefix scan using the specified binary \p scan_op functor. - * - * \par - * Supports non-commutative scan operators. - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the exclusive prefix scan of a device vector of \p int items. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input and output - * int *d_scan_input, *d_scan_output; - * int num_items = ... - * - * ... - * - * // Determine temporary device storage requirements for exclusive prefix scan - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, cub::Max(), (int) MIN_INT, num_items); - * - * // Allocate temporary storage for exclusive prefix scan - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run exclusive prefix scan (max) - * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, cub::Max(), (int) MIN_INT, num_items); - * - * \endcode - * - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) - * \tparam OutputIteratorRA [inferred] Random-access iterator type for output (may be a simple pointer type) - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam Identity [inferred] Type of the \p identity value used Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - typename InputIteratorRA, - typename OutputIteratorRA, - typename ScanOp, - typename Identity> - __host__ __device__ __forceinline__ - static cudaError_t ExclusiveScan( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - ScanOp scan_op, ///< [in] Binary scan operator - Identity identity, ///< [in] Identity element - int num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, identity, num_items, stream, stream_synchronous); - } - - - //@} end member group - /******************************************************************//** - * \name Inclusive scans - *********************************************************************/ - //@{ - - - /** - * \brief Computes a device-wide inclusive prefix sum. - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the inclusive prefix sum of a device vector of \p int items. - * \par - * \code - * #include - * ... 
- * - * // Declare and initialize device pointers for input and output - * int *d_scan_input, *d_scan_output; - * int num_items = ... - * ... - * - * // Determine temporary device storage requirements for inclusive prefix sum - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, num_items); - * - * // Allocate temporary storage for inclusive prefix sum - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run inclusive prefix sum - * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, num_items); - * - * \endcode - * - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) - * \tparam OutputIteratorRA [inferred] Random-access iterator type for output (may be a simple pointer type) - */ - template < - typename InputIteratorRA, - typename OutputIteratorRA> - __host__ __device__ __forceinline__ - static cudaError_t InclusiveSum( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - int num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_out, Sum(), NullType(), num_items, stream, stream_synchronous); - } - - - /** - * \brief Computes a device-wide inclusive prefix scan using the specified binary \p scan_op functor. - * - * \par - * Supports non-commutative scan operators. - * - * \devicestorage - * - * \cdp - * - * \iterator - * - * \par - * The code snippet below illustrates the inclusive prefix scan of a device vector of \p int items. - * \par - * \code - * #include - * ... - * - * // Declare and initialize device pointers for input and output - * int *d_scan_input, *d_scan_output; - * int num_items = ... - * ... 
- * - * // Determine temporary device storage requirements for inclusive prefix scan - * void *d_temp_storage = NULL; - * size_t temp_storage_bytes = 0; - * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, cub::Max(), num_items); - * - * // Allocate temporary storage for inclusive prefix scan - * cudaMalloc(&d_temp_storage, temp_storage_bytes); - * - * // Run inclusive prefix scan (max) - * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_scan_input, d_scan_output, cub::Max(), num_items); - * - * \endcode - * - * \tparam InputIteratorRA [inferred] Random-access iterator type for input (may be a simple pointer type) - * \tparam OutputIteratorRA [inferred] Random-access iterator type for output (may be a simple pointer type) - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template < - typename InputIteratorRA, - typename OutputIteratorRA, - typename ScanOp> - __host__ __device__ __forceinline__ - static cudaError_t InclusiveScan( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. - size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \p d_temp_storage allocation. - InputIteratorRA d_in, ///< [in] Iterator pointing to scan input - OutputIteratorRA d_out, ///< [in] Iterator pointing to scan output - ScanOp scan_op, ///< [in] Binary scan operator - int num_items, ///< [in] Total number of items to scan - cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. - bool stream_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. - { - return Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, NullType(), num_items, stream, stream_synchronous); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/grid/grid_barrier.cuh b/kokkos/kokkos/TPL/cub/grid/grid_barrier.cuh deleted file mode 100644 index ebdc4b5..0000000 --- a/kokkos/kokkos/TPL/cub/grid/grid_barrier.cuh +++ /dev/null @@ -1,211 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::GridBarrier implements a software global barrier among thread blocks within a CUDA grid - */ - -#pragma once - -#include "../util_debug.cuh" -#include "../util_namespace.cuh" -#include "../thread/thread_load.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup GridModule - * @{ - */ - - -/** - * \brief GridBarrier implements a software global barrier among thread blocks within a CUDA grid - */ -class GridBarrier -{ -protected : - - typedef unsigned int SyncFlag; - - // Counters in global device memory - SyncFlag* d_sync; - -public: - - /** - * Constructor - */ - GridBarrier() : d_sync(NULL) {} - - - /** - * Synchronize - */ - __device__ __forceinline__ void Sync() const - { - volatile SyncFlag *d_vol_sync = d_sync; - - // Threadfence and syncthreads to make sure global writes are visible before - // thread-0 reports in with its sync counter - __threadfence(); - __syncthreads(); - - if (blockIdx.x == 0) - { - // Report in ourselves - if (threadIdx.x == 0) - { - d_vol_sync[blockIdx.x] = 1; - } - - __syncthreads(); - - // Wait for everyone else to report in - for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) - { - while (ThreadLoad(d_sync + peer_block) == 0) - { - __threadfence_block(); - } - } - - __syncthreads(); - - // Let everyone know it's safe to proceed - for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) - { - d_vol_sync[peer_block] = 0; - } - } - else - { - if (threadIdx.x == 0) - { - // Report in - d_vol_sync[blockIdx.x] = 1; - - // Wait for acknowledgment - while (ThreadLoad(d_sync + blockIdx.x) == 1) - { - __threadfence_block(); - } - } - - __syncthreads(); - } - } -}; - - -/** - * \brief GridBarrierLifetime extends GridBarrier to provide lifetime management of the temporary device storage needed for cooperation. - * - * Uses RAII for lifetime, i.e., device resources are reclaimed when - * the destructor is called. 
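- *
- * \par
- * A rough host-side usage sketch (illustrative only: \p MyKernel, \p grid_size,
- * \p block_threads, and \p d_data are placeholders, and \p MyKernel is assumed to
- * accept the barrier by value as a GridBarrier and call Sync() within it):
- * \par
- * \code
- * GridBarrierLifetime global_barrier;
- * global_barrier.Setup(grid_size);   // lazily allocates and zeroes one SyncFlag per threadblock
- * MyKernel<<<grid_size, block_threads>>>(global_barrier, d_data);
- * // device storage is reclaimed when global_barrier goes out of scope (or via HostReset())
- * \endcode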
- */ -class GridBarrierLifetime : public GridBarrier -{ -protected: - - // Number of bytes backed by d_sync - size_t sync_bytes; - -public: - - /** - * Constructor - */ - GridBarrierLifetime() : GridBarrier(), sync_bytes(0) {} - - - /** - * DeviceFrees and resets the progress counters - */ - cudaError_t HostReset() - { - cudaError_t retval = cudaSuccess; - if (d_sync) - { - CubDebug(retval = cudaFree(d_sync)); - d_sync = NULL; - } - sync_bytes = 0; - return retval; - } - - - /** - * Destructor - */ - virtual ~GridBarrierLifetime() - { - HostReset(); - } - - - /** - * Sets up the progress counters for the next kernel launch (lazily - * allocating and initializing them if necessary) - */ - cudaError_t Setup(int sweep_grid_size) - { - cudaError_t retval = cudaSuccess; - do { - size_t new_sync_bytes = sweep_grid_size * sizeof(SyncFlag); - if (new_sync_bytes > sync_bytes) - { - if (d_sync) - { - if (CubDebug(retval = cudaFree(d_sync))) break; - } - - sync_bytes = new_sync_bytes; - - // Allocate and initialize to zero - if (CubDebug(retval = cudaMalloc((void**) &d_sync, sync_bytes))) break; - if (CubDebug(retval = cudaMemset(d_sync, 0, new_sync_bytes))) break; - } - } while (0); - - return retval; - } -}; - - -/** @} */ // end group GridModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/grid/grid_even_share.cuh b/kokkos/kokkos/TPL/cub/grid/grid_even_share.cuh deleted file mode 100644 index defe9e0..0000000 --- a/kokkos/kokkos/TPL/cub/grid/grid_even_share.cuh +++ /dev/null @@ -1,197 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::GridEvenShare is a descriptor utility for distributing input among CUDA threadblocks in an "even-share" fashion. 
Each threadblock gets roughly the same number of fixed-size work units (grains). - */ - - -#pragma once - -#include "../util_namespace.cuh" -#include "../util_macro.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup GridModule - * @{ - */ - - -/** - * \brief GridEvenShare is a descriptor utility for distributing input among CUDA threadblocks in an "even-share" fashion. Each threadblock gets roughly the same number of fixed-size work units (grains). - * - * \par Overview - * GridEvenShare indicates which sections of input are to be mapped onto which threadblocks. - * Threadblocks may receive one of three different amounts of work: "big", "normal", - * and "last". The "big" workloads are one scheduling grain larger than "normal". The "last" work unit - * for the last threadblock may be partially-full if the input is not an even multiple of - * the scheduling grain size. - * - * \par - * Before invoking a child grid, a parent thread will typically construct and initialize an instance of - * GridEvenShare using \p GridInit(). The instance can be passed to child threadblocks which can - * initialize their per-threadblock offsets using \p BlockInit(). - * - * \tparam SizeT Integer type for array indexing - */ -template -class GridEvenShare -{ -private: - - SizeT total_grains; - int big_blocks; - SizeT big_share; - SizeT normal_share; - SizeT normal_base_offset; - - -public: - - /// Total number of input items - SizeT num_items; - - /// Grid size in threadblocks - int grid_size; - - /// Offset into input marking the beginning of the owning thread block's segment of input tiles - SizeT block_offset; - - /// Offset into input of marking the end (one-past) of the owning thread block's segment of input tiles - SizeT block_oob; - - /** - * \brief Block-based constructor for single-block grids. - */ - __device__ __forceinline__ GridEvenShare(SizeT num_items) : - num_items(num_items), - grid_size(1), - block_offset(0), - block_oob(num_items) {} - - - /** - * \brief Default constructor. Zero-initializes block-specific fields. - */ - __host__ __device__ __forceinline__ GridEvenShare() : - num_items(0), - grid_size(0), - block_offset(0), - block_oob(0) {} - - - /** - * \brief Initializes the grid-specific members \p num_items and \p grid_size. To be called prior prior to kernel launch) - */ - __host__ __device__ __forceinline__ void GridInit( - SizeT num_items, ///< Total number of input items - int max_grid_size, ///< Maximum grid size allowable (actual grid size may be less if not warranted by the the number of input items) - int schedule_granularity) ///< Granularity by which the input can be parcelled into and distributed among threablocks. Usually the thread block's native tile size (or a multiple thereof. 
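-     // Worked example (for illustration only): num_items = 1000, schedule_granularity = 128,
-     // and max_grid_size = 3 give
-     //   total_grains = ceil(1000/128) = 8,  grid_size = min(8,3) = 3,  grains_per_block = 2,
-     //   big_blocks = 8 - 2*3 = 2,  normal_share = 256,  big_share = 384,  normal_base_offset = 256.
-     // BlockInit() then assigns [0,384) and [384,768) to the two "big" blocks and [768,1000)
-     // to the last ("normal") block, whose end is clamped to num_items.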
- { - this->num_items = num_items; - this->block_offset = 0; - this->block_oob = 0; - this->total_grains = (num_items + schedule_granularity - 1) / schedule_granularity; - this->grid_size = CUB_MIN(total_grains, max_grid_size); - SizeT grains_per_block = total_grains / grid_size; - this->big_blocks = total_grains - (grains_per_block * grid_size); // leftover grains go to big blocks - this->normal_share = grains_per_block * schedule_granularity; - this->normal_base_offset = big_blocks * schedule_granularity; - this->big_share = normal_share + schedule_granularity; - } - - - /** - * \brief Initializes the threadblock-specific details (e.g., to be called by each threadblock after startup) - */ - __device__ __forceinline__ void BlockInit() - { - if (blockIdx.x < big_blocks) - { - // This threadblock gets a big share of grains (grains_per_block + 1) - block_offset = (blockIdx.x * big_share); - block_oob = block_offset + big_share; - } - else if (blockIdx.x < total_grains) - { - // This threadblock gets a normal share of grains (grains_per_block) - block_offset = normal_base_offset + (blockIdx.x * normal_share); - block_oob = block_offset + normal_share; - } - - // Last threadblock - if (blockIdx.x == grid_size - 1) - { - block_oob = num_items; - } - } - - - /** - * Print to stdout - */ - __host__ __device__ __forceinline__ void Print() - { - printf( -#ifdef __CUDA_ARCH__ - "\tthreadblock(%d) " - "block_offset(%lu) " - "block_oob(%lu) " -#endif - "num_items(%lu) " - "total_grains(%lu) " - "big_blocks(%lu) " - "big_share(%lu) " - "normal_share(%lu)\n", -#ifdef __CUDA_ARCH__ - blockIdx.x, - (unsigned long) block_offset, - (unsigned long) block_oob, -#endif - (unsigned long) num_items, - (unsigned long) total_grains, - (unsigned long) big_blocks, - (unsigned long) big_share, - (unsigned long) normal_share); - } -}; - - - -/** @} */ // end group GridModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/grid/grid_mapping.cuh b/kokkos/kokkos/TPL/cub/grid/grid_mapping.cuh deleted file mode 100644 index 419f9ac..0000000 --- a/kokkos/kokkos/TPL/cub/grid/grid_mapping.cuh +++ /dev/null @@ -1,95 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. - */ - -#pragma once - -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup GridModule - * @{ - */ - - -/****************************************************************************** - * Mapping policies - *****************************************************************************/ - - -/** - * \brief cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. - */ -enum GridMappingStrategy -{ - /** - * \brief An "even-share" strategy for assigning input tiles to thread blocks. - * - * \par Overview - * The input is evenly partitioned into \p p segments, where \p p is - * constant and corresponds loosely to the number of thread blocks that may - * actively reside on the target device. Each segment is comprised of - * consecutive tiles, where a tile is a small, constant-sized unit of input - * to be processed to completion before the thread block terminates or - * obtains more work. The kernel invokes \p p thread blocks, each - * of which iteratively consumes a segment of n/p elements - * in tile-size increments. - */ - GRID_MAPPING_EVEN_SHARE, - - /** - * \brief A dynamic "queue-based" strategy for assigning input tiles to thread blocks. - * - * \par Overview - * The input is treated as a queue to be dynamically consumed by a grid of - * thread blocks. Work is atomically dequeued in tiles, where a tile is a - * unit of input to be processed to completion before the thread block - * terminates or obtains more work. The grid size \p p is constant, - * loosely corresponding to the number of thread blocks that may actively - * reside on the target device. - */ - GRID_MAPPING_DYNAMIC, -}; - - -/** @} */ // end group GridModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/grid/grid_queue.cuh b/kokkos/kokkos/TPL/cub/grid/grid_queue.cuh deleted file mode 100644 index 009260d..0000000 --- a/kokkos/kokkos/TPL/cub/grid/grid_queue.cuh +++ /dev/null @@ -1,207 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::GridQueue is a descriptor utility for dynamic queue management. - */ - -#pragma once - -#include "../util_namespace.cuh" -#include "../util_debug.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup GridModule - * @{ - */ - - -/** - * \brief GridQueue is a descriptor utility for dynamic queue management. - * - * \par Overview - * GridQueue descriptors provides abstractions for "filling" or - * "draining" globally-shared vectors. - * - * \par - * A "filling" GridQueue works by atomically-adding to a zero-initialized counter, - * returning a unique offset for the calling thread to write its items. - * The GridQueue maintains the total "fill-size". The fill counter must be reset - * using GridQueue::ResetFill by the host or kernel instance prior to the kernel instance that - * will be filling. - * - * \par - * Similarly a "draining" GridQueue works by works by atomically-incrementing a - * zero-initialized counter, returning a unique offset for the calling thread to - * read its items. Threads can safely drain until the array's logical fill-size is - * exceeded. The drain counter must be reset using GridQueue::ResetDrain or - * GridQueue::ResetDrainAfterFill by the host or kernel instance prior to the kernel instance that - * will be filling. (For dynamic work distribution of existing data, the corresponding fill-size - * is simply the number of elements in the array.) - * - * \par - * Iterative work management can be implemented simply with a pair of flip-flopping - * work buffers, each with an associated set of fill and drain GridQueue descriptors. 
- * - * \tparam SizeT Integer type for array indexing - */ -template -class GridQueue -{ -private: - - /// Counter indices - enum - { - FILL = 0, - DRAIN = 1, - }; - - /// Pair of counters - SizeT *d_counters; - -public: - - /// Returns the device allocation size in bytes needed to construct a GridQueue instance - __host__ __device__ __forceinline__ - static size_t AllocationSize() - { - return sizeof(SizeT) * 2; - } - - - /// Constructs an invalid GridQueue descriptor around the device storage allocation - __host__ __device__ __forceinline__ GridQueue( - void *d_storage) ///< Device allocation to back the GridQueue. Must be at least as big as AllocationSize(). - : - d_counters((SizeT*) d_storage) - {} - - - /// This operation resets the drain so that it may advance to meet the existing fill-size. To be called by the host or by a kernel prior to that which will be draining. - __host__ __device__ __forceinline__ cudaError_t ResetDrainAfterFill(cudaStream_t stream = 0) - { -#ifdef __CUDA_ARCH__ - d_counters[DRAIN] = 0; - return cudaSuccess; -#else - return ResetDrain(0, stream); -#endif - } - - /// This operation sets the fill-size and resets the drain counter, preparing the GridQueue for draining in the next kernel instance. To be called by the host or by a kernel prior to that which will be draining. - __host__ __device__ __forceinline__ cudaError_t ResetDrain( - SizeT fill_size, - cudaStream_t stream = 0) - { -#ifdef __CUDA_ARCH__ - d_counters[FILL] = fill_size; - d_counters[DRAIN] = 0; - return cudaSuccess; -#else - SizeT counters[2]; - counters[FILL] = fill_size; - counters[DRAIN] = 0; - return CubDebug(cudaMemcpyAsync(d_counters, counters, sizeof(SizeT) * 2, cudaMemcpyHostToDevice, stream)); -#endif - } - - - /// This operation resets the fill counter. To be called by the host or by a kernel prior to that which will be filling. - __host__ __device__ __forceinline__ cudaError_t ResetFill() - { -#ifdef __CUDA_ARCH__ - d_counters[FILL] = 0; - return cudaSuccess; -#else - return CubDebug(cudaMemset(d_counters + FILL, 0, sizeof(SizeT))); -#endif - } - - - /// Returns the fill-size established by the parent or by the previous kernel. - __host__ __device__ __forceinline__ cudaError_t FillSize( - SizeT &fill_size, - cudaStream_t stream = 0) - { -#ifdef __CUDA_ARCH__ - fill_size = d_counters[FILL]; -#else - return CubDebug(cudaMemcpyAsync(&fill_size, d_counters + FILL, sizeof(SizeT), cudaMemcpyDeviceToHost, stream)); -#endif - } - - - /// Drain num_items. Returns offset from which to read items. - __device__ __forceinline__ SizeT Drain(SizeT num_items) - { - return atomicAdd(d_counters + DRAIN, num_items); - } - - - /// Fill num_items. Returns offset from which to write items. 
- __device__ __forceinline__ SizeT Fill(SizeT num_items) - { - return atomicAdd(d_counters + FILL, num_items); - } -}; - - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - -/** - * Reset grid queue (call with 1 block of 1 thread) - */ -template -__global__ void ResetDrainKernel( - GridQueue grid_queue, - SizeT num_items) -{ - grid_queue.ResetDrain(num_items); -} - - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** @} */ // end group GridModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - - diff --git a/kokkos/kokkos/TPL/cub/host/spinlock.cuh b/kokkos/kokkos/TPL/cub/host/spinlock.cuh deleted file mode 100644 index 5621b6f..0000000 --- a/kokkos/kokkos/TPL/cub/host/spinlock.cuh +++ /dev/null @@ -1,123 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
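A minimal sketch of the drain pattern described above (the GRID_MAPPING_DYNAMIC case): the host resets the drain counter to cover the existing data, and each thread block then atomically dequeues tile-sized chunks until the fill-size is exceeded. The tile size of 256 and the function names are illustrative.

// Device side: blocks repeatedly dequeue tiles until the queue is exhausted.
template <int TILE_ITEMS>
__global__ void DrainKernel(cub::GridQueue<int> queue, int num_items, const int *d_in, int *d_out)
{
    __shared__ int tile_offset;
    while (true)
    {
        if (threadIdx.x == 0)
            tile_offset = queue.Drain(TILE_ITEMS);   // unique offset for this block's next tile
        __syncthreads();

        if (tile_offset >= num_items)
            break;                                   // fill-size exceeded: no more work

        // ... cooperatively process items in [tile_offset, min(tile_offset + TILE_ITEMS, num_items)) ...
        __syncthreads();                             // protect tile_offset before the next Drain()
    }
}

// Host side: back the queue with AllocationSize() bytes of device storage and
// reset the drain to the number of elements already present in d_in.
void LaunchDrain(void *d_queue_storage, int grid_size, int num_items, const int *d_in, int *d_out)
{
    cub::GridQueue<int> queue(d_queue_storage);      // storage >= GridQueue<int>::AllocationSize()
    queue.ResetDrain(num_items);
    DrainKernel<256><<<grid_size, 256>>>(queue, num_items, d_in, d_out);
}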
- * - ******************************************************************************/ - -/** - * \file - * Simple x86/x64 atomic spinlock, portable across MS Windows (cl.exe) & Linux (g++) - */ - - -#pragma once - -#if defined(_WIN32) || defined(_WIN64) - #include - #include - #undef small // Windows is terrible for polluting macro namespace - - /** - * Compiler read/write barrier - */ - #pragma intrinsic(_ReadWriteBarrier) - -#endif - -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -#if defined(_MSC_VER) - - // Microsoft VC++ - typedef long Spinlock; - -#else - - // GNU g++ - typedef int Spinlock; - - /** - * Compiler read/write barrier - */ - __forceinline__ void _ReadWriteBarrier() - { - __sync_synchronize(); - } - - /** - * Atomic exchange - */ - __forceinline__ long _InterlockedExchange(volatile int * const Target, const int Value) - { - // NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier - _ReadWriteBarrier(); - return __sync_lock_test_and_set(Target, Value); - } - - /** - * Pause instruction to prevent excess processor bus usage - */ - __forceinline__ void YieldProcessor() - { -#ifndef __arm__ - asm volatile("pause\n": : :"memory"); -#endif // __arm__ - } - -#endif // defined(_MSC_VER) - -/** - * Return when the specified spinlock has been acquired - */ -__forceinline__ void Lock(volatile Spinlock *lock) -{ - while (1) - { - if (!_InterlockedExchange(lock, 1)) return; - while (*lock) YieldProcessor(); - } -} - - -/** - * Release the specified spinlock - */ -__forceinline__ void Unlock(volatile Spinlock *lock) -{ - _ReadWriteBarrier(); - *lock = 0; -} - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) - diff --git a/kokkos/kokkos/TPL/cub/thread/thread_load.cuh b/kokkos/kokkos/TPL/cub/thread/thread_load.cuh deleted file mode 100644 index ee112b9..0000000 --- a/kokkos/kokkos/TPL/cub/thread/thread_load.cuh +++ /dev/null @@ -1,429 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
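A minimal host-side sketch of the Lock/Unlock pair defined above guarding shared state between host threads; the byte counter is illustrative.

// Illustrative only: serialize host-thread updates to a shared counter.
static cub::Spinlock g_lock = 0;
static long long g_total_bytes = 0;

void AddBytes(long long bytes)
{
    cub::Lock(&g_lock);        // spin (with YieldProcessor) until acquired
    g_total_bytes += bytes;    // critical section
    cub::Unlock(&g_lock);      // compiler barrier, then release
}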
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Thread utilities for reading memory using PTX cache modifiers. - */ - -#pragma once - -#include - -#include - -#include "../util_ptx.cuh" -#include "../util_type.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \addtogroup IoModule - * @{ - */ - -//----------------------------------------------------------------------------- -// Tags and constants -//----------------------------------------------------------------------------- - -/** - * \brief Enumeration of PTX cache-modifiers for memory load operations. - */ -enum PtxLoadModifier -{ - LOAD_DEFAULT, ///< Default (no modifier) - LOAD_CA, ///< Cache at all levels - LOAD_CG, ///< Cache at global level - LOAD_CS, ///< Cache streaming (likely to be accessed once) - LOAD_CV, ///< Cache as volatile (including cached system lines) - LOAD_LDG, ///< Cache as texture - LOAD_VOLATILE, ///< Volatile (any memory space) -}; - - -/** - * \name Simple I/O - * @{ - */ - -/** - * \brief Thread utility for reading memory using cub::PtxLoadModifier cache modifiers. - * - * Cache modifiers will only be effected for built-in types (i.e., C++ - * primitives and CUDA vector-types). 
- * - * For example: - * \par - * \code - * #include - * - * // 32-bit load using cache-global modifier: - * int *d_in; - * int val = cub::ThreadLoad(d_in + threadIdx.x); - * - * // 16-bit load using default modifier - * short *d_in; - * short val = cub::ThreadLoad(d_in + threadIdx.x); - * - * // 256-bit load using cache-volatile modifier - * double4 *d_in; - * double4 val = cub::ThreadLoad(d_in + threadIdx.x); - * - * // 96-bit load using default cache modifier (ignoring LOAD_CS) - * struct TestFoo { bool a; short b; }; - * TestFoo *d_struct; - * TestFoo val = cub::ThreadLoad(d_in + threadIdx.x); - * \endcode - * - */ -template < - PtxLoadModifier MODIFIER, - typename InputIteratorRA> -__device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorRA itr); - - -//@} end member group - - - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - -/** - * Define a int4 (16B) ThreadLoad specialization for the given PTX load modifier - */ -#define CUB_LOAD_16(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ int4 ThreadLoad(int4* ptr) \ - { \ - int4 retval; \ - asm volatile ("ld."#ptx_modifier".v4.s32 {%0, %1, %2, %3}, [%4];" : \ - "=r"(retval.x), \ - "=r"(retval.y), \ - "=r"(retval.z), \ - "=r"(retval.w) : \ - _CUB_ASM_PTR_(ptr)); \ - return retval; \ - } \ - template<> \ - __device__ __forceinline__ longlong2 ThreadLoad(longlong2* ptr) \ - { \ - longlong2 retval; \ - asm volatile ("ld."#ptx_modifier".v2.s64 {%0, %1}, [%2];" : \ - "=l"(retval.x), \ - "=l"(retval.y) : \ - _CUB_ASM_PTR_(ptr)); \ - return retval; \ - } - -/** - * Define a int2 (8B) ThreadLoad specialization for the given PTX load modifier - */ -#define CUB_LOAD_8(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ short4 ThreadLoad(short4* ptr) \ - { \ - short4 retval; \ - asm volatile ("ld."#ptx_modifier".v4.s16 {%0, %1, %2, %3}, [%4];" : \ - "=h"(retval.x), \ - "=h"(retval.y), \ - "=h"(retval.z), \ - "=h"(retval.w) : \ - _CUB_ASM_PTR_(ptr)); \ - return retval; \ - } \ - template<> \ - __device__ __forceinline__ int2 ThreadLoad(int2* ptr) \ - { \ - int2 retval; \ - asm volatile ("ld."#ptx_modifier".v2.s32 {%0, %1}, [%2];" : \ - "=r"(retval.x), \ - "=r"(retval.y) : \ - _CUB_ASM_PTR_(ptr)); \ - return retval; \ - } \ - template<> \ - __device__ __forceinline__ long long ThreadLoad(long long* ptr) \ - { \ - long long retval; \ - asm volatile ("ld."#ptx_modifier".s64 %0, [%1];" : \ - "=l"(retval) : \ - _CUB_ASM_PTR_(ptr)); \ - return retval; \ - } - -/** - * Define a int (4B) ThreadLoad specialization for the given PTX load modifier - */ -#define CUB_LOAD_4(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ int ThreadLoad(int* ptr) \ - { \ - int retval; \ - asm volatile ("ld."#ptx_modifier".s32 %0, [%1];" : \ - "=r"(retval) : \ - _CUB_ASM_PTR_(ptr)); \ - return retval; \ - } - - -/** - * Define a short (2B) ThreadLoad specialization for the given PTX load modifier - */ -#define CUB_LOAD_2(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ short ThreadLoad(short* ptr) \ - { \ - short retval; \ - asm volatile ("ld."#ptx_modifier".s16 %0, [%1];" : \ - "=h"(retval) : \ - _CUB_ASM_PTR_(ptr)); \ - return retval; \ - } - - -/** - * Define a char (1B) ThreadLoad specialization for the given PTX load modifier - */ -#define CUB_LOAD_1(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ char ThreadLoad(char* ptr) \ - { \ - short retval; \ - asm volatile ( \ - "{" \ - " .reg .s8 datum;" \ - " 
ld."#ptx_modifier".s8 datum, [%1];" \ - " cvt.s16.s8 %0, datum;" \ - "}" : \ - "=h"(retval) : \ - _CUB_ASM_PTR_(ptr)); \ - return (char) retval; \ - } - - -/** - * Define powers-of-two ThreadLoad specializations for the given PTX load modifier - */ -#define CUB_LOAD_ALL(cub_modifier, ptx_modifier) \ - CUB_LOAD_16(cub_modifier, ptx_modifier) \ - CUB_LOAD_8(cub_modifier, ptx_modifier) \ - CUB_LOAD_4(cub_modifier, ptx_modifier) \ - CUB_LOAD_2(cub_modifier, ptx_modifier) \ - CUB_LOAD_1(cub_modifier, ptx_modifier) \ - - -/** - * Define ThreadLoad specializations for the various PTX load modifiers - */ -#if CUB_PTX_ARCH >= 200 - CUB_LOAD_ALL(LOAD_CA, ca) - CUB_LOAD_ALL(LOAD_CG, cg) - CUB_LOAD_ALL(LOAD_CS, cs) - CUB_LOAD_ALL(LOAD_CV, cv) -#else - // LOAD_CV on SM10-13 uses "volatile.global" to ensure reads from last level - CUB_LOAD_ALL(LOAD_CV, volatile.global) -#endif -#if CUB_PTX_ARCH >= 350 - CUB_LOAD_ALL(LOAD_LDG, global.nc) -#endif - - -/// Helper structure for templated load iteration (inductive case) -template -struct IterateThreadLoad -{ - template - static __device__ __forceinline__ void Load(T *ptr, T *vals) - { - vals[COUNT] = ThreadLoad(ptr + COUNT); - IterateThreadLoad::Load(ptr, vals); - } -}; - -/// Helper structure for templated load iteration (termination case) -template -struct IterateThreadLoad -{ - template - static __device__ __forceinline__ void Load(T *ptr, T *vals) {} -}; - - - -/** - * Load with LOAD_DEFAULT on iterator types - */ -template -__device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad( - InputIteratorRA itr, - Int2Type modifier, - Int2Type is_pointer) -{ - return *itr; -} - - -/** - * Load with LOAD_DEFAULT on pointer types - */ -template -__device__ __forceinline__ T ThreadLoad( - T *ptr, - Int2Type modifier, - Int2Type is_pointer) -{ - return *ptr; -} - - -/** - * Load with LOAD_VOLATILE on primitive pointer types - */ -template -__device__ __forceinline__ T ThreadLoadVolatile( - T *ptr, - Int2Type is_primitive) -{ - T retval = *reinterpret_cast(ptr); - -#if (CUB_PTX_ARCH <= 130) - if (sizeof(T) == 1) __threadfence_block(); -#endif - - return retval; -} - - -/** - * Load with LOAD_VOLATILE on non-primitive pointer types - */ -template -__device__ __forceinline__ T ThreadLoadVolatile( - T *ptr, - Int2Type is_primitive) -{ - typedef typename WordAlignment::VolatileWord VolatileWord; // Word type for memcopying - enum { NUM_WORDS = sizeof(T) / sizeof(VolatileWord) }; - - // Memcopy from aliased source into array of uninitialized words - typename WordAlignment::UninitializedVolatileWords words; - - #pragma unroll - for (int i = 0; i < NUM_WORDS; ++i) - words.buf[i] = reinterpret_cast(ptr)[i]; - - // Load from words - return *reinterpret_cast(words.buf); -} - - -/** - * Load with LOAD_VOLATILE on pointer types - */ -template -__device__ __forceinline__ T ThreadLoad( - T *ptr, - Int2Type modifier, - Int2Type is_pointer) -{ - return ThreadLoadVolatile(ptr, Int2Type::PRIMITIVE>()); -} - - -#if (CUB_PTX_ARCH <= 130) - -/** - * Load with LOAD_CG uses LOAD_CV in pre-SM20 PTX to ensure coherent reads when run on newer architectures with L1 - */ -template -__device__ __forceinline__ T ThreadLoad( - T *ptr, - Int2Type modifier, - Int2Type is_pointer) -{ - return ThreadLoad(ptr); -} - -#endif // (CUB_PTX_ARCH <= 130) - - -/** - * Load with arbitrary MODIFIER on pointer types - */ -template -__device__ __forceinline__ T ThreadLoad( - T *ptr, - Int2Type modifier, - Int2Type is_pointer) -{ - typedef typename WordAlignment::DeviceWord DeviceWord; - 
enum { NUM_WORDS = sizeof(T) / sizeof(DeviceWord) }; - - // Memcopy from aliased source into array of uninitialized words - typename WordAlignment::UninitializedDeviceWords words; - - IterateThreadLoad::Load( - reinterpret_cast(ptr), - words.buf); - - // Load from words - return *reinterpret_cast(words.buf); -} - - -/** - * Generic ThreadLoad definition - */ -template < - PtxLoadModifier MODIFIER, - typename InputIteratorRA> -__device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorRA itr) -{ - return ThreadLoad( - itr, - Int2Type(), - Int2Type::VALUE>()); -} - - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** @} */ // end group IoModule - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/thread/thread_operators.cuh b/kokkos/kokkos/TPL/cub/thread/thread_operators.cuh deleted file mode 100644 index bfb3d7c..0000000 --- a/kokkos/kokkos/TPL/cub/thread/thread_operators.cuh +++ /dev/null @@ -1,145 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
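A minimal sketch of the modifier-templated loads defined above, mirroring the cases listed in the doc comment (cache-global, cache-volatile, and default); the buffer names are illustrative.

#include <cub/cub.cuh>

// The first template argument selects the PTX cache modifier; LOAD_DEFAULT
// degenerates to a plain dereference, and non-primitive types ignore the modifier.
__global__ void LoadExamples(int *d_int, double4 *d_vec, short *d_short)
{
    int     a = cub::ThreadLoad<cub::LOAD_CG>(d_int + threadIdx.x);        // cache at global level
    double4 b = cub::ThreadLoad<cub::LOAD_CV>(d_vec + threadIdx.x);        // cache as volatile
    short   c = cub::ThreadLoad<cub::LOAD_DEFAULT>(d_short + threadIdx.x); // plain load
    // ... use a, b, c ...
    (void)a; (void)b; (void)c;
}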
- * - ******************************************************************************/ - -/** - * \file - * Simple binary operator functor types - */ - -/****************************************************************************** - * Simple functor operators - ******************************************************************************/ - -#pragma once - -#include "../util_macro.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup ThreadModule - * @{ - */ - -/** - * \brief Default equality functor - */ -struct Equality -{ - /// Boolean equality operator, returns (a == b) - template - __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) - { - return a == b; - } -}; - - -/** - * \brief Default inequality functor - */ -struct Inequality -{ - /// Boolean inequality operator, returns (a != b) - template - __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) - { - return a != b; - } -}; - - -/** - * \brief Default sum functor - */ -struct Sum -{ - /// Boolean sum operator, returns a + b - template - __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) - { - return a + b; - } -}; - - -/** - * \brief Default max functor - */ -struct Max -{ - /// Boolean max operator, returns (a > b) ? a : b - template - __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) - { - return CUB_MAX(a, b); - } -}; - - -/** - * \brief Default min functor - */ -struct Min -{ - /// Boolean min operator, returns (a < b) ? a : b - template - __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) - { - return CUB_MIN(a, b); - } -}; - - -/** - * \brief Default cast functor - */ -template -struct Cast -{ - /// Boolean max operator, returns (a > b) ? a : b - template - __host__ __device__ __forceinline__ B operator()(const A &a) - { - return (B) a; - } -}; - - - -/** @} */ // end group ThreadModule - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/thread/thread_reduce.cuh b/kokkos/kokkos/TPL/cub/thread/thread_reduce.cuh deleted file mode 100644 index 374fd77..0000000 --- a/kokkos/kokkos/TPL/cub/thread/thread_reduce.cuh +++ /dev/null @@ -1,145 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
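Each functor above exposes a templated T operator()(const T&, const T&) (or a Boolean equivalent); a user-defined operator with the same shape can be passed to the reduction and scan utilities that follow. A sketch of such an operator, assuming nothing beyond that interface:

// Illustrative user-defined binary operator: keep the operand with larger magnitude.
struct MaxAbs
{
    template <typename T>
    __host__ __device__ __forceinline__ T operator()(const T &a, const T &b)
    {
        T abs_a = (a < T(0)) ? -a : a;
        T abs_b = (b < T(0)) ? -b : b;
        return (abs_a > abs_b) ? a : b;
    }
};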
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Thread utilities for sequential reduction over statically-sized array types - */ - -#pragma once - -#include "../thread/thread_operators.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \addtogroup ThreadModule - * @{ - */ - -/** - * \name Sequential reduction over statically-sized array types - * @{ - */ - -/** - * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. - * - * \tparam LENGTH Length of input array - * \tparam T [inferred] The data type to be reduced. - * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ReductionOp> -__device__ __forceinline__ T ThreadReduce( - T* input, ///< [in] Input array - ReductionOp reduction_op, ///< [in] Binary reduction operator - T prefix) ///< [in] Prefix to seed reduction with -{ - #pragma unroll - for (int i = 0; i < LENGTH; ++i) - { - prefix = reduction_op(prefix, input[i]); - } - - return prefix; -} - - -/** - * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array. The aggregate is returned. - * - * \tparam LENGTH Length of input array - * \tparam T [inferred] The data type to be reduced. - * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ReductionOp> -__device__ __forceinline__ T ThreadReduce( - T* input, ///< [in] Input array - ReductionOp reduction_op) ///< [in] Binary reduction operator -{ - T prefix = input[0]; - return ThreadReduce(input + 1, reduction_op, prefix); -} - - -/** - * \brief Perform a sequential reduction over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. - * - * \tparam LENGTH [inferred] Length of \p input array - * \tparam T [inferred] The data type to be reduced. - * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ReductionOp> -__device__ __forceinline__ T ThreadReduce( - T (&input)[LENGTH], ///< [in] Input array - ReductionOp reduction_op, ///< [in] Binary reduction operator - T prefix) ///< [in] Prefix to seed reduction with -{ - return ThreadReduce(input, reduction_op, prefix); -} - - -/** - * \brief Serial reduction with the specified operator - * - * \tparam LENGTH [inferred] Length of \p input array - * \tparam T [inferred] The data type to be reduced. 
- * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ReductionOp> -__device__ __forceinline__ T ThreadReduce( - T (&input)[LENGTH], ///< [in] Input array - ReductionOp reduction_op) ///< [in] Binary reduction operator -{ - return ThreadReduce((T*) input, reduction_op); -} - - -//@} end member group - -/** @} */ // end group ThreadModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/thread/thread_scan.cuh b/kokkos/kokkos/TPL/cub/thread/thread_scan.cuh deleted file mode 100644 index b43bbcf..0000000 --- a/kokkos/kokkos/TPL/cub/thread/thread_scan.cuh +++ /dev/null @@ -1,231 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Thread utilities for sequential prefix scan over statically-sized array types - */ - -#pragma once - -#include "../thread/thread_operators.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \addtogroup ThreadModule - * @{ - */ - -/** - * \name Sequential prefix scan over statically-sized array types - * @{ - */ - -/** - * \brief Perform a sequential exclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. - * - * \tparam LENGTH Length of \p input and \p output arrays - * \tparam T [inferred] The data type to be scanned. 
- * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ScanOp> -__device__ __forceinline__ T ThreadScanExclusive( - T *input, ///< [in] Input array - T *output, ///< [out] Output array (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T prefix, ///< [in] Prefix to seed scan with - bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. If not, the first output element is undefined. (Handy for preventing thread-0 from applying a prefix.) -{ - T inclusive = input[0]; - if (apply_prefix) - { - inclusive = scan_op(prefix, inclusive); - } - output[0] = prefix; - T exclusive = inclusive; - - #pragma unroll - for (int i = 1; i < LENGTH; ++i) - { - inclusive = scan_op(exclusive, input[i]); - output[i] = exclusive; - exclusive = inclusive; - } - - return inclusive; -} - - -/** - * \brief Perform a sequential exclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. - * - * \tparam LENGTH [inferred] Length of \p input and \p output arrays - * \tparam T [inferred] The data type to be scanned. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ScanOp> -__device__ __forceinline__ T ThreadScanExclusive( - T (&input)[LENGTH], ///< [in] Input array - T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T prefix, ///< [in] Prefix to seed scan with - bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) -{ - return ThreadScanExclusive((T*) input, (T*) output, scan_op, prefix); -} - - -/** - * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array. The aggregate is returned. - * - * \tparam LENGTH Length of \p input and \p output arrays - * \tparam T [inferred] The data type to be scanned. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ScanOp> -__device__ __forceinline__ T ThreadScanInclusive( - T *input, ///< [in] Input array - T *output, ///< [out] Output array (may be aliased to \p input) - ScanOp scan_op) ///< [in] Binary scan operator -{ - T inclusive = input[0]; - output[0] = inclusive; - - // Continue scan - #pragma unroll - for (int i = 0; i < LENGTH; ++i) - { - inclusive = scan_op(inclusive, input[i]); - output[i] = inclusive; - } - - return inclusive; -} - - -/** - * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array. The aggregate is returned. - * - * \tparam LENGTH [inferred] Length of \p input and \p output arrays - * \tparam T [inferred] The data type to be scanned. 
- * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ScanOp> -__device__ __forceinline__ T ThreadScanInclusive( - T (&input)[LENGTH], ///< [in] Input array - T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) - ScanOp scan_op) ///< [in] Binary scan operator -{ - return ThreadScanInclusive((T*) input, (T*) output, scan_op); -} - - -/** - * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. - * - * \tparam LENGTH Length of \p input and \p output arrays - * \tparam T [inferred] The data type to be scanned. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ScanOp> -__device__ __forceinline__ T ThreadScanInclusive( - T *input, ///< [in] Input array - T *output, ///< [out] Output array (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T prefix, ///< [in] Prefix to seed scan with - bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) -{ - T inclusive = input[0]; - if (apply_prefix) - { - inclusive = scan_op(prefix, inclusive); - } - output[0] = inclusive; - - // Continue scan - #pragma unroll - for (int i = 1; i < LENGTH; ++i) - { - inclusive = scan_op(inclusive, input[i]); - output[i] = inclusive; - } - - return inclusive; -} - - -/** - * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. - * - * \tparam LENGTH [inferred] Length of \p input and \p output arrays - * \tparam T [inferred] The data type to be scanned. - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ -template < - int LENGTH, - typename T, - typename ScanOp> -__device__ __forceinline__ T ThreadScanInclusive( - T (&input)[LENGTH], ///< [in] Input array - T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) - ScanOp scan_op, ///< [in] Binary scan operator - T prefix, ///< [in] Prefix to seed scan with - bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) -{ - return ThreadScanInclusive((T*) input, (T*) output, scan_op, prefix, apply_prefix); -} - - -//@} end member group - -/** @} */ // end group ThreadModule - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/thread/thread_store.cuh b/kokkos/kokkos/TPL/cub/thread/thread_store.cuh deleted file mode 100644 index 8d39e07..0000000 --- a/kokkos/kokkos/TPL/cub/thread/thread_store.cuh +++ /dev/null @@ -1,412 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
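A minimal sketch combining the sequential per-thread utilities above: each thread keeps a small statically sized array in registers, reduces it, and scans it in place. The per-thread item count of 4 and the kernel name are illustrative.

__global__ void PerThreadReduceScan(const int *d_in, int *d_sums, int *d_scanned)
{
    const int ITEMS = 4;                                 // illustrative per-thread array length
    int tid  = blockIdx.x * blockDim.x + threadIdx.x;
    int base = tid * ITEMS;

    int items[ITEMS];
    #pragma unroll
    for (int i = 0; i < ITEMS; ++i)
        items[i] = d_in[base + i];

    // Sequential reduction over the statically-sized array (LENGTH is inferred)
    int thread_sum = cub::ThreadReduce(items, cub::Sum());

    // In-place sequential inclusive prefix scan over the same array
    cub::ThreadScanInclusive(items, items, cub::Sum());

    d_sums[tid] = thread_sum;
    #pragma unroll
    for (int i = 0; i < ITEMS; ++i)
        d_scanned[base + i] = items[i];
}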
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Thread utilities for writing memory using PTX cache modifiers. - */ - -#pragma once - -#include - -#include "../util_ptx.cuh" -#include "../util_type.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \addtogroup IoModule - * @{ - */ - - -//----------------------------------------------------------------------------- -// Tags and constants -//----------------------------------------------------------------------------- - -/** - * \brief Enumeration of PTX cache-modifiers for memory store operations. - */ -enum PtxStoreModifier -{ - STORE_DEFAULT, ///< Default (no modifier) - STORE_WB, ///< Cache write-back all coherent levels - STORE_CG, ///< Cache at global level - STORE_CS, ///< Cache streaming (likely to be accessed once) - STORE_WT, ///< Cache write-through (to system memory) - STORE_VOLATILE, ///< Volatile shared (any memory space) -}; - - -/** - * \name Simple I/O - * @{ - */ - -/** - * \brief Thread utility for writing memory using cub::PtxStoreModifier cache modifiers. - * - * Cache modifiers will only be effected for built-in types (i.e., C++ - * primitives and CUDA vector-types). 
- * - * For example: - * \par - * \code - * #include - * - * // 32-bit store using cache-global modifier: - * int *d_out; - * int val; - * cub::ThreadStore(d_out + threadIdx.x, val); - * - * // 16-bit store using default modifier - * short *d_out; - * short val; - * cub::ThreadStore(d_out + threadIdx.x, val); - * - * // 256-bit store using write-through modifier - * double4 *d_out; - * double4 val; - * cub::ThreadStore(d_out + threadIdx.x, val); - * - * // 96-bit store using default cache modifier (ignoring STORE_CS) - * struct TestFoo { bool a; short b; }; - * TestFoo *d_struct; - * TestFoo val; - * cub::ThreadStore(d_out + threadIdx.x, val); - * \endcode - * - */ -template < - PtxStoreModifier MODIFIER, - typename OutputIteratorRA, - typename T> -__device__ __forceinline__ void ThreadStore(OutputIteratorRA itr, T val); - - -//@} end member group - - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - -/** - * Define a int4 (16B) ThreadStore specialization for the given PTX load modifier - */ -#define CUB_STORE_16(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ void ThreadStore(int4* ptr, int4 val) \ - { \ - asm volatile ("st."#ptx_modifier".v4.s32 [%0], {%1, %2, %3, %4};" : : \ - _CUB_ASM_PTR_(ptr), \ - "r"(val.x), \ - "r"(val.y), \ - "r"(val.z), \ - "r"(val.w)); \ - } \ - template<> \ - __device__ __forceinline__ void ThreadStore(longlong2* ptr, longlong2 val) \ - { \ - asm volatile ("st."#ptx_modifier".v2.s64 [%0], {%1, %2};" : : \ - _CUB_ASM_PTR_(ptr), \ - "l"(val.x), \ - "l"(val.y)); \ - } - - -/** - * Define a int2 (8B) ThreadStore specialization for the given PTX load modifier - */ -#define CUB_STORE_8(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ void ThreadStore(short4* ptr, short4 val) \ - { \ - asm volatile ("st."#ptx_modifier".v4.s16 [%0], {%1, %2, %3, %4};" : : \ - _CUB_ASM_PTR_(ptr), \ - "h"(val.x), \ - "h"(val.y), \ - "h"(val.z), \ - "h"(val.w)); \ - } \ - template<> \ - __device__ __forceinline__ void ThreadStore(int2* ptr, int2 val) \ - { \ - asm volatile ("st."#ptx_modifier".v2.s32 [%0], {%1, %2};" : : \ - _CUB_ASM_PTR_(ptr), \ - "r"(val.x), \ - "r"(val.y)); \ - } \ - template<> \ - __device__ __forceinline__ void ThreadStore(long long* ptr, long long val) \ - { \ - asm volatile ("st."#ptx_modifier".s64 [%0], %1;" : : \ - _CUB_ASM_PTR_(ptr), \ - "l"(val)); \ - } - -/** - * Define a int (4B) ThreadStore specialization for the given PTX load modifier - */ -#define CUB_STORE_4(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ void ThreadStore(int* ptr, int val) \ - { \ - asm volatile ("st."#ptx_modifier".s32 [%0], %1;" : : \ - _CUB_ASM_PTR_(ptr), \ - "r"(val)); \ - } - - -/** - * Define a short (2B) ThreadStore specialization for the given PTX load modifier - */ -#define CUB_STORE_2(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ void ThreadStore(short* ptr, short val) \ - { \ - asm volatile ("st."#ptx_modifier".s16 [%0], %1;" : : \ - _CUB_ASM_PTR_(ptr), \ - "h"(val)); \ - } - - -/** - * Define a char (1B) ThreadStore specialization for the given PTX load modifier - */ -#define CUB_STORE_1(cub_modifier, ptx_modifier) \ - template<> \ - __device__ __forceinline__ void ThreadStore(char* ptr, char val) \ - { \ - asm volatile ( \ - "{" \ - " .reg .s8 datum;" \ - " cvt.s8.s16 datum, %1;" \ - " st."#ptx_modifier".s8 [%0], datum;" \ - "}" : : \ - _CUB_ASM_PTR_(ptr), \ - "h"(short(val))); \ - } - -/** - * Define powers-of-two ThreadStore specializations for the given PTX 
load modifier - */ -#define CUB_STORE_ALL(cub_modifier, ptx_modifier) \ - CUB_STORE_16(cub_modifier, ptx_modifier) \ - CUB_STORE_8(cub_modifier, ptx_modifier) \ - CUB_STORE_4(cub_modifier, ptx_modifier) \ - CUB_STORE_2(cub_modifier, ptx_modifier) \ - CUB_STORE_1(cub_modifier, ptx_modifier) \ - - -/** - * Define ThreadStore specializations for the various PTX load modifiers - */ -#if CUB_PTX_ARCH >= 200 - CUB_STORE_ALL(STORE_WB, ca) - CUB_STORE_ALL(STORE_CG, cg) - CUB_STORE_ALL(STORE_CS, cs) - CUB_STORE_ALL(STORE_WT, cv) -#else - // STORE_WT on SM10-13 uses "volatile.global" to ensure writes to last level - CUB_STORE_ALL(STORE_WT, volatile.global) -#endif - - - -/// Helper structure for templated store iteration (inductive case) -template -struct IterateThreadStore -{ - template - static __device__ __forceinline__ void Store(T *ptr, T *vals) - { - ThreadStore(ptr + COUNT, vals[COUNT]); - IterateThreadStore::Store(ptr, vals); - } -}; - -/// Helper structure for templated store iteration (termination case) -template -struct IterateThreadStore -{ - template - static __device__ __forceinline__ void Store(T *ptr, T *vals) {} -}; - - - - -/** - * Store with STORE_DEFAULT on iterator types - */ -template -__device__ __forceinline__ void ThreadStore( - OutputIteratorRA itr, - T val, - Int2Type modifier, - Int2Type is_pointer) -{ - *itr = val; -} - - -/** - * Store with STORE_DEFAULT on pointer types - */ -template -__device__ __forceinline__ void ThreadStore( - T *ptr, - T val, - Int2Type modifier, - Int2Type is_pointer) -{ - *ptr = val; -} - - -/** - * Store with STORE_VOLATILE on primitive pointer types - */ -template -__device__ __forceinline__ void ThreadStoreVolatile( - T *ptr, - T val, - Int2Type is_primitive) -{ - *reinterpret_cast(ptr) = val; -} - - -/** - * Store with STORE_VOLATILE on non-primitive pointer types - */ -template -__device__ __forceinline__ void ThreadStoreVolatile( - T *ptr, - T val, - Int2Type is_primitive) -{ - typedef typename WordAlignment::VolatileWord VolatileWord; // Word type for memcopying - enum { NUM_WORDS = sizeof(T) / sizeof(VolatileWord) }; - - // Store into array of uninitialized words - typename WordAlignment::UninitializedVolatileWords words; - *reinterpret_cast(words.buf) = val; - - // Memcopy words to aliased destination - #pragma unroll - for (int i = 0; i < NUM_WORDS; ++i) - reinterpret_cast(ptr)[i] = words.buf[i]; -} - - -/** - * Store with STORE_VOLATILE on pointer types - */ -template -__device__ __forceinline__ void ThreadStore( - T *ptr, - T val, - Int2Type modifier, - Int2Type is_pointer) -{ - ThreadStoreVolatile(ptr, val, Int2Type::PRIMITIVE>()); -} - - -#if (CUB_PTX_ARCH <= 350) - -/** - * Store with STORE_CG on pointer types (uses STORE_DEFAULT on current architectures) - */ -template -__device__ __forceinline__ void ThreadStore( - T *ptr, - T val, - Int2Type modifier, - Int2Type is_pointer) -{ - ThreadStore(ptr, val); -} - -#endif // (CUB_PTX_ARCH <= 350) - - -/** - * Store with arbitrary MODIFIER on pointer types - */ -template -__device__ __forceinline__ void ThreadStore( - T *ptr, - T val, - Int2Type modifier, - Int2Type is_pointer) -{ - typedef typename WordAlignment::DeviceWord DeviceWord; // Word type for memcopying - enum { NUM_WORDS = sizeof(T) / sizeof(DeviceWord) }; - - // Store into array of uninitialized words - typename WordAlignment::UninitializedDeviceWords words; - *reinterpret_cast(words.buf) = val; - - // Memcopy words to aliased destination - IterateThreadStore::Store( - reinterpret_cast(ptr), - words.buf); -} - - -/** - * 
Generic ThreadStore definition - */ -template -__device__ __forceinline__ void ThreadStore(OutputIteratorRA itr, T val) -{ - ThreadStore( - itr, - val, - Int2Type(), - Int2Type::VALUE>()); -} - - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** @} */ // end group IoModule - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_allocator.cuh b/kokkos/kokkos/TPL/cub/util_allocator.cuh deleted file mode 100644 index ae40f33..0000000 --- a/kokkos/kokkos/TPL/cub/util_allocator.cuh +++ /dev/null @@ -1,661 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/****************************************************************************** - * Simple caching allocator for device memory allocations. The allocator is - * thread-safe and capable of managing device allocations on multiple devices. - ******************************************************************************/ - -#pragma once - -#ifndef __CUDA_ARCH__ - #include // NVCC (EDG, really) takes FOREVER to compile std::map - #include -#endif - -#include - -#include "util_namespace.cuh" -#include "util_debug.cuh" - -#include "host/spinlock.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - - -/****************************************************************************** - * CachingDeviceAllocator (host use) - ******************************************************************************/ - -/** - * \brief A simple caching allocator for device memory allocations. - * - * \par Overview - * The allocator is thread-safe and is capable of managing cached device allocations - * on multiple devices. It behaves as follows: - * - * \par - * - Allocations categorized by bin size. 
- * - Bin sizes progress geometrically in accordance with the growth factor - * \p bin_growth provided during construction. Unused device allocations within - * a larger bin cache are not reused for allocation requests that categorize to - * smaller bin sizes. - * - Allocation requests below (\p bin_growth ^ \p min_bin) are rounded up to - * (\p bin_growth ^ \p min_bin). - * - Allocations above (\p bin_growth ^ \p max_bin) are not rounded up to the nearest - * bin and are simply freed when they are deallocated instead of being returned - * to a bin-cache. - * - %If the total storage of cached allocations on a given device will exceed - * \p max_cached_bytes, allocations for that device are simply freed when they are - * deallocated instead of being returned to their bin-cache. - * - * \par - * For example, the default-constructed CachingDeviceAllocator is configured with: - * - \p bin_growth = 8 - * - \p min_bin = 3 - * - \p max_bin = 7 - * - \p max_cached_bytes = 6MB - 1B - * - * \par - * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB - * and sets a maximum of 6,291,455 cached bytes per device - * - */ -struct CachingDeviceAllocator -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - - //--------------------------------------------------------------------- - // Type definitions and constants - //--------------------------------------------------------------------- - - enum - { - /// Invalid device ordinal - INVALID_DEVICE_ORDINAL = -1, - }; - - /** - * Integer pow function for unsigned base and exponent - */ - static unsigned int IntPow( - unsigned int base, - unsigned int exp) - { - unsigned int retval = 1; - while (exp > 0) - { - if (exp & 1) { - retval = retval * base; // multiply the result by the current base - } - base = base * base; // square the base - exp = exp >> 1; // divide the exponent in half - } - return retval; - } - - - /** - * Round up to the nearest power-of - */ - static void NearestPowerOf( - unsigned int &power, - size_t &rounded_bytes, - unsigned int base, - size_t value) - { - power = 0; - rounded_bytes = 1; - - while (rounded_bytes < value) - { - rounded_bytes *= base; - power++; - } - } - - /** - * Descriptor for device memory allocations - */ - struct BlockDescriptor - { - int device; // device ordinal - void* d_ptr; // Device pointer - size_t bytes; // Size of allocation in bytes - unsigned int bin; // Bin enumeration - - // Constructor - BlockDescriptor(void *d_ptr, int device) : - d_ptr(d_ptr), - bytes(0), - bin(0), - device(device) {} - - // Constructor - BlockDescriptor(size_t bytes, unsigned int bin, int device) : - d_ptr(NULL), - bytes(bytes), - bin(bin), - device(device) {} - - // Comparison functor for comparing device pointers - static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b) - { - if (a.device < b.device) { - return true; - } else if (a.device > b.device) { - return false; - } else { - return (a.d_ptr < b.d_ptr); - } - } - - // Comparison functor for comparing allocation sizes - static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b) - { - if (a.device < b.device) { - return true; - } else if (a.device > b.device) { - return false; - } else { - return (a.bytes < b.bytes); - } - } - }; - - /// BlockDescriptor comparator function interface - typedef bool (*Compare)(const BlockDescriptor &, const BlockDescriptor &); - -#ifndef __CUDA_ARCH__ // Only define STL container members in host code - - /// Set type for cached blocks (ordered by size) - typedef std::multiset CachedBlocks; - - 
/// Set type for live blocks (ordered by ptr) - typedef std::multiset BusyBlocks; - - /// Map type of device ordinals to the number of cached bytes cached by each device - typedef std::map GpuCachedBytes; - -#endif // __CUDA_ARCH__ - - //--------------------------------------------------------------------- - // Fields - //--------------------------------------------------------------------- - - Spinlock spin_lock; /// Spinlock for thread-safety - - unsigned int bin_growth; /// Geometric growth factor for bin-sizes - unsigned int min_bin; /// Minimum bin enumeration - unsigned int max_bin; /// Maximum bin enumeration - - size_t min_bin_bytes; /// Minimum bin size - size_t max_bin_bytes; /// Maximum bin size - size_t max_cached_bytes; /// Maximum aggregate cached bytes per device - - bool debug; /// Whether or not to print (de)allocation events to stdout - bool skip_cleanup; /// Whether or not to skip a call to FreeAllCached() when destructor is called. (The CUDA runtime may have already shut down for statically declared allocators) - -#ifndef __CUDA_ARCH__ // Only define STL container members in host code - - GpuCachedBytes cached_bytes; /// Map of device ordinal to aggregate cached bytes on that device - CachedBlocks cached_blocks; /// Set of cached device allocations available for reuse - BusyBlocks live_blocks; /// Set of live device allocations currently in use - -#endif // __CUDA_ARCH__ - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - //--------------------------------------------------------------------- - // Methods - //--------------------------------------------------------------------- - - /** - * \brief Constructor. - */ - CachingDeviceAllocator( - unsigned int bin_growth, ///< Geometric growth factor for bin-sizes - unsigned int min_bin, ///< Minimum bin - unsigned int max_bin, ///< Maximum bin - size_t max_cached_bytes) ///< Maximum aggregate cached bytes per device - : - #ifndef __CUDA_ARCH__ // Only define STL container members in host code - cached_blocks(BlockDescriptor::SizeCompare), - live_blocks(BlockDescriptor::PtrCompare), - #endif - debug(false), - spin_lock(0), - bin_growth(bin_growth), - min_bin(min_bin), - max_bin(max_bin), - min_bin_bytes(IntPow(bin_growth, min_bin)), - max_bin_bytes(IntPow(bin_growth, max_bin)), - max_cached_bytes(max_cached_bytes) - {} - - - /** - * \brief Default constructor. - * - * Configured with: - * \par - * - \p bin_growth = 8 - * - \p min_bin = 3 - * - \p max_bin = 7 - * - \p max_cached_bytes = (\p bin_growth ^ \p max_bin) * 3) - 1 = 6,291,455 bytes - * - * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB and - * sets a maximum of 6,291,455 cached bytes per device - */ - CachingDeviceAllocator(bool skip_cleanup = false) : - #ifndef __CUDA_ARCH__ // Only define STL container members in host code - cached_blocks(BlockDescriptor::SizeCompare), - live_blocks(BlockDescriptor::PtrCompare), - #endif - skip_cleanup(skip_cleanup), - debug(false), - spin_lock(0), - bin_growth(8), - min_bin(3), - max_bin(7), - min_bin_bytes(IntPow(bin_growth, min_bin)), - max_bin_bytes(IntPow(bin_growth, max_bin)), - max_cached_bytes((max_bin_bytes * 3) - 1) - {} - - - /** - * \brief Sets the limit on the number bytes this allocator is allowed to cache per device. 
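A minimal host-side sketch of the allocator described above using the default bin configuration; error checking is omitted and the buffer size is illustrative.

// Sketch: default configuration (bin_growth = 8, min_bin = 3, max_bin = 7,
// max_cached_bytes = 6MB - 1B); one allocate/free round trip on the current device.
cub::CachingDeviceAllocator g_allocator;

void RoundTrip(size_t num_bytes)
{
    int device;
    cudaGetDevice(&device);

    void *d_buf = NULL;
    g_allocator.DeviceAllocate(&d_buf, num_bytes);   // rounded up to the nearest bin size
    // ... use d_buf from kernels or cudaMemcpy ...
    g_allocator.DeviceFree(d_buf, device);           // cached for reuse if under max_cached_bytes
}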
- */ - cudaError_t SetMaxCachedBytes( - size_t max_cached_bytes) - { - #ifdef __CUDA_ARCH__ - // Caching functionality only defined on host - return CubDebug(cudaErrorInvalidConfiguration); - #else - - // Lock - Lock(&spin_lock); - - this->max_cached_bytes = max_cached_bytes; - - if (debug) CubLog("New max_cached_bytes(%lld)\n", (long long) max_cached_bytes); - - // Unlock - Unlock(&spin_lock); - - return cudaSuccess; - - #endif // __CUDA_ARCH__ - } - - - /** - * \brief Provides a suitable allocation of device memory for the given size on the specified device - */ - cudaError_t DeviceAllocate( - void** d_ptr, - size_t bytes, - int device) - { - #ifdef __CUDA_ARCH__ - // Caching functionality only defined on host - return CubDebug(cudaErrorInvalidConfiguration); - #else - - bool locked = false; - int entrypoint_device = INVALID_DEVICE_ORDINAL; - cudaError_t error = cudaSuccess; - - // Round up to nearest bin size - unsigned int bin; - size_t bin_bytes; - NearestPowerOf(bin, bin_bytes, bin_growth, bytes); - if (bin < min_bin) { - bin = min_bin; - bin_bytes = min_bin_bytes; - } - - // Check if bin is greater than our maximum bin - if (bin > max_bin) - { - // Allocate the request exactly and give out-of-range bin - bin = (unsigned int) -1; - bin_bytes = bytes; - } - - BlockDescriptor search_key(bin_bytes, bin, device); - - // Lock - if (!locked) { - Lock(&spin_lock); - locked = true; - } - - do { - // Find a free block big enough within the same bin on the same device - CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key); - if ((block_itr != cached_blocks.end()) && - (block_itr->device == device) && - (block_itr->bin == search_key.bin)) - { - // Reuse existing cache block. Insert into live blocks. - search_key = *block_itr; - live_blocks.insert(search_key); - - // Remove from free blocks - cached_blocks.erase(block_itr); - cached_bytes[device] -= search_key.bytes; - - if (debug) CubLog("\tdevice %d reused cached block (%lld bytes). %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n", - device, (long long) search_key.bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device], (long long) live_blocks.size()); - } - else - { - // Need to allocate a new cache block. Unlock. - if (locked) { - Unlock(&spin_lock); - locked = false; - } - - // Set to specified device - if (CubDebug(error = cudaGetDevice(&entrypoint_device))) break; - if (CubDebug(error = cudaSetDevice(device))) break; - - // Allocate - if (CubDebug(error = cudaMalloc(&search_key.d_ptr, search_key.bytes))) break; - - // Lock - if (!locked) { - Lock(&spin_lock); - locked = true; - } - - // Insert into live blocks - live_blocks.insert(search_key); - - if (debug) CubLog("\tdevice %d allocating new device block %lld bytes. 
%lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n", - device, (long long) search_key.bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device], (long long) live_blocks.size()); - } - } while(0); - - // Unlock - if (locked) { - Unlock(&spin_lock); - locked = false; - } - - // Copy device pointer to output parameter (NULL on error) - *d_ptr = search_key.d_ptr; - - // Attempt to revert back to previous device if necessary - if (entrypoint_device != INVALID_DEVICE_ORDINAL) - { - if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; - } - - return error; - - #endif // __CUDA_ARCH__ - } - - - /** - * \brief Provides a suitable allocation of device memory for the given size on the current device - */ - cudaError_t DeviceAllocate( - void** d_ptr, - size_t bytes) - { - #ifdef __CUDA_ARCH__ - // Caching functionality only defined on host - return CubDebug(cudaErrorInvalidConfiguration); - #else - cudaError_t error = cudaSuccess; - do { - int current_device; - if (CubDebug(error = cudaGetDevice(¤t_device))) break; - if (CubDebug(error = DeviceAllocate(d_ptr, bytes, current_device))) break; - } while(0); - - return error; - - #endif // __CUDA_ARCH__ - } - - - /** - * \brief Frees a live allocation of device memory on the specified device, returning it to the allocator - */ - cudaError_t DeviceFree( - void* d_ptr, - int device) - { - #ifdef __CUDA_ARCH__ - // Caching functionality only defined on host - return CubDebug(cudaErrorInvalidConfiguration); - #else - - bool locked = false; - int entrypoint_device = INVALID_DEVICE_ORDINAL; - cudaError_t error = cudaSuccess; - - BlockDescriptor search_key(d_ptr, device); - - // Lock - if (!locked) { - Lock(&spin_lock); - locked = true; - } - - do { - // Find corresponding block descriptor - BusyBlocks::iterator block_itr = live_blocks.find(search_key); - if (block_itr == live_blocks.end()) - { - // Cannot find pointer - if (CubDebug(error = cudaErrorUnknown)) break; - } - else - { - // Remove from live blocks - search_key = *block_itr; - live_blocks.erase(block_itr); - - // Check if we should keep the returned allocation - if (cached_bytes[device] + search_key.bytes <= max_cached_bytes) - { - // Insert returned allocation into free blocks - cached_blocks.insert(search_key); - cached_bytes[device] += search_key.bytes; - - if (debug) CubLog("\tdevice %d returned %lld bytes. %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n", - device, (long long) search_key.bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device], (long long) live_blocks.size()); - } - else - { - // Free the returned allocation. Unlock. - if (locked) { - Unlock(&spin_lock); - locked = false; - } - - // Set to specified device - if (CubDebug(error = cudaGetDevice(&entrypoint_device))) break; - if (CubDebug(error = cudaSetDevice(device))) break; - - // Free device memory - if (CubDebug(error = cudaFree(d_ptr))) break; - - if (debug) CubLog("\tdevice %d freed %lld bytes. 
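A hypothetical usage sketch of the allocate/free protocol above. It assumes a CUDA-capable build and that the deleted header is reachable on the include path as util_allocator.cuh; error handling is kept minimal.

#include <cuda_runtime.h>
#include "util_allocator.cuh"   // assumed include path of the deleted header

cudaError_t ExampleScratch()
{
    // Default configuration: bin_growth 8, bins 3..7, ~6MB cached per device.
    cub::CachingDeviceAllocator allocator;

    void *d_scratch = NULL;
    cudaError_t error = allocator.DeviceAllocate(&d_scratch, 5000);  // rounds up to the 32KB bin
    if (error != cudaSuccess) return error;

    // ... launch kernels on the current device that use d_scratch ...

    return allocator.DeviceFree(d_scratch);  // block returns to the bin cache for reuse
}
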
%lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n", - device, (long long) search_key.bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device], (long long) live_blocks.size()); - } - } - } while (0); - - // Unlock - if (locked) { - Unlock(&spin_lock); - locked = false; - } - - // Attempt to revert back to entry-point device if necessary - if (entrypoint_device != INVALID_DEVICE_ORDINAL) - { - if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; - } - - return error; - - #endif // __CUDA_ARCH__ - } - - - /** - * \brief Frees a live allocation of device memory on the current device, returning it to the allocator - */ - cudaError_t DeviceFree( - void* d_ptr) - { - #ifdef __CUDA_ARCH__ - // Caching functionality only defined on host - return CubDebug(cudaErrorInvalidConfiguration); - #else - - int current_device; - cudaError_t error = cudaSuccess; - - do { - if (CubDebug(error = cudaGetDevice(¤t_device))) break; - if (CubDebug(error = DeviceFree(d_ptr, current_device))) break; - } while(0); - - return error; - - #endif // __CUDA_ARCH__ - } - - - /** - * \brief Frees all cached device allocations on all devices - */ - cudaError_t FreeAllCached() - { - #ifdef __CUDA_ARCH__ - // Caching functionality only defined on host - return CubDebug(cudaErrorInvalidConfiguration); - #else - - cudaError_t error = cudaSuccess; - bool locked = false; - int entrypoint_device = INVALID_DEVICE_ORDINAL; - int current_device = INVALID_DEVICE_ORDINAL; - - // Lock - if (!locked) { - Lock(&spin_lock); - locked = true; - } - - while (!cached_blocks.empty()) - { - // Get first block - CachedBlocks::iterator begin = cached_blocks.begin(); - - // Get entry-point device ordinal if necessary - if (entrypoint_device == INVALID_DEVICE_ORDINAL) - { - if (CubDebug(error = cudaGetDevice(&entrypoint_device))) break; - } - - // Set current device ordinal if necessary - if (begin->device != current_device) - { - if (CubDebug(error = cudaSetDevice(begin->device))) break; - current_device = begin->device; - } - - // Free device memory - if (CubDebug(error = cudaFree(begin->d_ptr))) break; - - // Reduce balance and erase entry - cached_bytes[current_device] -= begin->bytes; - cached_blocks.erase(begin); - - if (debug) CubLog("\tdevice %d freed %lld bytes. %lld available blocks cached (%lld bytes), %lld live blocks outstanding.\n", - current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device], (long long) live_blocks.size()); - } - - // Unlock - if (locked) { - Unlock(&spin_lock); - locked = false; - } - - // Attempt to revert back to entry-point device if necessary - if (entrypoint_device != INVALID_DEVICE_ORDINAL) - { - if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; - } - - return error; - - #endif // __CUDA_ARCH__ - } - - - /** - * \brief Destructor - */ - virtual ~CachingDeviceAllocator() - { - if (!skip_cleanup) - FreeAllCached(); - } - -}; - - - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_arch.cuh b/kokkos/kokkos/TPL/cub/util_arch.cuh deleted file mode 100644 index 232a33c..0000000 --- a/kokkos/kokkos/TPL/cub/util_arch.cuh +++ /dev/null @@ -1,295 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Static architectural properties by SM version. - */ - - -/****************************************************************************** - * Static architectural properties by SM version. - * - * "Device" reflects the PTX architecture targeted by the active compiler - * pass. It provides useful compile-time statics within device code. E.g.,: - * - * __shared__ int[Device::WARP_THREADS]; - * - * int padded_offset = threadIdx.x + (threadIdx.x >> Device::LOG_SMEM_BANKS); - * - ******************************************************************************/ - -#pragma once - -#include "util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - - -/// CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass). -#ifndef __CUDA_ARCH__ - #define CUB_PTX_ARCH 0 -#else - #define CUB_PTX_ARCH __CUDA_ARCH__ -#endif - - -/// Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API. -#if !defined(__CUDA_ARCH__) || defined(CUB_CDP) -#define CUB_RUNTIME_ENABLED -#endif - - -/// Execution space for destructors -#if ((CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH < 200)) - #define CUB_DESTRUCTOR __host__ -#else - #define CUB_DESTRUCTOR __host__ __device__ -#endif - - -/** - * \brief Structure for statically reporting CUDA device properties, parameterized by SM architecture. - * - * The default specialization is for SM10. 
- */ -template -struct ArchProps -{ - enum - { - LOG_WARP_THREADS = - 5, /// Log of the number of threads per warp - WARP_THREADS = - 1 << LOG_WARP_THREADS, /// Number of threads per warp - LOG_SMEM_BANKS = - 4, /// Log of the number of smem banks - SMEM_BANKS = - 1 << LOG_SMEM_BANKS, /// The number of smem banks - SMEM_BANK_BYTES = - 4, /// Size of smem bank words - SMEM_BYTES = - 16 * 1024, /// Maximum SM shared memory - SMEM_ALLOC_UNIT = - 512, /// Smem allocation size in bytes - REGS_BY_BLOCK = - true, /// Whether or not the architecture allocates registers by block (or by warp) - REG_ALLOC_UNIT = - 256, /// Number of registers allocated at a time per block (or by warp) - WARP_ALLOC_UNIT = - 2, /// Granularity of warps for which registers are allocated - MAX_SM_THREADS = - 768, /// Maximum number of threads per SM - MAX_SM_THREADBLOCKS = - 8, /// Maximum number of thread blocks per SM - MAX_BLOCK_THREADS = - 512, /// Maximum number of thread per thread block - MAX_SM_REGISTERS = - 8 * 1024, /// Maximum number of registers per SM - }; -}; - - - - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -/** - * Architecture properties for SM30 - */ -template <> -struct ArchProps<300> -{ - enum - { - LOG_WARP_THREADS = 5, // 32 threads per warp - WARP_THREADS = 1 << LOG_WARP_THREADS, - LOG_SMEM_BANKS = 5, // 32 banks - SMEM_BANKS = 1 << LOG_SMEM_BANKS, - SMEM_BANK_BYTES = 4, // 4 byte bank words - SMEM_BYTES = 48 * 1024, // 48KB shared memory - SMEM_ALLOC_UNIT = 256, // 256B smem allocation segment size - REGS_BY_BLOCK = false, // Allocates registers by warp - REG_ALLOC_UNIT = 256, // 256 registers allocated at a time per warp - WARP_ALLOC_UNIT = 4, // Registers are allocated at a granularity of every 4 warps per threadblock - MAX_SM_THREADS = 2048, // 2K max threads per SM - MAX_SM_THREADBLOCKS = 16, // 16 max threadblocks per SM - MAX_BLOCK_THREADS = 1024, // 1024 max threads per threadblock - MAX_SM_REGISTERS = 64 * 1024, // 64K max registers per SM - }; - - // Callback utility - template - static __host__ __device__ __forceinline__ void Callback(T &target, int sm_version) - { - target.template Callback(); - } -}; - - -/** - * Architecture properties for SM20 - */ -template <> -struct ArchProps<200> -{ - enum - { - LOG_WARP_THREADS = 5, // 32 threads per warp - WARP_THREADS = 1 << LOG_WARP_THREADS, - LOG_SMEM_BANKS = 5, // 32 banks - SMEM_BANKS = 1 << LOG_SMEM_BANKS, - SMEM_BANK_BYTES = 4, // 4 byte bank words - SMEM_BYTES = 48 * 1024, // 48KB shared memory - SMEM_ALLOC_UNIT = 128, // 128B smem allocation segment size - REGS_BY_BLOCK = false, // Allocates registers by warp - REG_ALLOC_UNIT = 64, // 64 registers allocated at a time per warp - WARP_ALLOC_UNIT = 2, // Registers are allocated at a granularity of every 2 warps per threadblock - MAX_SM_THREADS = 1536, // 1536 max threads per SM - MAX_SM_THREADBLOCKS = 8, // 8 max threadblocks per SM - MAX_BLOCK_THREADS = 1024, // 1024 max threads per threadblock - MAX_SM_REGISTERS = 32 * 1024, // 32K max registers per SM - }; - - // Callback utility - template - static __host__ __device__ __forceinline__ void Callback(T &target, int sm_version) - { - if (sm_version > 200) { - ArchProps<300>::Callback(target, sm_version); - } else { - target.template Callback(); - } - } -}; - - -/** - * Architecture properties for SM12 - */ -template <> -struct ArchProps<120> -{ - enum - { - LOG_WARP_THREADS = 5, // 32 threads per warp - WARP_THREADS = 1 << LOG_WARP_THREADS, - LOG_SMEM_BANKS = 4, // 16 banks - SMEM_BANKS = 1 << LOG_SMEM_BANKS, - 
SMEM_BANK_BYTES = 4, // 4 byte bank words - SMEM_BYTES = 16 * 1024, // 16KB shared memory - SMEM_ALLOC_UNIT = 512, // 512B smem allocation segment size - REGS_BY_BLOCK = true, // Allocates registers by threadblock - REG_ALLOC_UNIT = 512, // 512 registers allocated at time per threadblock - WARP_ALLOC_UNIT = 2, // Registers are allocated at a granularity of every 2 warps per threadblock - MAX_SM_THREADS = 1024, // 1024 max threads per SM - MAX_SM_THREADBLOCKS = 8, // 8 max threadblocks per SM - MAX_BLOCK_THREADS = 512, // 512 max threads per threadblock - MAX_SM_REGISTERS = 16 * 1024, // 16K max registers per SM - }; - - // Callback utility - template - static __host__ __device__ __forceinline__ void Callback(T &target, int sm_version) - { - if (sm_version > 120) { - ArchProps<200>::Callback(target, sm_version); - } else { - target.template Callback(); - } - } -}; - - -/** - * Architecture properties for SM10. Derives from the default ArchProps specialization. - */ -template <> -struct ArchProps<100> : ArchProps<0> -{ - // Callback utility - template - static __host__ __device__ __forceinline__ void Callback(T &target, int sm_version) - { - if (sm_version > 100) { - ArchProps<120>::Callback(target, sm_version); - } else { - target.template Callback(); - } - } -}; - - -/** - * Architecture properties for SM35 - */ -template <> -struct ArchProps<350> : ArchProps<300> {}; // Derives from SM30 - -/** - * Architecture properties for SM21 - */ -template <> -struct ArchProps<210> : ArchProps<200> {}; // Derives from SM20 - -/** - * Architecture properties for SM13 - */ -template <> -struct ArchProps<130> : ArchProps<120> {}; // Derives from SM12 - -/** - * Architecture properties for SM11 - */ -template <> -struct ArchProps<110> : ArchProps<100> {}; // Derives from SM10 - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** - * \brief The architectural properties for the PTX version targeted by the active compiler pass. - */ -struct PtxArchProps : ArchProps {}; - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_debug.cuh b/kokkos/kokkos/TPL/cub/util_debug.cuh deleted file mode 100644 index 2ac67d7..0000000 --- a/kokkos/kokkos/TPL/cub/util_debug.cuh +++ /dev/null @@ -1,115 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
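A host-only sketch of the compile-time usage pattern suggested by the ArchProps documentation above (the shared-memory padding example). The Props struct here is an illustrative stand-in, not the deleted template, and uses the default/SM10 values quoted above.

#include <cstdio>

template <int SM_ARCH> struct Props;             // illustrative stand-in for cub::ArchProps
template <> struct Props<100> {                  // default/SM10 values quoted above
    enum { LOG_WARP_THREADS = 5,
           WARP_THREADS     = 1 << LOG_WARP_THREADS,
           LOG_SMEM_BANKS   = 4,
           SMEM_BANKS       = 1 << LOG_SMEM_BANKS };
};

int main() {
    typedef Props<100> Device;
    // Padding one word per SMEM_BANKS threads staggers accesses across banks.
    for (int tid = 0; tid < Device::WARP_THREADS; tid += 8)
        std::printf("thread %2d -> padded offset %d\n",
                    tid, tid + (tid >> Device::LOG_SMEM_BANKS));
    return 0;
}
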
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Error and event logging routines. - * - * The following macros definitions are supported: - * - \p CUB_LOG. Simple event messages are printed to \p stdout. - */ - -#pragma once - -#include -#include "util_namespace.cuh" -#include "util_arch.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - - -/// CUB error reporting macro (prints error messages to stderr) -#if (defined(DEBUG) || defined(_DEBUG)) - #define CUB_STDERR -#endif - - - -/** - * \brief %If \p CUB_STDERR is defined and \p error is not \p cudaSuccess, the corresponding error message is printed to \p stderr (or \p stdout in device code) along with the supplied source context. - * - * \return The CUDA error. - */ -__host__ __device__ __forceinline__ cudaError_t Debug( - cudaError_t error, - const char* filename, - int line) -{ -#ifdef CUB_STDERR - if (error) - { - #if (CUB_PTX_ARCH == 0) - fprintf(stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error)); - fflush(stderr); - #elif (CUB_PTX_ARCH >= 200) - printf("CUDA error %d [block %d, thread %d, %s, %d]\n", error, blockIdx.x, threadIdx.x, filename, line); - #endif - } -#endif - return error; -} - - -/** - * \brief Debug macro - */ -#define CubDebug(e) cub::Debug((e), __FILE__, __LINE__) - - -/** - * \brief Debug macro with exit - */ -#define CubDebugExit(e) if (cub::Debug((e), __FILE__, __LINE__)) { exit(1); } - - -/** - * \brief Log macro for printf statements. - */ -#if (CUB_PTX_ARCH == 0) - #define CubLog(format, ...) printf(format,__VA_ARGS__); -#elif (CUB_PTX_ARCH >= 200) - #define CubLog(format, ...) printf("[block %d, thread %d]: " format, blockIdx.x, threadIdx.x, __VA_ARGS__); -#endif - - - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_device.cuh b/kokkos/kokkos/TPL/cub/util_device.cuh deleted file mode 100644 index 0631b92..0000000 --- a/kokkos/kokkos/TPL/cub/util_device.cuh +++ /dev/null @@ -1,378 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
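A hypothetical host-side usage of the CubDebug/CubDebugExit macros defined above. It assumes a CUDA build with CUB_STDERR defined (so failures are printed) and that the deleted header is available as util_debug.cuh.

#include <cstdlib>          // for exit(), used by CubDebugExit
#include <cuda_runtime.h>
#include "util_debug.cuh"   // assumed include path of the deleted header

int main()
{
    void *d_buf = NULL;
    // CubDebug returns the error code after printing "CUDA error ... [file, line]" on failure.
    if (CubDebug(cudaMalloc(&d_buf, 1 << 20))) return 1;
    CubDebugExit(cudaFree(d_buf));   // exits the process if the call fails
    return 0;
}
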
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Properties of a given CUDA device and the corresponding PTX bundle - */ - -#pragma once - -#include "util_arch.cuh" -#include "util_debug.cuh" -#include "util_namespace.cuh" -#include "util_macro.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - -/** - * Empty kernel for querying PTX manifest metadata (e.g., version) for the current device - */ -template -__global__ void EmptyKernel(void) { } - - -/** - * Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed). - */ -template -__host__ __device__ __forceinline__ -cudaError_t AliasTemporaries( - void *d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. 
- size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \t d_temp_storage allocation - void* (&allocations)[ALLOCATIONS], ///< [in,out] Pointers to device allocations needed - size_t (&allocation_sizes)[ALLOCATIONS]) ///< [in] Sizes in bytes of device allocations needed -{ - const int ALIGN_BYTES = 256; - const int ALIGN_MASK = ~(ALIGN_BYTES - 1); - - // Compute exclusive prefix sum over allocation requests - size_t bytes_needed = 0; - for (int i = 0; i < ALLOCATIONS; ++i) - { - size_t allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) & ALIGN_MASK; - allocation_sizes[i] = bytes_needed; - bytes_needed += allocation_bytes; - } - - // Check if the caller is simply requesting the size of the storage allocation - if (!d_temp_storage) - { - temp_storage_bytes = bytes_needed; - return cudaSuccess; - } - - // Check if enough storage provided - if (temp_storage_bytes < bytes_needed) - { - return CubDebug(cudaErrorMemoryAllocation); - } - - // Alias - for (int i = 0; i < ALLOCATIONS; ++i) - { - allocations[i] = static_cast(d_temp_storage) + allocation_sizes[i]; - } - - return cudaSuccess; -} - - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - -/** - * \brief Retrieves the PTX version (major * 100 + minor * 10) - */ -__host__ __device__ __forceinline__ cudaError_t PtxVersion(int &ptx_version) -{ -#ifndef CUB_RUNTIME_ENABLED - - // CUDA API calls not supported from this device - return cudaErrorInvalidConfiguration; - -#else - - cudaError_t error = cudaSuccess; - do - { - cudaFuncAttributes empty_kernel_attrs; - if (CubDebug(error = cudaFuncGetAttributes(&empty_kernel_attrs, EmptyKernel))) break; - ptx_version = empty_kernel_attrs.ptxVersion * 10; - } - while (0); - - return error; - -#endif -} - - -/** - * Synchronize the stream if specified - */ -__host__ __device__ __forceinline__ -static cudaError_t SyncStream(cudaStream_t stream) -{ -#ifndef __CUDA_ARCH__ - return cudaStreamSynchronize(stream); -#else - // Device can't yet sync on a specific stream - return cudaDeviceSynchronize(); -#endif -} - - - -/** - * \brief Properties of a given CUDA device and the corresponding PTX bundle - */ -class Device -{ -private: - - /// Type definition of the EmptyKernel kernel entry point - typedef void (*EmptyKernelPtr)(); - - /// Force EmptyKernel to be generated if this class is used - __host__ __device__ __forceinline__ - EmptyKernelPtr Empty() - { - return EmptyKernel; - } - -public: - - // Version information - int sm_version; ///< SM version of target device (SM version X.YZ in XYZ integer form) - int ptx_version; ///< Bundled PTX version for target device (PTX version X.YZ in XYZ integer form) - - // Target device properties - int sm_count; ///< Number of SMs - int warp_threads; ///< Number of threads per warp - int smem_bank_bytes; ///< Number of bytes per SM bank - int smem_banks; ///< Number of smem banks - int smem_bytes; ///< Smem bytes per SM - int smem_alloc_unit; ///< Smem segment size - bool regs_by_block; ///< Whether registers are allocated by threadblock (or by warp) - int reg_alloc_unit; ///< Granularity of register allocation within the SM - int warp_alloc_unit; ///< Granularity of warp allocation within the SM - int max_sm_threads; ///< Maximum number of threads per SM - int max_sm_blocks; ///< Maximum number of threadblocks per SM - int max_block_threads; ///< Maximum number of threads per threadblock - int max_sm_registers; ///< Maximum number of registers per SM - int max_sm_warps; ///< Maximum number of warps per SM - - /** - * Callback for initializing device properties - */ 
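A host-only sketch of the aliasing arithmetic in AliasTemporaries above: each request is padded to a 256-byte boundary and assigned an offset via an exclusive prefix sum, so one base allocation can back all temporaries. The sizes here are arbitrary examples.

#include <cstddef>
#include <cstdio>

int main() {
    const size_t ALIGN_BYTES = 256;
    const size_t ALIGN_MASK  = ~(ALIGN_BYTES - 1);

    size_t allocation_sizes[3] = {1000, 130, 4096};  // requested bytes per temporary
    size_t offsets[3];
    size_t bytes_needed = 0;
    for (int i = 0; i < 3; ++i) {
        size_t padded = (allocation_sizes[i] + ALIGN_BYTES - 1) & ALIGN_MASK;
        offsets[i] = bytes_needed;    // exclusive prefix sum becomes the offset
        bytes_needed += padded;
    }
    // A single allocation of bytes_needed can now back all three temporaries:
    // allocations[i] = static_cast<char*>(d_temp_storage) + offsets[i].
    for (int i = 0; i < 3; ++i)
        std::printf("temp %d: %zu bytes at offset %zu\n", i, allocation_sizes[i], offsets[i]);
    std::printf("total: %zu bytes\n", bytes_needed);
    return 0;
}
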
- template - __host__ __device__ __forceinline__ void Callback() - { - warp_threads = ArchProps::WARP_THREADS; - smem_bank_bytes = ArchProps::SMEM_BANK_BYTES; - smem_banks = ArchProps::SMEM_BANKS; - smem_bytes = ArchProps::SMEM_BYTES; - smem_alloc_unit = ArchProps::SMEM_ALLOC_UNIT; - regs_by_block = ArchProps::REGS_BY_BLOCK; - reg_alloc_unit = ArchProps::REG_ALLOC_UNIT; - warp_alloc_unit = ArchProps::WARP_ALLOC_UNIT; - max_sm_threads = ArchProps::MAX_SM_THREADS; - max_sm_blocks = ArchProps::MAX_SM_THREADBLOCKS; - max_block_threads = ArchProps::MAX_BLOCK_THREADS; - max_sm_registers = ArchProps::MAX_SM_REGISTERS; - max_sm_warps = max_sm_threads / warp_threads; - } - - -public: - - /** - * Initializer. Properties are retrieved for the specified GPU ordinal. - */ - __host__ __device__ __forceinline__ - cudaError_t Init(int device_ordinal) - { - #ifndef CUB_RUNTIME_ENABLED - - // CUDA API calls not supported from this device - return CubDebug(cudaErrorInvalidConfiguration); - - #else - - cudaError_t error = cudaSuccess; - do - { - // Fill in SM version - int major, minor; - if (CubDebug(error = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) break; - if (CubDebug(error = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_ordinal))) break; - sm_version = major * 100 + minor * 10; - - // Fill in static SM properties - // Initialize our device properties via callback from static device properties - ArchProps<100>::Callback(*this, sm_version); - - // Fill in SM count - if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; - - // Fill in PTX version - #if CUB_PTX_ARCH > 0 - ptx_version = CUB_PTX_ARCH; - #else - if (CubDebug(error = PtxVersion(ptx_version))) break; - #endif - - } - while (0); - - return error; - - #endif - } - - - /** - * Initializer. Properties are retrieved for the current GPU ordinal. - */ - __host__ __device__ __forceinline__ - cudaError_t Init() - { - #ifndef CUB_RUNTIME_ENABLED - - // CUDA API calls not supported from this device - return CubDebug(cudaErrorInvalidConfiguration); - - #else - - cudaError_t error = cudaSuccess; - do - { - int device_ordinal; - if ((error = CubDebug(cudaGetDevice(&device_ordinal)))) break; - if ((error = Init(device_ordinal))) break; - } - while (0); - return error; - - #endif - } - - - /** - * Computes maximum SM occupancy in thread blocks for the given kernel - */ - template - __host__ __device__ __forceinline__ - cudaError_t MaxSmOccupancy( - int &max_sm_occupancy, ///< [out] maximum number of thread blocks that can reside on a single SM - KernelPtr kernel_ptr, ///< [in] Kernel pointer for which to compute SM occupancy - int block_threads) ///< [in] Number of threads per thread block - { - #ifndef CUB_RUNTIME_ENABLED - - // CUDA API calls not supported from this device - return CubDebug(cudaErrorInvalidConfiguration); - - #else - - cudaError_t error = cudaSuccess; - do - { - // Get kernel attributes - cudaFuncAttributes kernel_attrs; - if (CubDebug(error = cudaFuncGetAttributes(&kernel_attrs, kernel_ptr))) break; - - // Number of warps per threadblock - int block_warps = (block_threads + warp_threads - 1) / warp_threads; - - // Max warp occupancy - int max_warp_occupancy = (block_warps > 0) ? 
- max_sm_warps / block_warps : - max_sm_blocks; - - // Maximum register occupancy - int max_reg_occupancy; - if ((block_threads == 0) || (kernel_attrs.numRegs == 0)) - { - // Prevent divide-by-zero - max_reg_occupancy = max_sm_blocks; - } - else if (regs_by_block) - { - // Allocates registers by threadblock - int block_regs = CUB_ROUND_UP_NEAREST(kernel_attrs.numRegs * warp_threads * block_warps, reg_alloc_unit); - max_reg_occupancy = max_sm_registers / block_regs; - } - else - { - // Allocates registers by warp - int sm_sides = warp_alloc_unit; - int sm_registers_per_side = max_sm_registers / sm_sides; - int regs_per_warp = CUB_ROUND_UP_NEAREST(kernel_attrs.numRegs * warp_threads, reg_alloc_unit); - int warps_per_side = sm_registers_per_side / regs_per_warp; - int warps = warps_per_side * sm_sides; - max_reg_occupancy = warps / block_warps; - } - - // Shared memory per threadblock - int block_allocated_smem = CUB_ROUND_UP_NEAREST( - kernel_attrs.sharedSizeBytes, - smem_alloc_unit); - - // Max shared memory occupancy - int max_smem_occupancy = (block_allocated_smem > 0) ? - (smem_bytes / block_allocated_smem) : - max_sm_blocks; - - // Max occupancy - max_sm_occupancy = CUB_MIN( - CUB_MIN(max_sm_blocks, max_warp_occupancy), - CUB_MIN(max_smem_occupancy, max_reg_occupancy)); - -// printf("max_smem_occupancy(%d), max_warp_occupancy(%d), max_reg_occupancy(%d)", max_smem_occupancy, max_warp_occupancy, max_reg_occupancy); - - } while (0); - - return error; - - #endif - } - -}; - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_iterator.cuh b/kokkos/kokkos/TPL/cub/util_iterator.cuh deleted file mode 100644 index 08b574c..0000000 --- a/kokkos/kokkos/TPL/cub/util_iterator.cuh +++ /dev/null @@ -1,718 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
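A host-only sketch of the occupancy arithmetic in MaxSmOccupancy above, using the SM20 figures quoted in util_arch.cuh. The kernel's register and shared-memory usage are made-up inputs.

#include <cstdio>

#define ROUND_UP_NEAREST(x, y) ((((x) + (y) - 1) / (y)) * (y))
#define MIN2(a, b) (((a) < (b)) ? (a) : (b))

int main() {
    // Device properties (SM20 values from the deleted util_arch.cuh)
    const int warp_threads = 32, max_sm_blocks = 8, max_sm_threads = 1536;
    const int max_sm_registers = 32 * 1024, reg_alloc_unit = 64, warp_alloc_unit = 2;
    const int smem_bytes = 48 * 1024, smem_alloc_unit = 128;
    const int max_sm_warps = max_sm_threads / warp_threads;

    // Hypothetical kernel resource usage
    const int block_threads = 256, kernel_regs = 32, kernel_smem = 5000;

    int block_warps = (block_threads + warp_threads - 1) / warp_threads;
    int max_warp_occupancy = max_sm_warps / block_warps;

    // Registers are allocated per warp, rounded up to the allocation unit,
    // within "sides" of the register file (warp_alloc_unit sides).
    int regs_per_warp      = ROUND_UP_NEAREST(kernel_regs * warp_threads, reg_alloc_unit);
    int warps_per_side     = (max_sm_registers / warp_alloc_unit) / regs_per_warp;
    int max_reg_occupancy  = (warps_per_side * warp_alloc_unit) / block_warps;

    int block_smem         = ROUND_UP_NEAREST(kernel_smem, smem_alloc_unit);
    int max_smem_occupancy = smem_bytes / block_smem;

    int occupancy = MIN2(MIN2(max_sm_blocks, max_warp_occupancy),
                         MIN2(max_reg_occupancy, max_smem_occupancy));
    std::printf("blocks per SM: %d\n", occupancy);   // 4 for these inputs (register-limited)
    return 0;
}
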
- * - ******************************************************************************/ - -/** - * \file - * Random-access iterator types - */ - -#pragma once - -#include "thread/thread_load.cuh" -#include "util_device.cuh" -#include "util_debug.cuh" -#include "util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/****************************************************************************** - * Texture references - *****************************************************************************/ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -// Anonymous namespace -namespace { - -/// Templated texture reference type -template -struct TexIteratorRef -{ - // Texture reference type - typedef texture TexRef; - - static TexRef ref; - - /** - * Bind texture - */ - static cudaError_t BindTexture(void *d_in) - { - cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc(); - if (d_in) - return (CubDebug(cudaBindTexture(NULL, ref, d_in, tex_desc))); - - return cudaSuccess; - } - - /** - * Unbind textures - */ - static cudaError_t UnbindTexture() - { - return CubDebug(cudaUnbindTexture(ref)); - } -}; - -// Texture reference definitions -template -typename TexIteratorRef::TexRef TexIteratorRef::ref = 0; - -} // Anonymous namespace - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - - - - - -/** - * \addtogroup UtilModule - * @{ - */ - - -/****************************************************************************** - * Iterators - *****************************************************************************/ - -/** - * \brief A simple random-access iterator pointing to a range of constant values - * - * \par Overview - * ConstantIteratorRA is a random-access iterator that when dereferenced, always - * returns the supplied constant of type \p OutputType. 
- * - * \tparam OutputType The value type of this iterator - */ -template -class ConstantIteratorRA -{ -public: - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - typedef ConstantIteratorRA self_type; - typedef OutputType value_type; - typedef OutputType reference; - typedef OutputType* pointer; - typedef std::random_access_iterator_tag iterator_category; - typedef int difference_type; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -private: - - OutputType val; - -public: - - /// Constructor - __host__ __device__ __forceinline__ ConstantIteratorRA( - const OutputType &val) ///< Constant value for the iterator instance to report - : - val(val) - {} - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - __host__ __device__ __forceinline__ self_type operator++() - { - self_type i = *this; - return i; - } - - __host__ __device__ __forceinline__ self_type operator++(int junk) - { - return *this; - } - - __host__ __device__ __forceinline__ reference operator*() - { - return val; - } - - template - __host__ __device__ __forceinline__ self_type operator+(SizeT n) - { - return ConstantIteratorRA(val); - } - - template - __host__ __device__ __forceinline__ self_type operator-(SizeT n) - { - return ConstantIteratorRA(val); - } - - template - __host__ __device__ __forceinline__ reference operator[](SizeT n) - { - return ConstantIteratorRA(val); - } - - __host__ __device__ __forceinline__ pointer operator->() - { - return &val; - } - - __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) - { - return (val == rhs.val); - } - - __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) - { - return (val != rhs.val); - } - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -}; - - - -/** - * \brief A simple random-access transform iterator for applying a transformation operator. - * - * \par Overview - * TransformIteratorRA is a random-access iterator that wraps both a native - * device pointer of type InputType* and a unary conversion functor of - * type \p ConversionOp. \p OutputType references are made by pulling \p InputType - * values through the \p ConversionOp instance. - * - * \tparam InputType The value type of the pointer being wrapped - * \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p OutputType. Must have member OutputType operator()(const InputType &datum). 
- * \tparam OutputType The value type of this iterator - */ -template -class TransformIteratorRA -{ -public: - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - typedef TransformIteratorRA self_type; - typedef OutputType value_type; - typedef OutputType reference; - typedef OutputType* pointer; - typedef std::random_access_iterator_tag iterator_category; - typedef int difference_type; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -private: - - ConversionOp conversion_op; - InputType* ptr; - -public: - - /** - * \brief Constructor - * @param ptr Native pointer to wrap - * @param conversion_op Binary transformation functor - */ - __host__ __device__ __forceinline__ TransformIteratorRA(InputType* ptr, ConversionOp conversion_op) : - conversion_op(conversion_op), - ptr(ptr) {} - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - __host__ __device__ __forceinline__ self_type operator++() - { - self_type i = *this; - ptr++; - return i; - } - - __host__ __device__ __forceinline__ self_type operator++(int junk) - { - ptr++; - return *this; - } - - __host__ __device__ __forceinline__ reference operator*() - { - return conversion_op(*ptr); - } - - template - __host__ __device__ __forceinline__ self_type operator+(SizeT n) - { - TransformIteratorRA retval(ptr + n, conversion_op); - return retval; - } - - template - __host__ __device__ __forceinline__ self_type operator-(SizeT n) - { - TransformIteratorRA retval(ptr - n, conversion_op); - return retval; - } - - template - __host__ __device__ __forceinline__ reference operator[](SizeT n) - { - return conversion_op(ptr[n]); - } - - __host__ __device__ __forceinline__ pointer operator->() - { - return &conversion_op(*ptr); - } - - __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) - { - return (ptr == rhs.ptr); - } - - __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) - { - return (ptr != rhs.ptr); - } - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -}; - - - -/** - * \brief A simple random-access iterator for loading primitive values through texture cache. - * - * \par Overview - * TexIteratorRA is a random-access iterator that wraps a native - * device pointer of type T*. References made through TexIteratorRA - * causes values to be pulled through texture cache. - * - * \par Usage Considerations - * - Can only be used with primitive types (e.g., \p char, \p int, \p float), with the exception of \p double - * - Only one TexIteratorRA or TexIteratorRA of a certain \p InputType can be bound at any given time (per host thread) - * - * \tparam InputType The value type of the pointer being wrapped - * \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p OutputType. Must have member OutputType operator()(const InputType &datum). 
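A simplified host-only stand-in for the transform-iterator idea described above: wrapping a pointer with a unary functor so that dereferencing applies the conversion. This is not the deleted class, just an illustration of the pattern.

#include <cstdio>

struct TimesTwo {                 // example ConversionOp
    int operator()(const int &x) const { return 2 * x; }
};

template <typename OutputType, typename ConversionOp, typename InputType>
class TransformIter {
    ConversionOp op;
    InputType   *ptr;
public:
    TransformIter(InputType *p, ConversionOp o) : op(o), ptr(p) {}
    OutputType operator*()        const { return op(*ptr); }    // pull InputType through ConversionOp
    OutputType operator[](int n)  const { return op(ptr[n]); }
    TransformIter &operator++()         { ++ptr; return *this; }
};

int main() {
    int data[4] = {1, 2, 3, 4};
    TransformIter<int, TimesTwo, int> it(data, TimesTwo());
    for (int i = 0; i < 4; ++i)
        std::printf("%d ", it[i]);   // prints 2 4 6 8
    std::printf("\n");
    return 0;
}
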
- * \tparam OutputType The value type of this iterator - */ -template -class TexIteratorRA -{ -public: -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - typedef TexIteratorRA self_type; - typedef T value_type; - typedef T reference; - typedef T* pointer; - typedef std::random_access_iterator_tag iterator_category; - typedef int difference_type; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - /// Tag identifying iterator type as being texture-bindable - typedef void TexBindingTag; - -private: - - T* ptr; - size_t tex_align_offset; - cudaTextureObject_t tex_obj; - -public: - - /** - * \brief Constructor - */ - __host__ __device__ __forceinline__ TexIteratorRA() - : - ptr(NULL), - tex_align_offset(0), - tex_obj(0) - {} - - /// \brief Bind iterator to texture reference - cudaError_t BindTexture( - T *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment - size_t bytes, ///< Number of items - size_t tex_align_offset = 0) ///< Offset (in items) from ptr denoting the position of the iterator - { - this->ptr = ptr; - this->tex_align_offset = tex_align_offset; - - int ptx_version; - cudaError_t error = cudaSuccess; - if (CubDebug(error = PtxVersion(ptx_version))) return error; - if (ptx_version >= 300) - { - // Use texture object - cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); - cudaResourceDesc res_desc; - cudaTextureDesc tex_desc; - memset(&res_desc, 0, sizeof(cudaResourceDesc)); - memset(&tex_desc, 0, sizeof(cudaTextureDesc)); - res_desc.resType = cudaResourceTypeLinear; - res_desc.res.linear.devPtr = ptr; - res_desc.res.linear.desc = channel_desc; - res_desc.res.linear.sizeInBytes = bytes; - tex_desc.readMode = cudaReadModeElementType; - return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL); - } - else - { - // Use texture reference - return TexIteratorRef::BindTexture(ptr); - } - } - - /// \brief Unbind iterator to texture reference - cudaError_t UnbindTexture() - { - int ptx_version; - cudaError_t error = cudaSuccess; - if (CubDebug(error = PtxVersion(ptx_version))) return error; - if (ptx_version < 300) - { - // Use texture reference - return TexIteratorRef::UnbindTexture(); - } - else - { - // Use texture object - return cudaDestroyTextureObject(tex_obj); - } - } - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - __host__ __device__ __forceinline__ self_type operator++() - { - self_type i = *this; - ptr++; - tex_align_offset++; - return i; - } - - __host__ __device__ __forceinline__ self_type operator++(int junk) - { - ptr++; - tex_align_offset++; - return *this; - } - - __host__ __device__ __forceinline__ reference operator*() - { -#if (CUB_PTX_ARCH == 0) - // Simply dereference the pointer on the host - return *ptr; -#elif (CUB_PTX_ARCH < 300) - // Use the texture reference - return tex1Dfetch(TexIteratorRef::ref, tex_align_offset); -#else - // Use the texture object - return conversion_op(tex1Dfetch(tex_obj, tex_align_offset)); -#endif - } - - template - __host__ __device__ __forceinline__ self_type operator+(SizeT n) - { - TexIteratorRA retval; - retval.ptr = ptr + n; - retval.tex_align_offset = tex_align_offset + n; - return retval; - } - - template - __host__ __device__ __forceinline__ self_type operator-(SizeT n) - { - TexIteratorRA retval; - retval.ptr = ptr - n; - retval.tex_align_offset = tex_align_offset - n; - return retval; - } - - template - __host__ __device__ __forceinline__ reference operator[](SizeT n) - { -#if (CUB_PTX_ARCH == 0) - // Simply dereference the pointer on the host - return ptr[n]; -#elif 
(CUB_PTX_ARCH < 300) - // Use the texture reference - return tex1Dfetch(TexIteratorRef::ref, tex_align_offset + n); -#else - // Use the texture object - return conversion_op(tex1Dfetch(tex_obj, tex_align_offset + n)); -#endif - } - - __host__ __device__ __forceinline__ pointer operator->() - { -#if (CUB_PTX_ARCH == 0) - // Simply dereference the pointer on the host - return &(*ptr); -#elif (CUB_PTX_ARCH < 300) - // Use the texture reference - return &(tex1Dfetch(TexIteratorRef::ref, tex_align_offset)); -#else - // Use the texture object - return conversion_op(tex1Dfetch(tex_obj, tex_align_offset)); -#endif - } - - __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) - { - return (ptr == rhs.ptr); - } - - __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) - { - return (ptr != rhs.ptr); - } - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -}; - - -/** - * \brief A simple random-access transform iterator for loading primitive values through texture cache and and subsequently applying a transformation operator. - * - * \par Overview - * TexTransformIteratorRA is a random-access iterator that wraps both a native - * device pointer of type InputType* and a unary conversion functor of - * type \p ConversionOp. \p OutputType references are made by pulling \p InputType - * values through the texture cache and then transformed them using the - * \p ConversionOp instance. - * - * \par Usage Considerations - * - Can only be used with primitive types (e.g., \p char, \p int, \p float), with the exception of \p double - * - Only one TexIteratorRA or TexTransformIteratorRA of a certain \p InputType can be bound at any given time (per host thread) - * - * \tparam InputType The value type of the pointer being wrapped - * \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p OutputType. Must have member OutputType operator()(const InputType &datum). 
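A hypothetical usage of TexIteratorRA's bind/unbind protocol described above. It assumes a CUDA build, that the deleted header is available as util_iterator.cuh, and that the class is templated on the element type (as in upstream CUB); the kernel that would consume the iterator is elided.

#include <cuda_runtime.h>
#include "util_iterator.cuh"   // assumed include path of the deleted header

cudaError_t ExampleTexReads(const int *d_in, size_t num_items)
{
    cub::TexIteratorRA<int> itr;

    // Bind the device range: dereferences go through a texture reference on
    // SM < 30 and through a texture object on SM >= 30.
    cudaError_t error = itr.BindTexture(const_cast<int*>(d_in), num_items * sizeof(int));
    if (error != cudaSuccess) return error;

    // ... pass itr to a kernel that reads itr[i] through the texture cache ...

    return itr.UnbindTexture();
}
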
- * \tparam OutputType The value type of this iterator - */ -template -class TexTransformIteratorRA -{ -public: - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - typedef TexTransformIteratorRA self_type; - typedef OutputType value_type; - typedef OutputType reference; - typedef OutputType* pointer; - typedef std::random_access_iterator_tag iterator_category; - typedef int difference_type; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - /// Tag identifying iterator type as being texture-bindable - typedef void TexBindingTag; - -private: - - ConversionOp conversion_op; - InputType* ptr; - size_t tex_align_offset; - cudaTextureObject_t tex_obj; - -public: - - /** - * \brief Constructor - */ - TexTransformIteratorRA( - ConversionOp conversion_op) ///< Binary transformation functor - : - conversion_op(conversion_op), - ptr(NULL), - tex_align_offset(0), - tex_obj(0) - {} - - /// \brief Bind iterator to texture reference - cudaError_t BindTexture( - InputType* ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment - size_t bytes, ///< Number of items - size_t tex_align_offset = 0) ///< Offset (in items) from ptr denoting the position of the iterator - { - this->ptr = ptr; - this->tex_align_offset = tex_align_offset; - - int ptx_version; - cudaError_t error = cudaSuccess; - if (CubDebug(error = PtxVersion(ptx_version))) return error; - if (ptx_version >= 300) - { - // Use texture object - cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); - cudaResourceDesc res_desc; - cudaTextureDesc tex_desc; - memset(&res_desc, 0, sizeof(cudaResourceDesc)); - memset(&tex_desc, 0, sizeof(cudaTextureDesc)); - res_desc.resType = cudaResourceTypeLinear; - res_desc.res.linear.devPtr = ptr; - res_desc.res.linear.desc = channel_desc; - res_desc.res.linear.sizeInBytes = bytes; - tex_desc.readMode = cudaReadModeElementType; - return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL); - } - else - { - // Use texture reference - return TexIteratorRef::BindTexture(ptr); - } - } - - /// \brief Unbind iterator to texture reference - cudaError_t UnbindTexture() - { - int ptx_version; - cudaError_t error = cudaSuccess; - if (CubDebug(error = PtxVersion(ptx_version))) return error; - if (ptx_version >= 300) - { - // Use texture object - return cudaDestroyTextureObject(tex_obj); - } - else - { - // Use texture reference - return TexIteratorRef::UnbindTexture(); - } - } - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - __host__ __device__ __forceinline__ self_type operator++() - { - self_type i = *this; - ptr++; - tex_align_offset++; - return i; - } - - __host__ __device__ __forceinline__ self_type operator++(int junk) - { - ptr++; - tex_align_offset++; - return *this; - } - - __host__ __device__ __forceinline__ reference operator*() - { -#if (CUB_PTX_ARCH == 0) - // Simply dereference the pointer on the host - return conversion_op(*ptr); -#elif (CUB_PTX_ARCH < 300) - // Use the texture reference - return conversion_op(tex1Dfetch(TexIteratorRef::ref, tex_align_offset)); -#else - // Use the texture object - return conversion_op(tex1Dfetch(tex_obj, tex_align_offset)); -#endif - } - - template - __host__ __device__ __forceinline__ self_type operator+(SizeT n) - { - TexTransformIteratorRA retval(conversion_op); - retval.ptr = ptr + n; - retval.tex_align_offset = tex_align_offset + n; - return retval; - } - - template - __host__ __device__ __forceinline__ self_type operator-(SizeT n) - { - TexTransformIteratorRA retval(conversion_op); - retval.ptr = ptr - n; - 
retval.tex_align_offset = tex_align_offset - n; - return retval; - } - - template - __host__ __device__ __forceinline__ reference operator[](SizeT n) - { -#if (CUB_PTX_ARCH == 0) - // Simply dereference the pointer on the host - return conversion_op(ptr[n]); -#elif (CUB_PTX_ARCH < 300) - // Use the texture reference - return conversion_op(tex1Dfetch(TexIteratorRef::ref, tex_align_offset + n)); -#else - // Use the texture object - return conversion_op(tex1Dfetch(tex_obj, tex_align_offset + n)); -#endif - } - - __host__ __device__ __forceinline__ pointer operator->() - { -#if (CUB_PTX_ARCH == 0) - // Simply dereference the pointer on the host - return &conversion_op(*ptr); -#elif (CUB_PTX_ARCH < 300) - // Use the texture reference - return &conversion_op(tex1Dfetch(TexIteratorRef::ref, tex_align_offset)); -#else - // Use the texture object - return &conversion_op(tex1Dfetch(tex_obj, tex_align_offset)); -#endif - } - - __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) - { - return (ptr == rhs.ptr); - } - - __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) - { - return (ptr != rhs.ptr); - } - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -}; - - - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_macro.cuh b/kokkos/kokkos/TPL/cub/util_macro.cuh deleted file mode 100644 index 091fd93..0000000 --- a/kokkos/kokkos/TPL/cub/util_macro.cuh +++ /dev/null @@ -1,107 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -/****************************************************************************** - * Common C/C++ macro utilities - ******************************************************************************/ - -#pragma once - -#include "util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - -/** - * Align struct - */ -#if defined(_WIN32) || defined(_WIN64) - #define CUB_ALIGN(bytes) __declspec(align(32)) -#else - #define CUB_ALIGN(bytes) __attribute__((aligned(bytes))) -#endif - -/** - * Select maximum(a, b) - */ -#define CUB_MAX(a, b) (((a) > (b)) ? (a) : (b)) - -/** - * Select minimum(a, b) - */ -#define CUB_MIN(a, b) (((a) < (b)) ? (a) : (b)) - -/** - * Quotient of x/y rounded down to nearest integer - */ -#define CUB_QUOTIENT_FLOOR(x, y) ((x) / (y)) - -/** - * Quotient of x/y rounded up to nearest integer - */ -#define CUB_QUOTIENT_CEILING(x, y) (((x) + (y) - 1) / (y)) - -/** - * x rounded up to the nearest multiple of y - */ -#define CUB_ROUND_UP_NEAREST(x, y) ((((x) + (y) - 1) / (y)) * y) - -/** - * x rounded down to the nearest multiple of y - */ -#define CUB_ROUND_DOWN_NEAREST(x, y) (((x) / (y)) * y) - -/** - * Return character string for given type - */ -#define CUB_TYPE_STRING(type) ""#type - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - #define CUB_CAT_(a, b) a ## b - #define CUB_CAT(a, b) CUB_CAT_(a, b) -#endif // DOXYGEN_SHOULD_SKIP_THIS - -/** - * Static assert - */ -#define CUB_STATIC_ASSERT(cond, msg) typedef int CUB_CAT(cub_static_assert, __LINE__)[(cond) ? 1 : -1] - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_namespace.cuh b/kokkos/kokkos/TPL/cub/util_namespace.cuh deleted file mode 100644 index 869ecc6..0000000 --- a/kokkos/kokkos/TPL/cub/util_namespace.cuh +++ /dev/null @@ -1,41 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
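A quick host-only check of the rounding macros defined above, with a couple of example values; the macros are copied here so the snippet stands alone.

#include <cstdio>

#define CUB_QUOTIENT_CEILING(x, y)   (((x) + (y) - 1) / (y))
#define CUB_ROUND_UP_NEAREST(x, y)   ((((x) + (y) - 1) / (y)) * (y))
#define CUB_ROUND_DOWN_NEAREST(x, y) (((x) / (y)) * (y))

int main() {
    std::printf("%d\n", CUB_QUOTIENT_CEILING(10, 3));    // 4
    std::printf("%d\n", CUB_ROUND_UP_NEAREST(10, 8));    // 16
    std::printf("%d\n", CUB_ROUND_DOWN_NEAREST(10, 8));  // 8
    return 0;
}
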
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Place-holder for prefixing the cub namespace - */ - -#pragma once - -// For example: -//#define CUB_NS_PREFIX namespace thrust{ namespace detail { -//#define CUB_NS_POSTFIX } } - -#define CUB_NS_PREFIX -#define CUB_NS_POSTFIX diff --git a/kokkos/kokkos/TPL/cub/util_ptx.cuh b/kokkos/kokkos/TPL/cub/util_ptx.cuh deleted file mode 100644 index ad80b04..0000000 --- a/kokkos/kokkos/TPL/cub/util_ptx.cuh +++ /dev/null @@ -1,380 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
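util_namespace.cuh, removed above, exists only so a host project can splice the cub namespace inside its own by defining CUB_NS_PREFIX/CUB_NS_POSTFIX before including CUB, as the commented-out thrust::detail example in the deleted file suggests. A minimal sketch with a made-up outer namespace name:

// The outer namespace "myproj" is hypothetical; a real client (e.g. Thrust)
// substitutes its own nesting here.
#define CUB_NS_PREFIX  namespace myproj { namespace detail {
#define CUB_NS_POSTFIX } }

CUB_NS_PREFIX
namespace cub {
    inline int Version() { return 100; }   // stand-in for the real CUB contents
}
CUB_NS_POSTFIX

int main()
{
    // Everything CUB defines now lives under myproj::detail::cub.
    return (myproj::detail::cub::Version() == 100) ? 0 : 1;
}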
- * - ******************************************************************************/ - -/** - * \file - * PTX intrinsics - */ - - -#pragma once - -#include "util_type.cuh" -#include "util_arch.cuh" -#include "util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - - -/****************************************************************************** - * PTX helper macros - ******************************************************************************/ - -/** - * Register modifier for pointer-types (for inlining PTX assembly) - */ -#if defined(_WIN64) || defined(__LP64__) - #define __CUB_LP64__ 1 - // 64-bit register modifier for inlined asm - #define _CUB_ASM_PTR_ "l" - #define _CUB_ASM_PTR_SIZE_ "u64" -#else - #define __CUB_LP64__ 0 - // 32-bit register modifier for inlined asm - #define _CUB_ASM_PTR_ "r" - #define _CUB_ASM_PTR_SIZE_ "u32" -#endif - - -/****************************************************************************** - * Inlined PTX intrinsics - ******************************************************************************/ - -/** - * Shift-right then add. Returns (x >> shift) + addend. - */ -__device__ __forceinline__ unsigned int SHR_ADD( - unsigned int x, - unsigned int shift, - unsigned int addend) -{ - unsigned int ret; -#if __CUDA_ARCH__ >= 200 - asm("vshr.u32.u32.u32.clamp.add %0, %1, %2, %3;" : - "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); -#else - ret = (x >> shift) + addend; -#endif - return ret; -} - - -/** - * Shift-left then add. Returns (x << shift) + addend. - */ -__device__ __forceinline__ unsigned int SHL_ADD( - unsigned int x, - unsigned int shift, - unsigned int addend) -{ - unsigned int ret; -#if __CUDA_ARCH__ >= 200 - asm("vshl.u32.u32.u32.clamp.add %0, %1, %2, %3;" : - "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); -#else - ret = (x << shift) + addend; -#endif - return ret; -} - - -/** - * Bitfield-extract. - */ -template -__device__ __forceinline__ unsigned int BFE( - UnsignedBits source, - unsigned int bit_start, - unsigned int num_bits) -{ - unsigned int bits; -#if __CUDA_ARCH__ >= 200 - asm("bfe.u32 %0, %1, %2, %3;" : "=r"(bits) : "r"((unsigned int) source), "r"(bit_start), "r"(num_bits)); -#else - const unsigned int MASK = (1 << num_bits) - 1; - bits = (source >> bit_start) & MASK; -#endif - return bits; -} - - -/** - * Bitfield-extract for 64-bit types. - */ -__device__ __forceinline__ unsigned int BFE( - unsigned long long source, - unsigned int bit_start, - unsigned int num_bits) -{ - const unsigned long long MASK = (1ull << num_bits) - 1; - return (source >> bit_start) & MASK; -} - - -/** - * Bitfield insert. Inserts the first num_bits of y into x starting at bit_start - */ -__device__ __forceinline__ void BFI( - unsigned int &ret, - unsigned int x, - unsigned int y, - unsigned int bit_start, - unsigned int num_bits) -{ -#if __CUDA_ARCH__ >= 200 - asm("bfi.b32 %0, %1, %2, %3, %4;" : - "=r"(ret) : "r"(y), "r"(x), "r"(bit_start), "r"(num_bits)); -#else - // TODO -#endif -} - - -/** - * Three-operand add - */ -__device__ __forceinline__ unsigned int IADD3(unsigned int x, unsigned int y, unsigned int z) -{ -#if __CUDA_ARCH__ >= 200 - asm("vadd.u32.u32.u32.add %0, %1, %2, %3;" : "=r"(x) : "r"(x), "r"(y), "r"(z)); -#else - x = x + y + z; -#endif - return x; -} - - -/** - * Byte-permute. 
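Among the PTX wrappers deleted above, BFE has a portable fallback (the pre-SM-2.0 branch) that is easy to check on the host. The sketch below restates that fallback and uses it for the kind of fixed-width digit extraction a radix sort performs; all names are local to the example.

#include <cstdio>

// Same arithmetic as the non-asm branch of BFE above: extract `num_bits`
// bits of `source` starting at `bit_start`.
static unsigned int bfe(unsigned int source, unsigned int bit_start, unsigned int num_bits)
{
    const unsigned int mask = (1u << num_bits) - 1u;
    return (source >> bit_start) & mask;
}

int main()
{
    const unsigned int key = 0xDEADBEEFu;

    // Peel off the four 8-bit digits, least significant first:
    // prints 0xEF, 0xBE, 0xAD, 0xDE.
    for (unsigned int d = 0; d < 4; ++d)
        std::printf("digit %u = 0x%02X\n", d, bfe(key, d * 8, 8));

    return 0;
}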
Pick four arbitrary bytes from two 32-bit registers, and - * reassemble them into a 32-bit destination register - */ -__device__ __forceinline__ int PRMT(unsigned int a, unsigned int b, unsigned int index) -{ - int ret; - asm("prmt.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(a), "r"(b), "r"(index)); - return ret; -} - - -/** - * Sync-threads barrier. - */ -__device__ __forceinline__ void BAR(int count) -{ - asm volatile("bar.sync 1, %0;" : : "r"(count)); -} - - -/** - * Floating point multiply. (Mantissa LSB rounds towards zero.) - */ -__device__ __forceinline__ float FMUL_RZ(float a, float b) -{ - float d; - asm("mul.rz.f32 %0, %1, %2;" : "=f"(d) : "f"(a), "f"(b)); - return d; -} - - -/** - * Floating point multiply-add. (Mantissa LSB rounds towards zero.) - */ -__device__ __forceinline__ float FFMA_RZ(float a, float b, float c) -{ - float d; - asm("fma.rz.f32 %0, %1, %2, %3;" : "=f"(d) : "f"(a), "f"(b), "f"(c)); - return d; -} - - -/** - * Terminates the calling thread - */ -__device__ __forceinline__ void ThreadExit() { - asm("exit;"); -} - - -/** - * Returns the warp lane ID of the calling thread - */ -__device__ __forceinline__ unsigned int LaneId() -{ - unsigned int ret; - asm("mov.u32 %0, %laneid;" : "=r"(ret) ); - return ret; -} - - -/** - * Returns the warp ID of the calling thread - */ -__device__ __forceinline__ unsigned int WarpId() -{ - unsigned int ret; - asm("mov.u32 %0, %warpid;" : "=r"(ret) ); - return ret; -} - -/** - * Returns the warp lane mask of all lanes less than the calling thread - */ -__device__ __forceinline__ unsigned int LaneMaskLt() -{ - unsigned int ret; - asm("mov.u32 %0, %lanemask_lt;" : "=r"(ret) ); - return ret; -} - -/** - * Returns the warp lane mask of all lanes less than or equal to the calling thread - */ -__device__ __forceinline__ unsigned int LaneMaskLe() -{ - unsigned int ret; - asm("mov.u32 %0, %lanemask_le;" : "=r"(ret) ); - return ret; -} - -/** - * Returns the warp lane mask of all lanes greater than the calling thread - */ -__device__ __forceinline__ unsigned int LaneMaskGt() -{ - unsigned int ret; - asm("mov.u32 %0, %lanemask_gt;" : "=r"(ret) ); - return ret; -} - -/** - * Returns the warp lane mask of all lanes greater than or equal to the calling thread - */ -__device__ __forceinline__ unsigned int LaneMaskGe() -{ - unsigned int ret; - asm("mov.u32 %0, %lanemask_ge;" : "=r"(ret) ); - return ret; -} - -/** - * Portable implementation of __all - */ -__device__ __forceinline__ int WarpAll(int cond) -{ -#if CUB_PTX_ARCH < 120 - - __shared__ volatile int warp_signals[PtxArchProps::MAX_SM_THREADS / PtxArchProps::WARP_THREADS]; - - if (LaneId() == 0) - warp_signals[WarpId()] = 1; - - if (cond == 0) - warp_signals[WarpId()] = 0; - - return warp_signals[WarpId()]; - -#else - - return __all(cond); - -#endif -} - - -/** - * Portable implementation of __any - */ -__device__ __forceinline__ int WarpAny(int cond) -{ -#if CUB_PTX_ARCH < 120 - - __shared__ volatile int warp_signals[PtxArchProps::MAX_SM_THREADS / PtxArchProps::WARP_THREADS]; - - if (LaneId() == 0) - warp_signals[WarpId()] = 0; - - if (cond) - warp_signals[WarpId()] = 1; - - return warp_signals[WarpId()]; - -#else - - return __any(cond); - -#endif -} - - -/// Generic shuffle-up -template -__device__ __forceinline__ T ShuffleUp( - T input, ///< [in] The value to broadcast - int src_offset) ///< [in] The up-offset of the peer to read from -{ - enum - { - SHFL_C = 0, - }; - - typedef typename WordAlignment::ShuffleWord ShuffleWord; - - const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / 
sizeof(ShuffleWord); - T output; - ShuffleWord *output_alias = reinterpret_cast(&output); - ShuffleWord *input_alias = reinterpret_cast(&input); - - #pragma unroll - for (int WORD = 0; WORD < WORDS; ++WORD) - { - unsigned int shuffle_word = input_alias[WORD]; - asm( - " shfl.up.b32 %0, %1, %2, %3;" - : "=r"(shuffle_word) : "r"(shuffle_word), "r"(src_offset), "r"(SHFL_C)); - output_alias[WORD] = (ShuffleWord) shuffle_word; - } - - return output; -} - - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_type.cuh b/kokkos/kokkos/TPL/cub/util_type.cuh deleted file mode 100644 index 836aa0f..0000000 --- a/kokkos/kokkos/TPL/cub/util_type.cuh +++ /dev/null @@ -1,685 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Common type manipulation (metaprogramming) utilities - */ - -#pragma once - -#include -#include - -#include "util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - - - -/****************************************************************************** - * Type equality - ******************************************************************************/ - -/** - * \brief Type selection (IF ? 
ThenType : ElseType) - */ -template -struct If -{ - /// Conditional type result - typedef ThenType Type; // true -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template -struct If -{ - typedef ElseType Type; // false -}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/****************************************************************************** - * Conditional types - ******************************************************************************/ - - -/** - * \brief Type equality test - */ -template -struct Equals -{ - enum { - VALUE = 0, - NEGATE = 1 - }; -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template -struct Equals -{ - enum { - VALUE = 1, - NEGATE = 0 - }; -}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/****************************************************************************** - * Marker types - ******************************************************************************/ - -/** - * \brief A simple "NULL" marker type - */ -struct NullType -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - template - __host__ __device__ __forceinline__ NullType& operator =(const T& b) { return *this; } -#endif // DOXYGEN_SHOULD_SKIP_THIS -}; - - -/** - * \brief Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) - */ -template -struct Int2Type -{ - enum {VALUE = A}; -}; - - -/****************************************************************************** - * Size and alignment - ******************************************************************************/ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template -struct WordAlignment -{ - struct Pad - { - T val; - char byte; - }; - - enum - { - /// The alignment of T in bytes - ALIGN_BYTES = sizeof(Pad) - sizeof(T) - }; - - /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If<(ALIGN_BYTES % 4 == 0), - int, - typename If<(ALIGN_BYTES % 2 == 0), - short, - char>::Type>::Type ShuffleWord; - - /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If<(ALIGN_BYTES % 8 == 0), - long long, - ShuffleWord>::Type VolatileWord; - - /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If<(ALIGN_BYTES % 16 == 0), - longlong2, - typename If<(ALIGN_BYTES % 8 == 0), - long long, // needed to get heterogenous PODs to work on all platforms - ShuffleWord>::Type>::Type DeviceWord; - - enum - { - DEVICE_MULTIPLE = sizeof(DeviceWord) / sizeof(T) - }; - - struct UninitializedBytes - { - char buf[sizeof(T)]; - }; - - struct UninitializedShuffleWords - { - ShuffleWord buf[sizeof(T) / sizeof(ShuffleWord)]; - }; - - struct UninitializedVolatileWords - { - VolatileWord buf[sizeof(T) / sizeof(VolatileWord)]; - }; - - struct UninitializedDeviceWords - { - DeviceWord buf[sizeof(T) / sizeof(DeviceWord)]; - }; - - -}; - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/****************************************************************************** - * Wrapper types - ******************************************************************************/ - -/** - * \brief A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions - */ -template -struct Uninitialized -{ - /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T - typedef typename WordAlignment::DeviceWord 
DeviceWord; - - enum - { - WORDS = sizeof(T) / sizeof(DeviceWord) - }; - - /// Backing storage - DeviceWord storage[WORDS]; - - /// Alias - __host__ __device__ __forceinline__ T& Alias() - { - return reinterpret_cast(*this); - } -}; - - -/** - * \brief A wrapper for passing simple static arrays as kernel parameters - */ -template -struct ArrayWrapper -{ - /// Static array of type \p T - T array[COUNT]; -}; - - -/** - * \brief Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth. - * - * Many multi-pass computations require a pair of "ping-pong" storage - * buffers (e.g., one for reading from and the other for writing to, and then - * vice-versa for the subsequent pass). This structure wraps a set of device - * buffers and a "selector" member to track which is "current". - */ -template -struct DoubleBuffer -{ - /// Pair of device buffer pointers - T *d_buffers[2]; - - /// Selector into \p d_buffers (i.e., the active/valid buffer) - int selector; - - /// \brief Constructor - __host__ __device__ __forceinline__ DoubleBuffer() - { - selector = 0; - d_buffers[0] = NULL; - d_buffers[1] = NULL; - } - - /// \brief Constructor - __host__ __device__ __forceinline__ DoubleBuffer( - T *d_current, ///< The currently valid buffer - T *d_alternate) ///< Alternate storage buffer of the same size as \p d_current - { - selector = 0; - d_buffers[0] = d_current; - d_buffers[1] = d_alternate; - } - - /// \brief Return pointer to the currently valid buffer - __host__ __device__ __forceinline__ T* Current() { return d_buffers[selector]; } -}; - - - -/****************************************************************************** - * Static math - ******************************************************************************/ - -/** - * \brief Statically determine log2(N), rounded up. - * - * For example: - * Log2<8>::VALUE // 3 - * Log2<3>::VALUE // 2 - */ -template -struct Log2 -{ - /// Static logarithm value - enum { VALUE = Log2> 1), COUNT + 1>::VALUE }; // Inductive case -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document -template -struct Log2 -{ - enum {VALUE = (1 << (COUNT - 1) < N) ? // Base case - COUNT : - COUNT - 1 }; -}; -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** - * \brief Statically determine if N is a power-of-two - */ -template -struct PowerOfTwo -{ - enum { VALUE = ((N & (N - 1)) == 0) }; -}; - - - -/****************************************************************************** - * Pointer vs. iterator detection - ******************************************************************************/ - - -/** - * \brief Pointer vs. 
iterator - */ -template -struct IsPointer -{ - enum { VALUE = 0 }; -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template -struct IsPointer -{ - enum { VALUE = 1 }; -}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - -/****************************************************************************** - * Qualifier detection - ******************************************************************************/ - -/** - * \brief Volatile modifier test - */ -template -struct IsVolatile -{ - enum { VALUE = 0 }; -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template -struct IsVolatile -{ - enum { VALUE = 1 }; -}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/****************************************************************************** - * Qualifier removal - ******************************************************************************/ - -/** - * \brief Removes \p const and \p volatile qualifiers from type \p Tp. - * - * For example: - * typename RemoveQualifiers::Type // int; - */ -template -struct RemoveQualifiers -{ - /// Type without \p const and \p volatile qualifiers - typedef Up Type; -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template -struct RemoveQualifiers -{ - typedef Up Type; -}; - -template -struct RemoveQualifiers -{ - typedef Up Type; -}; - -template -struct RemoveQualifiers -{ - typedef Up Type; -}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - -/****************************************************************************** - * Typedef-detection - ******************************************************************************/ - - -/** - * \brief Defines a structure \p detector_name that is templated on type \p T. The \p detector_name struct exposes a constant member \p VALUE indicating whether or not parameter \p T exposes a nested type \p nested_type_name - */ -#define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \ - template \ - struct detector_name \ - { \ - template \ - static char& test(typename C::nested_type_name*); \ - template \ - static int& test(...); \ - enum \ - { \ - VALUE = sizeof(test(0)) < sizeof(int) \ - }; \ - }; - - - -/****************************************************************************** - * Simple enable-if (similar to Boost) - ******************************************************************************/ - -/** - * \brief Simple enable-if (similar to Boost) - */ -template -struct EnableIf -{ - /// Enable-if type for SFINAE dummy variables - typedef T Type; -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template -struct EnableIf {}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/****************************************************************************** - * Typedef-detection - ******************************************************************************/ - -/** - * \brief Determine whether or not BinaryOp's functor is of the form bool operator()(const T& a, const T&b) or bool operator()(const T& a, const T&b, unsigned int idx) - */ -template -struct BinaryOpHasIdxParam -{ -private: - template struct SFINAE1 {}; - template struct SFINAE2 {}; - template struct SFINAE3 {}; - template struct SFINAE4 {}; - - template struct SFINAE5 {}; - template struct SFINAE6 {}; - template struct SFINAE7 {}; - template struct SFINAE8 {}; - - template static char Test(SFINAE1 *); - template static char Test(SFINAE2 *); - template static char Test(SFINAE3 *); - template static char Test(SFINAE4 *); - - template static char Test(SFINAE5 *); - template static char Test(SFINAE6 *); - template static 
char Test(SFINAE7 *); - template static char Test(SFINAE8 *); - - template static int Test(...); - -public: - - /// Whether the functor BinaryOp has a third unsigned int index param - static const bool HAS_PARAM = sizeof(Test(NULL)) == sizeof(char); -}; - - - -/****************************************************************************** - * Simple type traits utilities. - * - * For example: - * Traits::CATEGORY // SIGNED_INTEGER - * Traits::NULL_TYPE // true - * Traits::CATEGORY // NOT_A_NUMBER - * Traits::PRIMITIVE; // false - * - ******************************************************************************/ - -/** - * \brief Basic type traits categories - */ -enum Category -{ - NOT_A_NUMBER, - SIGNED_INTEGER, - UNSIGNED_INTEGER, - FLOATING_POINT -}; - - -/** - * \brief Basic type traits - */ -template -struct BaseTraits -{ - /// Category - static const Category CATEGORY = _CATEGORY; - enum - { - PRIMITIVE = _PRIMITIVE, - NULL_TYPE = _NULL_TYPE, - }; -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -/** - * Basic type traits (unsigned primitive specialization) - */ -template -struct BaseTraits -{ - typedef _UnsignedBits UnsignedBits; - - static const Category CATEGORY = UNSIGNED_INTEGER; - static const UnsignedBits MIN_KEY = UnsignedBits(0); - static const UnsignedBits MAX_KEY = UnsignedBits(-1); - - enum - { - PRIMITIVE = true, - NULL_TYPE = false, - }; - - - static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) - { - return key; - } - - static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) - { - return key; - } -}; - - -/** - * Basic type traits (signed primitive specialization) - */ -template -struct BaseTraits -{ - typedef _UnsignedBits UnsignedBits; - - static const Category CATEGORY = SIGNED_INTEGER; - static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); - static const UnsignedBits MIN_KEY = HIGH_BIT; - static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; - - enum - { - PRIMITIVE = true, - NULL_TYPE = false, - }; - - static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) - { - return key ^ HIGH_BIT; - }; - - static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) - { - return key ^ HIGH_BIT; - }; - -}; - - -/** - * Basic type traits (fp primitive specialization) - */ -template -struct BaseTraits -{ - typedef _UnsignedBits UnsignedBits; - - static const Category CATEGORY = FLOATING_POINT; - static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); - static const UnsignedBits MIN_KEY = UnsignedBits(-1); - static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; - - static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) - { - UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; - return key ^ mask; - }; - - static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) - { - UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1); - return key ^ mask; - }; - - enum - { - PRIMITIVE = true, - NULL_TYPE = false, - }; -}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** - * \brief Numeric type traits - */ -template struct NumericTraits : BaseTraits {}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -template <> struct NumericTraits : BaseTraits {}; - -template <> struct NumericTraits : BaseTraits<(std::numeric_limits::is_signed) ? 
SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char> {}; -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; - -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; - -template <> struct NumericTraits : BaseTraits {}; -template <> struct NumericTraits : BaseTraits {}; - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** - * \brief Type traits - */ -template -struct Traits : NumericTraits::Type> {}; - - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/util_vector.cuh b/kokkos/kokkos/TPL/cub/util_vector.cuh deleted file mode 100644 index 9a432dc..0000000 --- a/kokkos/kokkos/TPL/cub/util_vector.cuh +++ /dev/null @@ -1,166 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Vector type inference utilities - */ - -#pragma once - -#include - -#include "util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup UtilModule - * @{ - */ - - -/****************************************************************************** - * Vector type inference utilities. 
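The type-utility header removed above lost its template parameter lists to formatting (every `template` and `reinterpret_cast` appears without its angle-bracketed arguments). As a readable stand-in, here is a reconstruction of its Log2/PowerOfTwo metafunctions following the customary CUB 1.x signatures, checked against the values the original doc comment promises (Log2<8>::VALUE == 3, Log2<3>::VALUE == 2); parameter names may differ cosmetically from the deleted originals.

// Reconstruction for illustration only.
template <int N, int CURRENT_VAL = N, int COUNT = 0>
struct Log2
{
    enum { VALUE = Log2<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE };   // inductive case
};

template <int N, int COUNT>
struct Log2<N, 0, COUNT>
{
    enum { VALUE = (1 << (COUNT - 1) < N) ? COUNT : COUNT - 1 };      // base case: round up
};

template <int N>
struct PowerOfTwo
{
    enum { VALUE = ((N & (N - 1)) == 0) };
};

static_assert(Log2<8>::VALUE == 3, "log2(8), rounded up");
static_assert(Log2<3>::VALUE == 2, "log2(3), rounded up");
static_assert(PowerOfTwo<32>::VALUE == 1 && PowerOfTwo<24>::VALUE == 0, "power-of-two test");

int main() { return 0; }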
For example: - * - * typename VectorHelper::Type // Aliases uint2 - * - ******************************************************************************/ - -/** - * \brief Exposes a member typedef \p Type that names the corresponding CUDA vector type if one exists. Otherwise \p Type refers to the VectorHelper structure itself, which will wrap the corresponding \p x, \p y, etc. vector fields. - */ -template struct VectorHelper; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -enum -{ - /// The maximum number of elements in CUDA vector types - MAX_VEC_ELEMENTS = 4, -}; - - -/** - * Generic vector-1 type - */ -template -struct VectorHelper -{ - enum { BUILT_IN = false }; - - T x; - - typedef VectorHelper Type; -}; - -/** - * Generic vector-2 type - */ -template -struct VectorHelper -{ - enum { BUILT_IN = false }; - - T x; - T y; - - typedef VectorHelper Type; -}; - -/** - * Generic vector-3 type - */ -template -struct VectorHelper -{ - enum { BUILT_IN = false }; - - T x; - T y; - T z; - - typedef VectorHelper Type; -}; - -/** - * Generic vector-4 type - */ -template -struct VectorHelper -{ - enum { BUILT_IN = false }; - - T x; - T y; - T z; - T w; - - typedef VectorHelper Type; -}; - -/** - * Macro for expanding partially-specialized built-in vector types - */ -#define CUB_DEFINE_VECTOR_TYPE(base_type,short_type) \ - template<> struct VectorHelper { typedef short_type##1 Type; enum { BUILT_IN = true }; }; \ - template<> struct VectorHelper { typedef short_type##2 Type; enum { BUILT_IN = true }; }; \ - template<> struct VectorHelper { typedef short_type##3 Type; enum { BUILT_IN = true }; }; \ - template<> struct VectorHelper { typedef short_type##4 Type; enum { BUILT_IN = true }; }; - -// Expand CUDA vector types for built-in primitives -CUB_DEFINE_VECTOR_TYPE(char, char) -CUB_DEFINE_VECTOR_TYPE(signed char, char) -CUB_DEFINE_VECTOR_TYPE(short, short) -CUB_DEFINE_VECTOR_TYPE(int, int) -CUB_DEFINE_VECTOR_TYPE(long, long) -CUB_DEFINE_VECTOR_TYPE(long long, longlong) -CUB_DEFINE_VECTOR_TYPE(unsigned char, uchar) -CUB_DEFINE_VECTOR_TYPE(unsigned short, ushort) -CUB_DEFINE_VECTOR_TYPE(unsigned int, uint) -CUB_DEFINE_VECTOR_TYPE(unsigned long, ulong) -CUB_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong) -CUB_DEFINE_VECTOR_TYPE(float, float) -CUB_DEFINE_VECTOR_TYPE(double, double) -CUB_DEFINE_VECTOR_TYPE(bool, uchar) - -// Undefine macros -#undef CUB_DEFINE_VECTOR_TYPE - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - -/** @} */ // end group UtilModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/warp/specializations/warp_reduce_shfl.cuh b/kokkos/kokkos/TPL/cub/warp/specializations/warp_reduce_shfl.cuh deleted file mode 100644 index 317b629..0000000 --- a/kokkos/kokkos/TPL/cub/warp/specializations/warp_reduce_shfl.cuh +++ /dev/null @@ -1,358 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
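util_vector.cuh, removed above, maps a (base type, lane count) pair to the matching built-in CUDA vector type so that loads and stores can be issued as one wide transaction. A minimal sketch of the same idea, compiled with nvcc so the CUDA vector types are visible, spelling out only the two specializations this demo needs (the deleted header generates the full set with CUB_DEFINE_VECTOR_TYPE):

#include <cstdio>
#include <vector_types.h>   // uint2, float4, ...

template <typename T, int VEC_ELEMENTS> struct VectorHelper;                 // primary template, unspecialized

template <> struct VectorHelper<unsigned int, 2> { typedef uint2  Type; };   // 2 x unsigned int -> uint2
template <> struct VectorHelper<float, 4>        { typedef float4 Type; };   // 4 x float        -> float4

int main()
{
    VectorHelper<unsigned int, 2>::Type v = {1u, 2u};
    VectorHelper<float, 4>::Type        w = {1.0f, 2.0f, 3.0f, 4.0f};
    std::printf("%u %u %f\n", v.x, v.y, w.w);
    return 0;
}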
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::WarpReduceShfl provides SHFL-based variants of parallel reduction across CUDA warps. - */ - -#pragma once - -#include "../../thread/thread_operators.cuh" -#include "../../util_ptx.cuh" -#include "../../util_type.cuh" -#include "../../util_macro.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \brief WarpReduceShfl provides SHFL-based variants of parallel reduction across CUDA warps. 
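Before the struct body below, it helps to see the reduction pattern WarpReduceShfl implements in its simplest form. This sketch is not the deleted specialization: it uses the modern __shfl_down_sync intrinsic in place of the inline shfl.down.b32 PTX, and it handles only a full 32-lane warp of ints.

#include <cstdio>
#include <cuda_runtime.h>

__global__ void warp_sum(const int *in, int *out)
{
    int v = in[threadIdx.x];

    // Each step pulls the partial sum from the lane `offset` positions higher,
    // halving the number of live partials: 16, 8, 4, 2, 1.
    for (int offset = 16; offset > 0; offset >>= 1)
        v += __shfl_down_sync(0xFFFFFFFFu, v, offset);

    if (threadIdx.x == 0) *out = v;   // lane 0 holds the warp total
}

int main()
{
    int h_in[32], h_out = 0, *d_in, *d_out;
    for (int i = 0; i < 32; ++i) h_in[i] = i;                 // expected total: 496

    cudaMalloc(&d_in, sizeof(h_in));
    cudaMalloc(&d_out, sizeof(int));
    cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice);

    warp_sum<<<1, 32>>>(d_in, d_out);
    cudaMemcpy(&h_out, d_out, sizeof(int), cudaMemcpyDeviceToHost);
    std::printf("warp sum = %d\n", h_out);                    // prints 496

    cudaFree(d_in);
    cudaFree(d_out);
    return 0;
}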
- */ -template < - typename T, ///< Data type being reduced - int LOGICAL_WARPS, ///< Number of logical warps entrant - int LOGICAL_WARP_THREADS> ///< Number of threads per logical warp -struct WarpReduceShfl -{ - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - enum - { - /// The number of warp reduction steps - STEPS = Log2::VALUE, - - // The 5-bit SHFL mask for logically splitting warps into sub-segments - SHFL_MASK = (-1 << STEPS) & 31, - - // The 5-bit SFHL clamp - SHFL_CLAMP = LOGICAL_WARP_THREADS - 1, - - // The packed C argument (mask starts 8 bits up) - SHFL_C = (SHFL_MASK << 8) | SHFL_CLAMP, - }; - - - /// Shared memory storage layout type - typedef NullType TempStorage; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - int warp_id; - int lane_id; - - - /****************************************************************************** - * Construction - ******************************************************************************/ - - /// Constructor - __device__ __forceinline__ WarpReduceShfl( - TempStorage &temp_storage, - int warp_id, - int lane_id) - : - warp_id(warp_id), - lane_id(lane_id) - {} - - - /****************************************************************************** - * Operation - ******************************************************************************/ - - /// Summation (single-SHFL) - template < - bool FULL_WARPS, ///< Whether all lanes in each warp are contributing a valid fold of items - int FOLDED_ITEMS_PER_LANE> ///< Number of items folded into each lane - __device__ __forceinline__ T Sum( - T input, ///< [in] Calling thread's input - int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp - Int2Type single_shfl) ///< [in] Marker type indicating whether only one SHFL instruction is required - { - unsigned int output = reinterpret_cast(input); - - // Iterate reduction steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - const int OFFSET = 1 << STEP; - - if (FULL_WARPS) - { - // Use predicate set from SHFL to guard against invalid peers - asm( - "{" - " .reg .u32 r0;" - " .reg .pred p;" - " shfl.down.b32 r0|p, %1, %2, %3;" - " @p add.u32 r0, r0, %4;" - " mov.u32 %0, r0;" - "}" - : "=r"(output) : "r"(output), "r"(OFFSET), "r"(SHFL_C), "r"(output)); - } - else - { - // Set range predicate to guard against invalid peers - asm( - "{" - " .reg .u32 r0;" - " .reg .pred p;" - " shfl.down.b32 r0, %1, %2, %3;" - " setp.lt.u32 p, %5, %6;" - " mov.u32 %0, %1;" - " @p add.u32 %0, %1, r0;" - "}" - : "=r"(output) : "r"(output), "r"(OFFSET), "r"(SHFL_C), "r"(output), "r"((lane_id + OFFSET) * FOLDED_ITEMS_PER_LANE), "r"(folded_items_per_warp)); - } - } - - return output; - } - - - /// Summation (multi-SHFL) - template < - bool FULL_WARPS, ///< Whether all lanes in each warp are contributing a valid fold of items - int FOLDED_ITEMS_PER_LANE> ///< Number of items folded into each lane - __device__ __forceinline__ T Sum( - T input, ///< [in] Calling thread's input - int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp - Int2Type single_shfl) ///< [in] Marker type indicating whether only one SHFL instruction is required - { - // Delegate to generic reduce - return Reduce(input, 
folded_items_per_warp, cub::Sum()); - } - - - /// Summation (float) - template < - bool FULL_WARPS, ///< Whether all lanes in each warp are contributing a valid fold of items - int FOLDED_ITEMS_PER_LANE> ///< Number of items folded into each lane - __device__ __forceinline__ float Sum( - float input, ///< [in] Calling thread's input - int folded_items_per_warp) ///< [in] Total number of valid items folded into each logical warp - { - T output = input; - - // Iterate reduction steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - const int OFFSET = 1 << STEP; - - if (FULL_WARPS) - { - // Use predicate set from SHFL to guard against invalid peers - asm( - "{" - " .reg .f32 r0;" - " .reg .pred p;" - " shfl.down.b32 r0|p, %1, %2, %3;" - " @p add.f32 r0, r0, %4;" - " mov.f32 %0, r0;" - "}" - : "=f"(output) : "f"(output), "r"(OFFSET), "r"(SHFL_C), "f"(output)); - } - else - { - // Set range predicate to guard against invalid peers - asm( - "{" - " .reg .f32 r0;" - " .reg .pred p;" - " shfl.down.b32 r0, %1, %2, %3;" - " setp.lt.u32 p, %5, %6;" - " mov.f32 %0, %1;" - " @p add.f32 %0, %0, r0;" - "}" - : "=f"(output) : "f"(output), "r"(OFFSET), "r"(SHFL_C), "f"(output), "r"((lane_id + OFFSET) * FOLDED_ITEMS_PER_LANE), "r"(folded_items_per_warp)); - } - } - - return output; - } - - /// Summation (generic) - template < - bool FULL_WARPS, ///< Whether all lanes in each warp are contributing a valid fold of items - int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane - typename _T> - __device__ __forceinline__ _T Sum( - _T input, ///< [in] Calling thread's input - int folded_items_per_warp) ///< [in] Total number of valid items folded into each logical warp - { - // Whether sharing can be done with a single SHFL instruction (vs multiple SFHL instructions) - Int2Type<(Traits<_T>::PRIMITIVE) && (sizeof(_T) <= sizeof(unsigned int))> single_shfl; - - return Sum(input, folded_items_per_warp, single_shfl); - } - - - /// Reduction - template < - bool FULL_WARPS, ///< Whether all lanes in each warp are contributing a valid fold of items - int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane - typename ReductionOp> - __device__ __forceinline__ T Reduce( - T input, ///< [in] Calling thread's input - int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp - ReductionOp reduction_op) ///< [in] Binary reduction operator - { - typedef typename WordAlignment::ShuffleWord ShuffleWord; - - const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); - T output = input; - T temp; - ShuffleWord *temp_alias = reinterpret_cast(&temp); - ShuffleWord *output_alias = reinterpret_cast(&output); - - // Iterate scan steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - // Grab addend from peer - const int OFFSET = 1 << STEP; - - #pragma unroll - for (int WORD = 0; WORD < WORDS; ++WORD) - { - unsigned int shuffle_word = output_alias[WORD]; - asm( - " shfl.down.b32 %0, %1, %2, %3;" - : "=r"(shuffle_word) : "r"(shuffle_word), "r"(OFFSET), "r"(SHFL_C)); - temp_alias[WORD] = (ShuffleWord) shuffle_word; - } - - // Perform reduction op if from a valid peer - if (FULL_WARPS) - { - if (lane_id < LOGICAL_WARP_THREADS - OFFSET) - output = reduction_op(output, temp); - } - else - { - if (((lane_id + OFFSET) * FOLDED_ITEMS_PER_LANE) < folded_items_per_warp) - output = reduction_op(output, temp); - } - } - - return output; - } - - - /// Segmented reduction - template < - bool HEAD_SEGMENTED, ///< Whether flags indicate 
a segment-head or a segment-tail - typename Flag, - typename ReductionOp> - __device__ __forceinline__ T SegmentedReduce( - T input, ///< [in] Calling thread's input - Flag flag, ///< [in] Whether or not the current lane is a segment head/tail - ReductionOp reduction_op) ///< [in] Binary reduction operator - { - typedef typename WordAlignment::ShuffleWord ShuffleWord; - - T output = input; - - const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); - T temp; - ShuffleWord *temp_alias = reinterpret_cast(&temp); - ShuffleWord *output_alias = reinterpret_cast(&output); - - // Get the start flags for each thread in the warp. - int warp_flags = __ballot(flag); - - if (!HEAD_SEGMENTED) - warp_flags <<= 1; - - // Keep bits above the current thread. - warp_flags &= LaneMaskGt(); - - // Accommodate packing of multiple logical warps in a single physical warp - if ((LOGICAL_WARPS > 1) && (LOGICAL_WARP_THREADS < 32)) - warp_flags >>= (warp_id * LOGICAL_WARP_THREADS); - - // Find next flag - int next_flag = __clz(__brev(warp_flags)); - - // Clip the next segment at the warp boundary if necessary - if (LOGICAL_WARP_THREADS != 32) - next_flag = CUB_MIN(next_flag, LOGICAL_WARP_THREADS); - - // Iterate scan steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - const int OFFSET = 1 << STEP; - - // Grab addend from peer - #pragma unroll - for (int WORD = 0; WORD < WORDS; ++WORD) - { - unsigned int shuffle_word = output_alias[WORD]; - - asm( - " shfl.down.b32 %0, %1, %2, %3;" - : "=r"(shuffle_word) : "r"(shuffle_word), "r"(OFFSET), "r"(SHFL_C)); - temp_alias[WORD] = (ShuffleWord) shuffle_word; - - } - - // Perform reduction op if valid - if (OFFSET < next_flag - lane_id) - output = reduction_op(output, temp); - } - - return output; - } -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/warp/specializations/warp_reduce_smem.cuh b/kokkos/kokkos/TPL/cub/warp/specializations/warp_reduce_smem.cuh deleted file mode 100644 index a32d5fd..0000000 --- a/kokkos/kokkos/TPL/cub/warp/specializations/warp_reduce_smem.cuh +++ /dev/null @@ -1,291 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
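The SegmentedReduce deleted just above leans on a compact bit trick: ballot the head flags, mask off everything at or below the calling lane, and __clz(__brev(...)) yields the lane index of the next segment head, which bounds how far the reduction may look downward. The host-side walk-through below emulates __brev/__clz so the arithmetic can be checked without a GPU; the flag positions are arbitrary example values.

#include <cstdio>
#include <cstdint>

static uint32_t brev(uint32_t x)   // emulate __brev: reverse the 32 bits
{
    uint32_t r = 0;
    for (int i = 0; i < 32; ++i) r |= ((x >> i) & 1u) << (31 - i);
    return r;
}

static int clz(uint32_t x)         // emulate __clz: count leading zeros (32 for x == 0)
{
    for (int i = 31; i >= 0; --i)
        if (x & (1u << i)) return 31 - i;
    return 32;
}

int main()
{
    // Pretend lanes 0, 5 and 20 raised a head flag in __ballot(flag).
    const uint32_t head_flags = (1u << 0) | (1u << 5) | (1u << 20);

    for (int lane = 0; lane < 32; ++lane)
    {
        // LaneMaskGt(): bits strictly above the calling lane.
        const uint32_t lanemask_gt = (lane == 31) ? 0u : ~((1u << (lane + 1)) - 1u);

        // Lane index of the next head flag (32 when none remains).
        const int next_flag = clz(brev(head_flags & lanemask_gt));

        if (lane == 0 || lane == 4 || lane == 5 || lane == 19)
            std::printf("lane %2d: next segment head at lane %d\n", lane, next_flag);
    }
    return 0;   // prints 5, 5, 20, 20 for the lanes shown
}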
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::WarpReduceSmem provides smem-based variants of parallel reduction across CUDA warps. - */ - -#pragma once - -#include "../../thread/thread_operators.cuh" -#include "../../thread/thread_load.cuh" -#include "../../thread/thread_store.cuh" -#include "../../util_type.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief WarpReduceSmem provides smem-based variants of parallel reduction across CUDA warps. - */ -template < - typename T, ///< Data type being reduced - int LOGICAL_WARPS, ///< Number of logical warps entrant - int LOGICAL_WARP_THREADS> ///< Number of threads per logical warp -struct WarpReduceSmem -{ - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - enum - { - /// Whether the logical warp size is a power-of-two - POW_OF_TWO = ((LOGICAL_WARP_THREADS & (LOGICAL_WARP_THREADS - 1)) == 0), - - /// The number of warp scan steps - STEPS = Log2::VALUE, - - /// The number of threads in half a warp - HALF_WARP_THREADS = 1 << (STEPS - 1), - - /// The number of shared memory elements per warp - WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, - }; - - /// Shared memory flag type - typedef unsigned char SmemFlag; - - /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) - typedef T _TempStorage[LOGICAL_WARPS][WARP_SMEM_ELEMENTS]; - - // Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - _TempStorage &temp_storage; - int warp_id; - int lane_id; - - - /****************************************************************************** - * Construction - ******************************************************************************/ - - /// Constructor - __device__ __forceinline__ WarpReduceSmem( - TempStorage &temp_storage, - int warp_id, - int lane_id) - : - temp_storage(temp_storage.Alias()), - warp_id(warp_id), - lane_id(lane_id) - {} - - - /****************************************************************************** - * Operation - ******************************************************************************/ - - /** - * Reduction - */ - template < - bool FULL_WARPS, ///< Whether all lanes in each warp are contributing a valid fold of items - int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane - typename ReductionOp> - __device__ __forceinline__ T Reduce( - T input, ///< [in] Calling thread's input - int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp - ReductionOp reduction_op) ///< [in] Reduction 
operator - { - for (int STEP = 0; STEP < STEPS; STEP++) - { - const int OFFSET = 1 << STEP; - - // Share input through buffer - ThreadStore(&temp_storage[warp_id][lane_id], input); - - // Update input if peer_addend is in range - if ((FULL_WARPS && POW_OF_TWO) || ((lane_id + OFFSET) * FOLDED_ITEMS_PER_LANE < folded_items_per_warp)) - { - T peer_addend = ThreadLoad(&temp_storage[warp_id][lane_id + OFFSET]); - input = reduction_op(input, peer_addend); - } - } - - return input; - } - - - /** - * Segmented reduction - */ - template < - bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail - typename Flag, - typename ReductionOp> - __device__ __forceinline__ T SegmentedReduce( - T input, ///< [in] Calling thread's input - Flag flag, ///< [in] Whether or not the current lane is a segment head/tail - ReductionOp reduction_op) ///< [in] Reduction operator - { - #if CUB_PTX_ARCH >= 200 - - // Ballot-based segmented reduce - - // Get the start flags for each thread in the warp. - int warp_flags = __ballot(flag); - - if (!HEAD_SEGMENTED) - warp_flags <<= 1; - - // Keep bits above the current thread. - warp_flags &= LaneMaskGt(); - - // Accommodate packing of multiple logical warps in a single physical warp - if ((LOGICAL_WARPS > 1) && (LOGICAL_WARP_THREADS < 32)) - warp_flags >>= (warp_id * LOGICAL_WARP_THREADS); - - // Find next flag - int next_flag = __clz(__brev(warp_flags)); - - // Clip the next segment at the warp boundary if necessary - if (LOGICAL_WARP_THREADS != 32) - next_flag = CUB_MIN(next_flag, LOGICAL_WARP_THREADS); - - for (int STEP = 0; STEP < STEPS; STEP++) - { - const int OFFSET = 1 << STEP; - - // Share input into buffer - ThreadStore(&temp_storage[warp_id][lane_id], input); - - // Update input if peer_addend is in range - if (OFFSET < next_flag - lane_id) - { - T peer_addend = ThreadLoad(&temp_storage[warp_id][lane_id + OFFSET]); - input = reduction_op(input, peer_addend); - } - } - - return input; - - #else - - // Smem-based segmented reduce - - enum - { - UNSET = 0x0, // Is initially unset - SET = 0x1, // Is initially set - SEEN = 0x2, // Has seen another head flag from a successor peer - }; - - // Alias flags onto shared data storage - volatile SmemFlag *flag_storage = reinterpret_cast(temp_storage[warp_id]); - - SmemFlag flag_status = (flag) ? SET : UNSET; - - for (int STEP = 0; STEP < STEPS; STEP++) - { - const int OFFSET = 1 << STEP; - - // Share input through buffer - ThreadStore(&temp_storage[warp_id][lane_id], input); - - // Get peer from buffer - T peer_addend = ThreadLoad(&temp_storage[warp_id][lane_id + OFFSET]); - - // Share flag through buffer - flag_storage[lane_id] = flag_status; - - // Get peer flag from buffer - SmemFlag peer_flag_status = flag_storage[lane_id + OFFSET]; - - // Update input if peer was in range - if (lane_id < LOGICAL_WARP_THREADS - OFFSET) - { - if (HEAD_SEGMENTED) - { - // Head-segmented - if ((flag_status & SEEN) == 0) - { - // Has not seen a more distant head flag - if (peer_flag_status & SET) - { - // Has now seen a head flag - flag_status |= SEEN; - } - else - { - // Peer is not a head flag: grab its count - input = reduction_op(input, peer_addend); - } - - // Update seen status to include that of peer - flag_status |= (peer_flag_status & SEEN); - } - } - else - { - // Tail-segmented. 
Simply propagate flag status - if (!flag_status) - { - input = reduction_op(input, peer_addend); - flag_status |= peer_flag_status; - } - - } - } - } - - return input; - - #endif - } - - - /** - * Summation - */ - template < - bool FULL_WARPS, ///< Whether all lanes in each warp are contributing a valid fold of items - int FOLDED_ITEMS_PER_LANE> ///< Number of items folded into each lane - __device__ __forceinline__ T Sum( - T input, ///< [in] Calling thread's input - int folded_items_per_warp) ///< [in] Total number of valid items folded into each logical warp - { - return Reduce(input, folded_items_per_warp, cub::Sum()); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/warp/specializations/warp_scan_shfl.cuh b/kokkos/kokkos/TPL/cub/warp/specializations/warp_scan_shfl.cuh deleted file mode 100644 index 5585396..0000000 --- a/kokkos/kokkos/TPL/cub/warp/specializations/warp_scan_shfl.cuh +++ /dev/null @@ -1,371 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::WarpScanShfl provides SHFL-based variants of parallel prefix scan across CUDA warps. - */ - -#pragma once - -#include "../../thread/thread_operators.cuh" -#include "../../util_type.cuh" -#include "../../util_ptx.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief WarpScanShfl provides SHFL-based variants of parallel prefix scan across CUDA warps. 
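For contrast with the shuffle path, the shared-memory specialization deleted above follows the same halving schedule but communicates through a per-warp buffer. The kernel below is a stripped-down restatement for a single full warp of ints: __syncwarp() makes explicit the lock-step execution the 2013-era code assumed, and a bounds check replaces the original's trick of padding the buffer to 1.5 warps so that out-of-range reads are harmless.

__global__ void warp_sum_smem(const int *in, int *out)
{
    __shared__ int buf[32];
    const int lane = threadIdx.x;

    int v = in[lane];
    for (int offset = 1; offset < 32; offset <<= 1)
    {
        buf[lane] = v;                 // publish current partial sum
        __syncwarp();
        if (lane + offset < 32)
            v += buf[lane + offset];   // fold in the peer's partial sum
        __syncwarp();                  // keep next iteration's writes from racing these reads
    }

    if (lane == 0) out[0] = v;         // lane 0 holds the warp total
}

Launched as warp_sum_smem<<<1, 32>>>(d_in, d_out) with the same 0..31 input as the shuffle example, lane 0 again produces 496.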
- */ -template < - typename T, ///< Data type being scanned - int LOGICAL_WARPS, ///< Number of logical warps entrant - int LOGICAL_WARP_THREADS> ///< Number of threads per logical warp -struct WarpScanShfl -{ - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - enum - { - /// The number of warp scan steps - STEPS = Log2::VALUE, - - // The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up - SHFL_C = ((-1 << STEPS) & 31) << 8, - }; - - /// Shared memory storage layout type - typedef NullType TempStorage; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - int warp_id; - int lane_id; - - /****************************************************************************** - * Construction - ******************************************************************************/ - - /// Constructor - __device__ __forceinline__ WarpScanShfl( - TempStorage &temp_storage, - int warp_id, - int lane_id) - : - warp_id(warp_id), - lane_id(lane_id) - {} - - - /****************************************************************************** - * Operation - ******************************************************************************/ - - /// Broadcast - __device__ __forceinline__ T Broadcast( - T input, ///< [in] The value to broadcast - int src_lane) ///< [in] Which warp lane is to do the broadcasting - { - typedef typename WordAlignment::ShuffleWord ShuffleWord; - - const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); - T output; - ShuffleWord *output_alias = reinterpret_cast(&output); - ShuffleWord *input_alias = reinterpret_cast(&input); - - #pragma unroll - for (int WORD = 0; WORD < WORDS; ++WORD) - { - unsigned int shuffle_word = input_alias[WORD]; - asm("shfl.idx.b32 %0, %1, %2, %3;" - : "=r"(shuffle_word) : "r"(shuffle_word), "r"(src_lane), "r"(LOGICAL_WARP_THREADS - 1)); - output_alias[WORD] = (ShuffleWord) shuffle_word; - } - - return output; - } - - - //--------------------------------------------------------------------- - // Inclusive operations - //--------------------------------------------------------------------- - - /// Inclusive prefix sum with aggregate (single-SHFL) - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T &warp_aggregate, ///< [out] Warp-wide aggregate reduction of input items. - Int2Type single_shfl) - { - unsigned int temp = reinterpret_cast(input); - - // Iterate scan steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - // Use predicate set from SHFL to guard against invalid peers - asm( - "{" - " .reg .u32 r0;" - " .reg .pred p;" - " shfl.up.b32 r0|p, %1, %2, %3;" - " @p add.u32 r0, r0, %4;" - " mov.u32 %0, r0;" - "}" - : "=r"(temp) : "r"(temp), "r"(1 << STEP), "r"(SHFL_C), "r"(temp)); - } - - output = temp; - - // Grab aggregate from last warp lane - warp_aggregate = Broadcast(output, LOGICAL_WARP_THREADS - 1); - } - - - /// Inclusive prefix sum with aggregate (multi-SHFL) - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. 
- T &warp_aggregate, ///< [out] Warp-wide aggregate reduction of input items. - Int2Type single_shfl) ///< [in] Marker type indicating whether only one SHFL instruction is required - { - // Delegate to generic scan - InclusiveScan(input, output, Sum(), warp_aggregate); - } - - - /// Inclusive prefix sum with aggregate (specialized for float) - __device__ __forceinline__ void InclusiveSum( - float input, ///< [in] Calling thread's input item. - float &output, ///< [out] Calling thread's output item. May be aliased with \p input. - float &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - output = input; - - // Iterate scan steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - // Use predicate set from SHFL to guard against invalid peers - asm( - "{" - " .reg .f32 r0;" - " .reg .pred p;" - " shfl.up.b32 r0|p, %1, %2, %3;" - " @p add.f32 r0, r0, %4;" - " mov.f32 %0, r0;" - "}" - : "=f"(output) : "f"(output), "r"(1 << STEP), "r"(SHFL_C), "f"(output)); - } - - // Grab aggregate from last warp lane - warp_aggregate = Broadcast(output, LOGICAL_WARP_THREADS - 1); - } - - - /// Inclusive prefix sum with aggregate (specialized for unsigned long long) - __device__ __forceinline__ void InclusiveSum( - unsigned long long input, ///< [in] Calling thread's input item. - unsigned long long &output, ///< [out] Calling thread's output item. May be aliased with \p input. - unsigned long long &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - output = input; - - // Iterate scan steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - // Use predicate set from SHFL to guard against invalid peers - asm( - "{" - " .reg .u32 r0;" - " .reg .u32 r1;" - " .reg .u32 lo;" - " .reg .u32 hi;" - " .reg .pred p;" - " mov.b64 {lo, hi}, %1;" - " shfl.up.b32 r0|p, lo, %2, %3;" - " shfl.up.b32 r1|p, hi, %2, %3;" - " @p add.cc.u32 r0, r0, lo;" - " @p addc.u32 r1, r1, hi;" - " mov.b64 %0, {r0, r1};" - "}" - : "=l"(output) : "l"(output), "r"(1 << STEP), "r"(SHFL_C)); - } - - // Grab aggregate from last warp lane - warp_aggregate = Broadcast(output, LOGICAL_WARP_THREADS - 1); - } - - - /// Inclusive prefix sum with aggregate (generic) - template - __device__ __forceinline__ void InclusiveSum( - _T input, ///< [in] Calling thread's input item. - _T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - _T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - // Whether sharing can be done with a single SHFL instruction (vs multiple SFHL instructions) - Int2Type<(Traits<_T>::PRIMITIVE) && (sizeof(_T) <= sizeof(unsigned int))> single_shfl; - - InclusiveSum(input, output, warp_aggregate, single_shfl); - } - - - /// Inclusive prefix sum - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output) ///< [out] Calling thread's output item. May be aliased with \p input. - { - T warp_aggregate; - InclusiveSum(input, output, warp_aggregate); - } - - - /// Inclusive scan with aggregate - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. 
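The specializations above all follow one pattern: shuffle a partial up by 1 << STEP lanes and fold it in only when the source lane exists, moving types wider than 32 bits as separate words. The sketch below restates that pattern in plain CUDA; the __shfl_up_sync intrinsic and the explicit lane check are assumptions (the original relies on the shfl predicate in inline PTX), and the helper names are hypothetical.

// Shuffle-up inclusive sum: fold in the peer's partial only when the source
// lane is valid, mirroring the predicated add in the PTX above.
__device__ __forceinline__ int WarpInclusiveSumSketch(int x)
{
    const unsigned int full_mask = 0xffffffffu;
    const int lane = threadIdx.x & 31;

    #pragma unroll
    for (int offset = 1; offset < 32; offset <<= 1)
    {
        int peer = __shfl_up_sync(full_mask, x, offset);
        if (lane >= offset)        // source lane exists: fold it in
            x += peer;
    }
    return x;                      // lane i now holds the sum of lanes 0..i
}

// Values wider than 32 bits travel as separate words, mirroring the two
// shfl.up.b32 instructions in the unsigned long long specialization.
__device__ __forceinline__ unsigned long long
ShuffleUp64Sketch(unsigned long long x, int offset)
{
    const unsigned int full_mask = 0xffffffffu;
    unsigned int lo = static_cast<unsigned int>(x);
    unsigned int hi = static_cast<unsigned int>(x >> 32);
    lo = __shfl_up_sync(full_mask, lo, offset);
    hi = __shfl_up_sync(full_mask, hi, offset);
    return (static_cast<unsigned long long>(hi) << 32) | lo;
}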
- { - output = input; - - // Iterate scan steps - #pragma unroll - for (int STEP = 0; STEP < STEPS; STEP++) - { - // Grab addend from peer - const int OFFSET = 1 << STEP; - T temp = ShuffleUp(output, OFFSET); - - // Perform scan op if from a valid peer - if (lane_id >= OFFSET) - output = scan_op(temp, output); - } - - // Grab aggregate from last warp lane - warp_aggregate = Broadcast(output, LOGICAL_WARP_THREADS - 1); - } - - - /// Inclusive scan - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op) ///< [in] Binary scan operator - { - T warp_aggregate; - InclusiveScan(input, output, scan_op, warp_aggregate); - } - - - //--------------------------------------------------------------------- - // Exclusive operations - //--------------------------------------------------------------------- - - /// Exclusive scan with aggregate - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - // Compute inclusive scan - T inclusive; - InclusiveScan(input, inclusive, scan_op, warp_aggregate); - - // Grab result from predecessor - T exclusive = ShuffleUp(inclusive, 1); - - output = (lane_id == 0) ? - identity : - exclusive; - } - - - /// Exclusive scan - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T identity, ///< [in] Identity value - ScanOp scan_op) ///< [in] Binary scan operator - { - T warp_aggregate; - ExclusiveScan(input, output, identity, scan_op, warp_aggregate); - } - - - /// Exclusive scan with aggregate, without identity - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - // Compute inclusive scan - T inclusive; - InclusiveScan(input, inclusive, scan_op, warp_aggregate); - - // Grab result from predecessor - output = ShuffleUp(inclusive, 1); - } - - - /// Exclusive scan without identity - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op) ///< [in] Binary scan operator - { - T warp_aggregate; - ExclusiveScan(input, output, scan_op, warp_aggregate); - } -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/warp/specializations/warp_scan_smem.cuh b/kokkos/kokkos/TPL/cub/warp/specializations/warp_scan_smem.cuh deleted file mode 100644 index 513b35c..0000000 --- a/kokkos/kokkos/TPL/cub/warp/specializations/warp_scan_smem.cuh +++ /dev/null @@ -1,327 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * cub::WarpScanSmem provides smem-based variants of parallel prefix scan across CUDA warps. - */ - -#pragma once - -#include "../../thread/thread_operators.cuh" -#include "../../thread/thread_load.cuh" -#include "../../thread/thread_store.cuh" -#include "../../util_type.cuh" -#include "../../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \brief WarpScanSmem provides smem-based variants of parallel prefix scan across CUDA warps. 
- */ -template < - typename T, ///< Data type being scanned - int LOGICAL_WARPS, ///< Number of logical warps entrant - int LOGICAL_WARP_THREADS> ///< Number of threads per logical warp -struct WarpScanSmem -{ - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - enum - { - /// The number of warp scan steps - STEPS = Log2::VALUE, - - /// The number of threads in half a warp - HALF_WARP_THREADS = 1 << (STEPS - 1), - - /// The number of shared memory elements per warp - WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, - }; - - - /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) - typedef T _TempStorage[LOGICAL_WARPS][WARP_SMEM_ELEMENTS]; - - // Alias wrapper allowing storage to be unioned - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - _TempStorage &temp_storage; - unsigned int warp_id; - unsigned int lane_id; - - - /****************************************************************************** - * Construction - ******************************************************************************/ - - /// Constructor - __device__ __forceinline__ WarpScanSmem( - TempStorage &temp_storage, - int warp_id, - int lane_id) - : - temp_storage(temp_storage.Alias()), - warp_id(warp_id), - lane_id(lane_id) - {} - - - /****************************************************************************** - * Operation - ******************************************************************************/ - - /// Initialize identity padding (specialized for operations that have identity) - __device__ __forceinline__ void InitIdentity(Int2Type has_identity) - { - T identity = T(); - ThreadStore(&temp_storage[warp_id][lane_id], identity); - } - - - /// Initialize identity padding (specialized for operations without identity) - __device__ __forceinline__ void InitIdentity(Int2Type has_identity) - {} - - - /// Basic inclusive scan iteration(template unrolled, base-case specialization) - template < - bool HAS_IDENTITY, - typename ScanOp> - __device__ __forceinline__ void ScanStep( - T &partial, - ScanOp scan_op, - Int2Type step) - {} - - - /// Basic inclusive scan iteration (template unrolled, inductive-case specialization) - template < - bool HAS_IDENTITY, - int STEP, - typename ScanOp> - __device__ __forceinline__ void ScanStep( - T &partial, - ScanOp scan_op, - Int2Type step) - { - const int OFFSET = 1 << STEP; - - // Share partial into buffer - ThreadStore(&temp_storage[warp_id][HALF_WARP_THREADS + lane_id], partial); - - // Update partial if addend is in range - if (HAS_IDENTITY || (lane_id >= OFFSET)) - { - T addend = ThreadLoad(&temp_storage[warp_id][HALF_WARP_THREADS + lane_id - OFFSET]); - partial = scan_op(addend, partial); - } - - ScanStep(partial, scan_op, Int2Type()); - } - - - /// Broadcast - __device__ __forceinline__ T Broadcast( - T input, ///< [in] The value to broadcast - unsigned int src_lane) ///< [in] Which warp lane is to do the broadcasting - { - if (lane_id == src_lane) - { - ThreadStore(temp_storage[warp_id], input); - } - - return ThreadLoad(temp_storage[warp_id]); - } - - - /// Basic inclusive scan - template < - bool HAS_IDENTITY, - bool SHARE_FINAL, - typename ScanOp> - __device__ __forceinline__ T BasicScan( - T partial, 
///< Calling thread's input partial reduction - ScanOp scan_op) ///< Binary associative scan functor - { - // Iterate scan steps - ScanStep(partial, scan_op, Int2Type<0>()); - - if (SHARE_FINAL) - { - // Share partial into buffer - ThreadStore(&temp_storage[warp_id][HALF_WARP_THREADS + lane_id], partial); - } - - return partial; - } - - - /// Inclusive prefix sum - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output) ///< [out] Calling thread's output item. May be aliased with \p input. - { - const bool HAS_IDENTITY = Traits::PRIMITIVE; - - // Initialize identity region - InitIdentity(Int2Type()); - - // Compute inclusive warp scan (has identity, don't share final) - output = BasicScan(input, Sum()); - } - - - /// Inclusive prefix sum with aggregate - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - const bool HAS_IDENTITY = Traits::PRIMITIVE; - - // Initialize identity region - InitIdentity(Int2Type()); - - // Compute inclusive warp scan (has identity, share final) - output = BasicScan(input, Sum()); - - // Retrieve aggregate in warp-lane0 - warp_aggregate = ThreadLoad(&temp_storage[warp_id][WARP_SMEM_ELEMENTS - 1]); - } - - - /// Inclusive scan - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op) ///< [in] Binary scan operator - { - // Compute inclusive warp scan (no identity, don't share final) - output = BasicScan(input, scan_op); - } - - - /// Inclusive scan with aggregate - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - // Compute inclusive warp scan (no identity, share final) - output = BasicScan(input, scan_op); - - // Retrieve aggregate - warp_aggregate = ThreadLoad(&temp_storage[warp_id][WARP_SMEM_ELEMENTS - 1]); - } - - /// Exclusive scan - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T identity, ///< [in] Identity value - ScanOp scan_op) ///< [in] Binary scan operator - { - // Initialize identity region - ThreadStore(&temp_storage[warp_id][lane_id], identity); - - // Compute inclusive warp scan (identity, share final) - T inclusive = BasicScan(input, scan_op); - - // Retrieve exclusive scan - output = ThreadLoad(&temp_storage[warp_id][HALF_WARP_THREADS + lane_id - 1]); - } - - - /// Exclusive scan with aggregate - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. 
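The shared-memory variant avoids the per-step lane check by padding the front of each warp's buffer with half a warp's worth of identity values: low lanes read identity instead of branching, and the exclusive result and aggregate fall straight out of the buffer. The sketch below illustrates that layout for a 32-thread warp summing ints; the helper name is hypothetical, and the __syncwarp() calls are an addition for post-Volta correctness (the original relies on lockstep warp execution).

// Padded-buffer scan sketch: 'buf' is 48 ints per warp, 16 identity slots
// followed by 32 data slots, so data[lane - offset] is always in bounds.
__device__ __forceinline__ int WarpInclusiveSumSmemSketch(int *buf, int x)
{
    const int lane = threadIdx.x & 31;

    if (lane < 16)
        buf[lane] = 0;             // identity padding read by lanes 0..15
    int *data = buf + 16;          // this lane's partial lives at data[lane]
    data[lane] = x;
    __syncwarp();

    #pragma unroll
    for (int offset = 1; offset < 32; offset <<= 1)
    {
        int addend = data[lane - offset];   // padding supplies 0 for low lanes
        __syncwarp();
        x += addend;
        data[lane] = x;                     // publish the updated partial
        __syncwarp();
    }

    // At this point the buffer also provides, with no extra work:
    //   exclusive result = data[lane - 1]  (padding covers lane 0)
    //   warp aggregate   = data[31]        (the last buffer element)
    return x;
}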
- { - // Exclusive warp scan (which does share final) - ExclusiveScan(input, output, identity, scan_op); - - // Retrieve aggregate - warp_aggregate = ThreadLoad(&temp_storage[warp_id][WARP_SMEM_ELEMENTS - 1]); - } - - - /// Exclusive scan without identity - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op) ///< [in] Binary scan operator - { - // Compute inclusive warp scan (no identity, share final) - T inclusive = BasicScan(input, scan_op); - - // Retrieve exclusive scan - output = ThreadLoad(&temp_storage[warp_id][HALF_WARP_THREADS + lane_id - 1]); - } - - - /// Exclusive scan with aggregate, without identity - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - // Exclusive warp scan (which does share final) - ExclusiveScan(input, output, scan_op); - - // Retrieve aggregate - warp_aggregate = ThreadLoad(&temp_storage[warp_id][WARP_SMEM_ELEMENTS - 1]); - } - -}; - - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/warp/warp_reduce.cuh b/kokkos/kokkos/TPL/cub/warp/warp_reduce.cuh deleted file mode 100644 index 548369d..0000000 --- a/kokkos/kokkos/TPL/cub/warp/warp_reduce.cuh +++ /dev/null @@ -1,677 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -/** - * \file - * The cub::WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across CUDA warp threads. - */ - -#pragma once - -#include "specializations/warp_reduce_shfl.cuh" -#include "specializations/warp_reduce_smem.cuh" -#include "../thread/thread_operators.cuh" -#include "../util_arch.cuh" -#include "../util_type.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - - -/** - * \addtogroup WarpModule - * @{ - */ - -/** - * \brief The WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across CUDA warp threads. ![](warp_reduce_logo.png) - * - * \par Overview - * A reduction (or fold) - * uses a binary combining operator to compute a single aggregate from a list of input elements. - * - * \tparam T The reduction input/output element type - * \tparam LOGICAL_WARPS [optional] The number of entrant "logical" warps performing concurrent warp reductions. Default is 1. - * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size of the targeted CUDA compute-capability (e.g., 32 threads for SM20). - * - * \par Simple Examples - * \warpcollective{WarpReduce} - * \par - * The code snippet below illustrates four concurrent warp sum reductions within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpReduce for 4 warps on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Return the warp-wide sums to each lane0 (threads 0, 32, 64, and 96) - * int aggregate = WarpReduce(temp_storage).Sum(thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, 1, 2, 3, ..., 127. - * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, - * \p 2544, and \p 3568, respectively (and is undefined in other threads). - * - * \par - * The code snippet below illustrates a single warp sum reduction within a block of - * 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpReduce for one warp on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * ... - * - * // Only the first warp performs a reduction - * if (threadIdx.x < 32) - * { - * // Obtain one input item per thread - * int thread_data = ... - * - * // Return the warp-wide sum to lane0 - * int aggregate = WarpReduce(temp_storage).Sum(thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the warp of threads is 0, 1, 2, 3, ..., 31. - * The corresponding output \p aggregate in thread0 will be \p 496 (and is undefined in other threads). 
- * - * \par Usage and Performance Considerations - * - Supports "logical" warps smaller than the physical warp size (e.g., logical warps of 8 threads) - * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS - * - Warp reductions are concurrent if more than one logical warp is participating - * - Uses special instructions when applicable (e.g., warp \p SHFL instructions) - * - Uses synchronization-free communication between warp lanes when applicable - * - Zero bank conflicts for most types - * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: - * - Summation (vs. generic reduction) - * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS - * - */ -template < - typename T, - int LOGICAL_WARPS = 1, - int LOGICAL_WARP_THREADS = PtxArchProps::WARP_THREADS> -class WarpReduce -{ -private: - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - enum - { - POW_OF_TWO = ((LOGICAL_WARP_THREADS & (LOGICAL_WARP_THREADS - 1)) == 0), - }; - -public: - - #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - - /// Internal specialization. Use SHFL-based reduction if (architecture is >= SM30) and ((only one logical warp) or (LOGICAL_WARP_THREADS is a power-of-two)) - typedef typename If<(CUB_PTX_ARCH >= 300) && ((LOGICAL_WARPS == 1) || POW_OF_TWO), - WarpReduceShfl, - WarpReduceSmem >::Type InternalWarpReduce; - - #endif // DOXYGEN_SHOULD_SKIP_THIS - - -private: - - /// Shared memory storage layout type for WarpReduce - typedef typename InternalWarpReduce::TempStorage _TempStorage; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Warp ID - int warp_id; - - /// Lane ID - int lane_id; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ TempStorage private_storage; - return private_storage; - } - - -public: - - /// \smemstorage{WarpReduce} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. - * - */ - __device__ __forceinline__ WarpReduce() - : - temp_storage(PrivateStorage()), - warp_id((LOGICAL_WARPS == 1) ? - 0 : - threadIdx.x / LOGICAL_WARP_THREADS), - lane_id(((LOGICAL_WARPS == 1) || (LOGICAL_WARP_THREADS == PtxArchProps::WARP_THREADS)) ? - LaneId() : - threadIdx.x % LOGICAL_WARP_THREADS) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. 
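The constructors above derive warp_id as threadIdx.x / LOGICAL_WARP_THREADS and lane_id as threadIdx.x % LOGICAL_WARP_THREADS, which is what makes the "logical warps smaller than the physical warp" feature from the considerations list work. A hedged usage sketch with 16-thread logical warps is given below; the <int, 8, 16> arguments follow the three-parameter signature above and are an assumption, as is the <cub/cub.cuh> include. Launch with 128 threads per block.

#include <cub/cub.cuh>   // assumed include path

__global__ void LogicalWarpSumExample(const int *d_in, int *d_out)
{
    // Eight logical warps of 16 threads each; 16 is a power of two, so the
    // SHFL-based specialization is still eligible on sm_30 and newer.
    typedef cub::WarpReduce<int, 8, 16> WarpReduce16;

    __shared__ typename WarpReduce16::TempStorage temp_storage;

    int thread_data = d_in[blockIdx.x * blockDim.x + threadIdx.x];

    // Each group of 16 consecutive threads reduces independently;
    // results are valid in the lane0 of each logical warp (threads 0, 16, 32, ...)
    int aggregate = WarpReduce16(temp_storage).Sum(thread_data);

    if ((threadIdx.x & 15) == 0)
        d_out[blockIdx.x * 8 + (threadIdx.x >> 4)] = aggregate;
}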
- */ - __device__ __forceinline__ WarpReduce( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - warp_id((LOGICAL_WARPS == 1) ? - 0 : - threadIdx.x / LOGICAL_WARP_THREADS), - lane_id(((LOGICAL_WARPS == 1) || (LOGICAL_WARP_THREADS == PtxArchProps::WARP_THREADS)) ? - LaneId() : - threadIdx.x % LOGICAL_WARP_THREADS) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Threads are identified using the given warp and lane identifiers. - */ - __device__ __forceinline__ WarpReduce( - int warp_id, ///< [in] A suitable warp membership identifier - int lane_id) ///< [in] A lane identifier within the warp - : - temp_storage(PrivateStorage()), - warp_id(warp_id), - lane_id(lane_id) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Threads are identified using the given warp and lane identifiers. - */ - __device__ __forceinline__ WarpReduce( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int warp_id, ///< [in] A suitable warp membership identifier - int lane_id) ///< [in] A lane identifier within the warp - : - temp_storage(temp_storage.Alias()), - warp_id(warp_id), - lane_id(lane_id) - {} - - - - //@} end member group - /******************************************************************//** - * \name Summation reductions - *********************************************************************/ - //@{ - - - /** - * \brief Computes a warp-wide sum in each active warp. The output is valid in warp lane0. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp sum reductions within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpReduce for 4 warps on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Return the warp-wide sums to each lane0 - * int aggregate = WarpReduce(temp_storage).Sum(thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, 1, 2, 3, ..., 127. - * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, - * \p 2544, and \p 3568, respectively (and is undefined in other threads). - * - */ - __device__ __forceinline__ T Sum( - T input) ///< [in] Calling thread's input - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).Sum(input, LOGICAL_WARP_THREADS); - } - - /** - * \brief Computes a partially-full warp-wide sum in each active warp. The output is valid in warp lane0. - * - * All threads in each logical warp must agree on the same value for \p valid_items. Otherwise the result is undefined. - * - * \smemreuse - * - * The code snippet below illustrates a sum reduction within a single, partially-full - * block of 32 threads (one warp). 
- * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, int valid_items) - * { - * // Specialize WarpReduce for a single warp on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item per thread if in range - * int thread_data; - * if (threadIdx.x < valid_items) - * thread_data = d_data[threadIdx.x]; - * - * // Return the warp-wide sums to each lane0 - * int aggregate = WarpReduce(temp_storage).Sum( - * thread_data, valid_items); - * - * \endcode - * \par - * Suppose the input \p d_data is 0, 1, 2, 3, 4, ... and \p valid_items - * is \p 4. The corresponding output \p aggregate in thread0 is \p 6 (and is - * undefined in other threads). - * - */ - __device__ __forceinline__ T Sum( - T input, ///< [in] Calling thread's input - int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) - { - // Determine if we don't need bounds checking - if (valid_items >= LOGICAL_WARP_THREADS) - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).Sum(input, valid_items); - } - else - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).Sum(input, valid_items); - } - } - - - /** - * \brief Computes a segmented sum in each active warp where segments are defined by head-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). - * - * \smemreuse - * - * The code snippet below illustrates a head-segmented warp sum - * reduction within a block of 32 threads (one warp). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpReduce for a single warp on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item and flag per thread - * int thread_data = ... - * int head_flag = ... - * - * // Return the warp-wide sums to each lane0 - * int aggregate = WarpReduce(temp_storage).HeadSegmentedSum( - * thread_data, head_flag); - * - * \endcode - * \par - * Suppose the set of input \p thread_data and \p head_flag across the block of threads - * is 0, 1, 2, 3, ..., 31 and is 1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, - * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be - * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). - * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - * - */ - template < - typename Flag> - __device__ __forceinline__ T HeadSegmentedSum( - T input, ///< [in] Calling thread's input - Flag head_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment - { - return HeadSegmentedReduce(input, head_flag, cub::Sum()); - } - - - /** - * \brief Computes a segmented sum in each active warp where segments are defined by tail-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). - * - * \smemreuse - * - * The code snippet below illustrates a tail-segmented warp sum - * reduction within a block of 32 threads (one warp). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * // Specialize WarpReduce for a single warp on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item and flag per thread - * int thread_data = ... - * int tail_flag = ... - * - * // Return the warp-wide sums to each lane0 - * int aggregate = WarpReduce(temp_storage).TailSegmentedSum( - * thread_data, tail_flag); - * - * \endcode - * \par - * Suppose the set of input \p thread_data and \p tail_flag across the block of threads - * is 0, 1, 2, 3, ..., 31 and is 0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, - * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be - * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). - * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template < - typename Flag> - __device__ __forceinline__ T TailSegmentedSum( - T input, ///< [in] Calling thread's input - Flag tail_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment - { - return TailSegmentedReduce(input, tail_flag, cub::Sum()); - } - - - - //@} end member group - /******************************************************************//** - * \name Generic reductions - *********************************************************************/ - //@{ - - /** - * \brief Computes a warp-wide reduction in each active warp using the specified binary reduction functor. The output is valid in warp lane0. - * - * Supports non-commutative reduction operators - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp max reductions within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpReduce for 4 warps on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Return the warp-wide reductions to each lane0 - * int aggregate = WarpReduce(temp_storage).Reduce( - * thread_data, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, 1, 2, 3, ..., 127. - * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 31, \p 63, - * \p 95, and \p 127, respectively (and is undefined in other threads). - * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ T Reduce( - T input, ///< [in] Calling thread's input - ReductionOp reduction_op) ///< [in] Binary reduction operator - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).Reduce(input, LOGICAL_WARP_THREADS, reduction_op); - } - - /** - * \brief Computes a partially-full warp-wide reduction in each active warp using the specified binary reduction functor. The output is valid in warp lane0. - * - * All threads in each logical warp must agree on the same value for \p valid_items. Otherwise the result is undefined. - * - * Supports non-commutative reduction operators - * - * \smemreuse - * - * The code snippet below illustrates a max reduction within a single, partially-full - * block of 32 threads (one warp). 
- * \par - * \code - * #include - * - * __global__ void ExampleKernel(int *d_data, int valid_items) - * { - * // Specialize WarpReduce for a single warp on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item per thread if in range - * int thread_data; - * if (threadIdx.x < valid_items) - * thread_data = d_data[threadIdx.x]; - * - * // Return the warp-wide reductions to each lane0 - * int aggregate = WarpReduce(temp_storage).Reduce( - * thread_data, cub::Max(), valid_items); - * - * \endcode - * \par - * Suppose the input \p d_data is 0, 1, 2, 3, 4, ... and \p valid_items - * is \p 4. The corresponding output \p aggregate in thread0 is \p 3 (and is - * undefined in other threads). - * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ T Reduce( - T input, ///< [in] Calling thread's input - ReductionOp reduction_op, ///< [in] Binary reduction operator - int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) - { - // Determine if we don't need bounds checking - if (valid_items >= LOGICAL_WARP_THREADS) - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).Reduce(input, valid_items, reduction_op); - } - else - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).Reduce(input, valid_items, reduction_op); - } - } - - - /** - * \brief Computes a segmented reduction in each active warp where segments are defined by head-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). - * - * Supports non-commutative reduction operators - * - * \smemreuse - * - * The code snippet below illustrates a head-segmented warp max - * reduction within a block of 32 threads (one warp). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpReduce for a single warp on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item and flag per thread - * int thread_data = ... - * int head_flag = ... - * - * // Return the warp-wide reductions to each lane0 - * int aggregate = WarpReduce(temp_storage).HeadSegmentedReduce( - * thread_data, head_flag, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data and \p head_flag across the block of threads - * is 0, 1, 2, 3, ..., 31 and is 1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, - * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be - * \p 3, \p 7, \p 11, etc. (and is undefined in other threads). 
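The partially-full reduction snippet above also appears with its template arguments stripped. A completed version is sketched below, following the documented contract that every lane must pass the same valid_items; assigning INT_MIN to out-of-range lanes is an assumption for definiteness (the original snippet leaves their thread_data unwritten, and with bounds checking those lanes never contribute). Launch with a single warp of 32 threads.

#include <climits>
#include <cub/cub.cuh>   // assumed include path

__global__ void PartialWarpMaxExample(const int *d_data, int *d_out, int valid_items)
{
    // One logical warp of 32 threads on type int
    typedef cub::WarpReduce<int> WarpReduce;

    __shared__ typename WarpReduce::TempStorage temp_storage;

    // Out-of-range lanes still take part in the collective; give them a
    // harmless value so nothing uninitialized is read.
    int thread_data = (threadIdx.x < valid_items) ? d_data[threadIdx.x] : INT_MIN;

    // Every lane must pass the same valid_items value
    int aggregate = WarpReduce(temp_storage).Reduce(thread_data, cub::Max(), valid_items);

    if (threadIdx.x == 0)
        *d_out = aggregate;   // e.g. 3 when d_data = 0,1,2,3,... and valid_items = 4
}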
- * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template < - typename ReductionOp, - typename Flag> - __device__ __forceinline__ T HeadSegmentedReduce( - T input, ///< [in] Calling thread's input - Flag head_flag, ///< [in] Head flag denoting whether or not \p input is the start of a new segment - ReductionOp reduction_op) ///< [in] Reduction operator - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).template SegmentedReduce(input, head_flag, reduction_op); - } - - - /** - * \brief Computes a segmented reduction in each active warp where segments are defined by tail-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). - * - * Supports non-commutative reduction operators - * - * \smemreuse - * - * The code snippet below illustrates a tail-segmented warp max - * reduction within a block of 32 threads (one warp). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpReduce for a single warp on type int - * typedef cub::WarpReduce WarpReduce; - * - * // Allocate shared memory for WarpReduce - * __shared__ typename WarpReduce::TempStorage temp_storage; - * - * // Obtain one input item and flag per thread - * int thread_data = ... - * int tail_flag = ... - * - * // Return the warp-wide reductions to each lane0 - * int aggregate = WarpReduce(temp_storage).TailSegmentedReduce( - * thread_data, tail_flag, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data and \p tail_flag across the block of threads - * is 0, 1, 2, 3, ..., 31 and is 0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, - * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be - * \p 3, \p 7, \p 11, etc. (and is undefined in other threads). - * - * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) - */ - template < - typename ReductionOp, - typename Flag> - __device__ __forceinline__ T TailSegmentedReduce( - T input, ///< [in] Calling thread's input - Flag tail_flag, ///< [in] Tail flag denoting whether or not \p input is the end of the current segment - ReductionOp reduction_op) ///< [in] Reduction operator - { - return InternalWarpReduce(temp_storage, warp_id, lane_id).template SegmentedReduce(input, tail_flag, reduction_op); - } - - - - //@} end member group -}; - -/** @} */ // end group WarpModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/cub/warp/warp_scan.cuh b/kokkos/kokkos/TPL/cub/warp/warp_scan.cuh deleted file mode 100644 index a588b52..0000000 --- a/kokkos/kokkos/TPL/cub/warp/warp_scan.cuh +++ /dev/null @@ -1,1297 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * The cub::WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across CUDA warp threads. - */ - -#pragma once - -#include "specializations/warp_scan_shfl.cuh" -#include "specializations/warp_scan_smem.cuh" -#include "../thread/thread_operators.cuh" -#include "../util_arch.cuh" -#include "../util_type.cuh" -#include "../util_namespace.cuh" - -/// Optional outer namespace(s) -CUB_NS_PREFIX - -/// CUB namespace -namespace cub { - -/** - * \addtogroup WarpModule - * @{ - */ - -/** - * \brief The WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across CUDA warp threads. ![](warp_scan_logo.png) - * - * \par Overview - * Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) - * produces an output list where each element is computed to be the reduction - * of the elements occurring earlier in the input list. Prefix sum - * connotes a prefix scan with the addition operator. The term \em inclusive indicates - * that the ith output reduction incorporates the ith input. - * The term \em exclusive indicates the ith input is not incorporated into - * the ith output reduction. - * - * \tparam T The scan input/output element type - * \tparam LOGICAL_WARPS [optional] The number of "logical" warps performing concurrent warp scans. Default is 1. - * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size associated with the CUDA Compute Capability targeted by the compiler (e.g., 32 threads for SM20). - * - * \par Simple Examples - * \warpcollective{WarpScan} - * \par - * The code snippet below illustrates four concurrent warp prefix sums within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... 
- * - * // Compute warp-wide prefix sums - * WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, 1, 1, .... - * The corresponding output \p thread_data in each of the four warps of threads will be - * 0, 1, 2, 3, ..., 31. - * - * \par - * The code snippet below illustrates a single warp prefix sum within a block of - * 128 threads. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for one warp on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * ... - * - * // Only the first warp performs a prefix sum - * if (threadIdx.x < 32) - * { - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute warp-wide prefix sums - * WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the warp of threads is 1, 1, 1, 1, .... - * The corresponding output \p thread_data will be 0, 1, 2, 3, ..., 31. - * - * \par Usage and Performance Considerations - * - Supports "logical" warps smaller than the physical warp size (e.g., a logical warp of 8 threads) - * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS - * - Warp scans are concurrent if more than one warp is participating - * - Uses special instructions when applicable (e.g., warp \p SHFL) - * - Uses synchronization-free communication between warp lanes when applicable - * - Zero bank conflicts for most types. - * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: - * - Summation (vs. generic scan) - * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS - * - */ -template < - typename T, - int LOGICAL_WARPS = 1, - int LOGICAL_WARP_THREADS = PtxArchProps::WARP_THREADS> -class WarpScan -{ -private: - - /****************************************************************************** - * Constants and typedefs - ******************************************************************************/ - - enum - { - POW_OF_TWO = ((LOGICAL_WARP_THREADS & (LOGICAL_WARP_THREADS - 1)) == 0), - }; - - /// Internal specialization. 
Use SHFL-based reduction if (architecture is >= SM30) and ((only one logical warp) or (LOGICAL_WARP_THREADS is a power-of-two)) - typedef typename If<(CUB_PTX_ARCH >= 300) && ((LOGICAL_WARPS == 1) || POW_OF_TWO), - WarpScanShfl, - WarpScanSmem >::Type InternalWarpScan; - - /// Shared memory storage layout type for WarpScan - typedef typename InternalWarpScan::TempStorage _TempStorage; - - - /****************************************************************************** - * Thread fields - ******************************************************************************/ - - /// Shared storage reference - _TempStorage &temp_storage; - - /// Warp ID - int warp_id; - - /// Lane ID - int lane_id; - - - /****************************************************************************** - * Utility methods - ******************************************************************************/ - - /// Internal storage allocator - __device__ __forceinline__ _TempStorage& PrivateStorage() - { - __shared__ TempStorage private_storage; - return private_storage; - } - - -public: - - /// \smemstorage{WarpScan} - struct TempStorage : Uninitialized<_TempStorage> {}; - - - /******************************************************************//** - * \name Collective constructors - *********************************************************************/ - //@{ - - /** - * \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. - */ - __device__ __forceinline__ WarpScan() - : - temp_storage(PrivateStorage()), - warp_id((LOGICAL_WARPS == 1) ? - 0 : - threadIdx.x / LOGICAL_WARP_THREADS), - lane_id(((LOGICAL_WARPS == 1) || (LOGICAL_WARP_THREADS == PtxArchProps::WARP_THREADS)) ? - LaneId() : - threadIdx.x % LOGICAL_WARP_THREADS) - {} - - - /** - * \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. - */ - __device__ __forceinline__ WarpScan( - TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage - : - temp_storage(temp_storage.Alias()), - warp_id((LOGICAL_WARPS == 1) ? - 0 : - threadIdx.x / LOGICAL_WARP_THREADS), - lane_id(((LOGICAL_WARPS == 1) || (LOGICAL_WARP_THREADS == PtxArchProps::WARP_THREADS)) ? - LaneId() : - threadIdx.x % LOGICAL_WARP_THREADS) - {} - - - /** - * \brief Collective constructor using a private static allocation of shared memory as temporary storage. Threads are identified using the given warp and lane identifiers. - */ - __device__ __forceinline__ WarpScan( - int warp_id, ///< [in] A suitable warp membership identifier - int lane_id) ///< [in] A lane identifier within the warp - : - temp_storage(PrivateStorage()), - warp_id(warp_id), - lane_id(lane_id) - {} - - - /** - * \brief Collective constructor using the specified memory allocation as temporary storage. Threads are identified using the given warp and lane identifiers. 
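The class-level prefix-sum example earlier in this file also lost its template arguments in the diff. A completed version is sketched below, assuming the WarpScan<T, LOGICAL_WARPS, LOGICAL_WARP_THREADS> signature above and the conventional <cub/cub.cuh> include. For the all-ones input described in the class comment, each warp writes back 0, 1, 2, ..., 31. Launch with 128 threads per block.

#include <cub/cub.cuh>   // assumed include path

__global__ void WarpPrefixSumExample(int *d_data)
{
    // Specialize WarpScan for 4 logical warps on type int
    typedef cub::WarpScan<int, 4> WarpScan;

    // Allocate shared memory for WarpScan
    __shared__ typename WarpScan::TempStorage temp_storage;

    // Obtain one input item per thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int thread_data = d_data[idx];

    // Compute exclusive warp-wide prefix sums in place
    WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data);

    d_data[idx] = thread_data;
}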
- */ - __device__ __forceinline__ WarpScan( - TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage - int warp_id, ///< [in] A suitable warp membership identifier - int lane_id) ///< [in] A lane identifier within the warp - : - temp_storage(temp_storage.Alias()), - warp_id(warp_id), - lane_id(lane_id) - {} - - - //@} end member group - /******************************************************************//** - * \name Inclusive prefix sums - *********************************************************************/ - //@{ - - - /** - * \brief Computes an inclusive prefix sum in each logical warp. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute inclusive warp-wide prefix sums - * WarpScan(temp_storage).InclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, 1, 1, .... - * The corresponding output \p thread_data in each of the four warps of threads will be - * 1, 2, 3, ..., 32. - */ - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output) ///< [out] Calling thread's output item. May be aliased with \p input. - { - InternalWarpScan(temp_storage, warp_id, lane_id).InclusiveSum(input, output); - } - - - /** - * \brief Computes an inclusive prefix sum in each logical warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * The \p warp_aggregate is undefined in threads other than warp-lane0. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute inclusive warp-wide prefix sums - * int warp_aggregate; - * WarpScan(temp_storage).InclusiveSum(thread_data, thread_data, warp_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, 1, 1, .... - * The corresponding output \p thread_data in each of the four warps of threads will be - * 1, 2, 3, ..., 32. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. - */ - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - InternalWarpScan(temp_storage, warp_id, lane_id).InclusiveSum(input, output, warp_aggregate); - } - - - /** - * \brief Computes an inclusive prefix sum in each logical warp. 
Instead of using 0 as the warp-wide prefix, the call-back functor \p warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * The \p warp_aggregate is undefined in threads other than warp-lane0. - * - * The \p warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). - * The functor's input parameter \p warp_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the entire warp of threads, however only the return value from - * lane0 is applied as the threadblock-wide prefix. Can be stateful. - * - * \smemreuse - * - * The code snippet below illustrates a single thread block of 32 threads (one warp) that progressively - * computes an inclusive prefix sum over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 32 integer items that are partitioned across the warp. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct WarpPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ WarpPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the entire warp. Lane-0 is responsible - * // for returning a value for seeding the warp-wide scan. - * __device__ int operator()(int warp_aggregate) - * { - * int old_prefix = running_total; - * running_total += warp_aggregate; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize WarpScan for one warp - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Initialize running total - * WarpPrefixOp prefix_op(0); - * - * // Have the warp iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 32) - * { - * // Load a segment of consecutive items - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the warp-wide inclusive prefix sum - * int warp_aggregate; - * WarpScan(temp_storage).InclusiveSum( - * thread_data, thread_data, warp_aggregate, prefix_op); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... - * The corresponding output for the first segment will be 1, 2, 3, ..., 32. - * The output for the second segment will be 33, 34, 35, ..., 64. Furthermore, - * the value \p 32 will be stored in \p warp_aggregate for all threads after each scan. - * - * \tparam WarpPrefixOp [inferred] Call-back functor type having member T operator()(T warp_aggregate) - */ - template - __device__ __forceinline__ void InclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T &warp_aggregate, ///< [out] [warp-lane0 only] Warp-wide aggregate reduction of input items, exclusive of the \p warp_prefix_op value - WarpPrefixOp &warp_prefix_op) ///< [in-out] [warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs. 
-    {
-        // Compute inclusive warp scan
-        InclusiveSum(input, output, warp_aggregate);
-
-        // Compute warp-wide prefix from aggregate, then broadcast to other lanes
-        T prefix;
-        prefix = warp_prefix_op(warp_aggregate);
-        prefix = InternalWarpScan(temp_storage, warp_id, lane_id).Broadcast(prefix, 0);
-
-        // Update output
-        output = prefix + output;
-    }
-
-    //@} end member group
-
-private:
-
-    /// Computes an exclusive prefix sum in each logical warp.
-    __device__ __forceinline__ void ExclusiveSum(T input, T &output, Int2Type<true> is_primitive)
-    {
-        // Compute exclusive warp scan from inclusive warp scan
-        T inclusive;
-        InclusiveSum(input, inclusive);
-        output = inclusive - input;
-    }
-
-    /// Computes an exclusive prefix sum in each logical warp. Specialized for non-primitive types.
-    __device__ __forceinline__ void ExclusiveSum(T input, T &output, Int2Type<false> is_primitive)
-    {
-        // Delegate to regular scan for non-primitive types (because we won't be able to use subtraction)
-        T identity = T();
-        ExclusiveScan(input, output, identity, Sum());
-    }
-
-    /// Computes an exclusive prefix sum in each logical warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs.
-    __device__ __forceinline__ void ExclusiveSum(T input, T &output, T &warp_aggregate, Int2Type<true> is_primitive)
-    {
-        // Compute exclusive warp scan from inclusive warp scan
-        T inclusive;
-        InclusiveSum(input, inclusive, warp_aggregate);
-        output = inclusive - input;
-    }
-
-    /// Computes an exclusive prefix sum in each logical warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. Specialized for non-primitive types.
-    __device__ __forceinline__ void ExclusiveSum(T input, T &output, T &warp_aggregate, Int2Type<false> is_primitive)
-    {
-        // Delegate to regular scan for non-primitive types (because we won't be able to use subtraction)
-        T identity = T();
-        ExclusiveScan(input, output, identity, Sum(), warp_aggregate);
-    }
-
-    /// Computes an exclusive prefix sum in each logical warp. Instead of using 0 as the warp-wide prefix, the call-back functor \p warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also provides every thread with the warp-wide \p warp_aggregate of all inputs.
-    template <typename WarpPrefixOp>
-    __device__ __forceinline__ void ExclusiveSum(T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op, Int2Type<true> is_primitive)
-    {
-        // Compute exclusive warp scan from inclusive warp scan
-        T inclusive;
-        InclusiveSum(input, inclusive, warp_aggregate, warp_prefix_op);
-        output = inclusive - input;
-    }
-
-    /// Computes an exclusive prefix sum in each logical warp. Instead of using 0 as the warp-wide prefix, the call-back functor \p warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. Specialized for non-primitive types.
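[Editor's note] The primitive-type helpers above derive the exclusive sum from the inclusive sum as output = inclusive - input, falling back to a full ExclusiveScan with a default-constructed identity when subtraction is unavailable. The standalone warp-shuffle sketch below illustrates the same identity in plain CUDA; it is not CUB's implementation and assumes a full 32-lane warp.

// Illustration only (not CUB's implementation): an inclusive warp prefix sum
// built from __shfl_up_sync, with the exclusive result derived as
// (inclusive - input), mirroring the primitive-type helpers above.
// Assumes all 32 lanes of the warp are active.
__device__ __forceinline__ void WarpPrefixSums(int input, int &inclusive, int &exclusive)
{
    const unsigned full_mask = 0xffffffffu;
    const int lane = threadIdx.x & 31;

    // Kogge-Stone inclusive scan across the warp
    inclusive = input;
    for (int offset = 1; offset < 32; offset <<= 1)
    {
        int neighbor = __shfl_up_sync(full_mask, inclusive, offset);
        if (lane >= offset)
            inclusive += neighbor;
    }

    // Exclusive sum: subtract the calling thread's own input (lane 0 gets 0)
    exclusive = inclusive - input;
}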
- template - __device__ __forceinline__ void ExclusiveSum(T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op, Int2Type is_primitive) - { - // Delegate to regular scan for non-primitive types (because we won't be able to use subtraction) - T identity = T(); - ExclusiveScan(input, output, identity, Sum(), warp_aggregate, warp_prefix_op); - } - -public: - - - /******************************************************************//** - * \name Exclusive prefix sums - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive prefix sum in each logical warp. - * - * This operation assumes the value of obtained by the T's default - * constructor (or by zero-initialization if no user-defined default - * constructor exists) is suitable as the identity value "zero" for - * addition. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute exclusive warp-wide prefix sums - * WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, 1, 1, .... - * The corresponding output \p thread_data in each of the four warps of threads will be - * 0, 1, 2, ..., 31. - * - */ - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output) ///< [out] Calling thread's output item. May be aliased with \p input. - { - ExclusiveSum(input, output, Int2Type::PRIMITIVE>()); - } - - - /** - * \brief Computes an exclusive prefix sum in each logical warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * This operation assumes the value of obtained by the T's default - * constructor (or by zero-initialization if no user-defined default - * constructor exists) is suitable as the identity value "zero" for - * addition. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute exclusive warp-wide prefix sums - * int warp_aggregate; - * WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data, warp_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 1, 1, 1, 1, .... - * The corresponding output \p thread_data in each of the four warps of threads will be - * 0, 1, 2, ..., 31. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. - */ - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. 
- T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - ExclusiveSum(input, output, warp_aggregate, Int2Type::PRIMITIVE>()); - } - - - /** - * \brief Computes an exclusive prefix sum in each logical warp. Instead of using 0 as the warp-wide prefix, the call-back functor \p warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * This operation assumes the value of obtained by the T's default - * constructor (or by zero-initialization if no user-defined default - * constructor exists) is suitable as the identity value "zero" for - * addition. - * - * The \p warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). - * The functor's input parameter \p warp_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the entire warp of threads, however only the return value from - * lane0 is applied as the threadblock-wide prefix. Can be stateful. - * - * \smemreuse - * - * The code snippet below illustrates a single thread block of 32 threads (one warp) that progressively - * computes an exclusive prefix sum over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 32 integer items that are partitioned across the warp. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct WarpPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ WarpPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the entire warp. Lane-0 is responsible - * // for returning a value for seeding the warp-wide scan. - * __device__ int operator()(int warp_aggregate) - * { - * int old_prefix = running_total; - * running_total += warp_aggregate; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize WarpScan for one warp - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Initialize running total - * WarpPrefixOp prefix_op(0); - * - * // Have the warp iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 32) - * { - * // Load a segment of consecutive items - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the warp-wide exclusive prefix sum - * int warp_aggregate; - * WarpScan(temp_storage).ExclusiveSum( - * thread_data, thread_data, warp_aggregate, prefix_op); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... - * The corresponding output for the first segment will be 0, 1, 2, ..., 31. - * The output for the second segment will be 32, 33, 34, ..., 63. Furthermore, - * the value \p 32 will be stored in \p warp_aggregate for all threads after each scan. - * - * \tparam WarpPrefixOp [inferred] Call-back functor type having member T operator()(T warp_aggregate) - */ - template - __device__ __forceinline__ void ExclusiveSum( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. 
May be aliased with \p input. - T &warp_aggregate, ///< [out] [warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the \p warp_prefix_op value). - WarpPrefixOp &warp_prefix_op) ///< [in-out] [warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs. - { - ExclusiveSum(input, output, warp_aggregate, warp_prefix_op, Int2Type::PRIMITIVE>()); - } - - - //@} end member group - /******************************************************************//** - * \name Inclusive prefix scans - *********************************************************************/ - //@{ - - /** - * \brief Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute inclusive warp-wide prefix max scans - * WarpScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. - * The corresponding output \p thread_data in the first warp would be - * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op) ///< [in] Binary scan operator - { - InternalWarpScan(temp_storage, warp_id, lane_id).InclusiveScan(input, output, scan_op); - } - - - /** - * \brief Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute inclusive warp-wide prefix max scans - * int warp_aggregate; - * WarpScan(temp_storage).InclusiveScan( - * thread_data, thread_data, cub::Max(), warp_aggregate); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. - * The corresponding output \p thread_data in the first warp would be - * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. 
- * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads - * in the second warp, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - InternalWarpScan(temp_storage, warp_id, lane_id).InclusiveScan(input, output, scan_op, warp_aggregate); - } - - - /** - * \brief Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. The call-back functor \p warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * The \p warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). - * The functor's input parameter \p warp_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the entire warp of threads, however only the return value from - * lane0 is applied as the threadblock-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates a single thread block of 32 threads (one warp) that progressively - * computes an inclusive prefix max scan over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 32 integer items that are partitioned across the warp. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct WarpPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ WarpPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the entire warp. Lane-0 is responsible - * // for returning a value for seeding the warp-wide scan. - * __device__ int operator()(int warp_aggregate) - * { - * int old_prefix = running_total; - * running_total = (warp_aggregate > old_prefix) ? warp_aggregate : old_prefix; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize WarpScan for one warp - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Initialize running total - * WarpPrefixOp prefix_op(0); - * - * // Have the warp iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 32) - * { - * // Load a segment of consecutive items - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the warp-wide inclusive prefix max scan - * int warp_aggregate; - * WarpScan(temp_storage).InclusiveScan( - * thread_data, thread_data, cub::Max(), warp_aggregate, prefix_op); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... - * The corresponding output for the first segment will be 0, 0, 2, 2, ..., 30, 30. 
- * The output for the second segment will be 32, 32, 34, 34, ..., 62, 62. Furthermore, - * \p block_aggregate will be assigned \p 30 in all threads after the first scan, assigned \p 62 after the second - * scan, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam WarpPrefixOp [inferred] Call-back functor type having member T operator()(T warp_aggregate) - */ - template < - typename ScanOp, - typename WarpPrefixOp> - __device__ __forceinline__ void InclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate, ///< [out] [warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the \p warp_prefix_op value). - WarpPrefixOp &warp_prefix_op) ///< [in-out] [warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs. - { - // Compute inclusive warp scan - InclusiveScan(input, output, scan_op, warp_aggregate); - - // Compute warp-wide prefix from aggregate, then broadcast to other lanes - T prefix; - prefix = warp_prefix_op(warp_aggregate); - prefix = InternalWarpScan(temp_storage, warp_id, lane_id).Broadcast(prefix, 0); - - // Update output - output = scan_op(prefix, output); - } - - - //@} end member group - /******************************************************************//** - * \name Exclusive prefix scans - *********************************************************************/ - //@{ - - /** - * \brief Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute exclusive warp-wide prefix max scans - * WarpScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. - * The corresponding output \p thread_data in the first warp would be - * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T identity, ///< [in] Identity value - ScanOp scan_op) ///< [in] Binary scan operator - { - InternalWarpScan(temp_storage, warp_id, lane_id).ExclusiveScan(input, output, identity, scan_op); - } - - - /** - * \brief Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * Supports non-commutative scan operators. 
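[Editor's note] The running-prefix pattern documented above (a WarpPrefixOp functor whose operator() is entered by every lane with the warp aggregate, and whose lane0 return value seeds the next tile's scan) can be reconstructed as below. The WarpScan template arguments were stripped from this diff, so <int, 1> is an assumption; the per-lane load index and the INT_MIN seed are illustrative choices, and num_items is assumed to be a multiple of 32.

// Hedged reconstruction of the running-prefix (WarpPrefixOp) max-scan pattern.
#include <limits.h>
#include <cub/cub.cuh>

struct WarpPrefixOp
{
    int running_total;

    __device__ WarpPrefixOp(int running_total) : running_total(running_total) {}

    // Entered by the whole warp; lane0's return value seeds the next tile
    __device__ int operator()(int warp_aggregate)
    {
        int old_prefix = running_total;
        running_total = (warp_aggregate > old_prefix) ? warp_aggregate : old_prefix;
        return old_prefix;
    }
};

__global__ void RunningMaxScanKernel(int *d_data, int num_items)
{
    typedef cub::WarpScan<int, 1> WarpScan;   // one warp; template arguments assumed
    __shared__ typename WarpScan::TempStorage temp_storage;

    WarpPrefixOp prefix_op(INT_MIN);          // neutral seed for a max scan

    for (int block_offset = 0; block_offset < num_items; block_offset += 32)
    {
        int thread_data = d_data[block_offset + threadIdx.x];

        int warp_aggregate;
        WarpScan(temp_storage).InclusiveScan(
            thread_data, thread_data, cub::Max(), warp_aggregate, prefix_op);

        d_data[block_offset + threadIdx.x] = thread_data;
    }
}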
- * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute exclusive warp-wide prefix max scans - * WarpScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. - * The corresponding output \p thread_data in the first warp would be - * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. - * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads - * in the second warp, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - InternalWarpScan(temp_storage, warp_id, lane_id).ExclusiveScan(input, output, identity, scan_op, warp_aggregate); - } - - - /** - * \brief Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. The call-back functor \p warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * The \p warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). - * The functor's input parameter \p warp_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the entire warp of threads, however only the return value from - * lane0 is applied as the threadblock-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates a single thread block of 32 threads (one warp) that progressively - * computes an exclusive prefix max scan over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 32 integer items that are partitioned across the warp. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct WarpPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ WarpPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the entire warp. Lane-0 is responsible - * // for returning a value for seeding the warp-wide scan. - * __device__ int operator()(int warp_aggregate) - * { - * int old_prefix = running_total; - * running_total = (warp_aggregate > old_prefix) ? 
warp_aggregate : old_prefix; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize WarpScan for one warp - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Initialize running total - * WarpPrefixOp prefix_op(INT_MIN); - * - * // Have the warp iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 32) - * { - * // Load a segment of consecutive items - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the warp-wide exclusive prefix max scan - * int warp_aggregate; - * WarpScan(temp_storage).ExclusiveScan( - * thread_data, thread_data, INT_MIN, cub::Max(), warp_aggregate, prefix_op); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... - * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, ..., 28, 30. - * The output for the second segment will be 30, 32, 32, 34, ..., 60, 62. Furthermore, - * \p block_aggregate will be assigned \p 30 in all threads after the first scan, assigned \p 62 after the second - * scan, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam WarpPrefixOp [inferred] Call-back functor type having member T operator()(T warp_aggregate) - */ - template < - typename ScanOp, - typename WarpPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - T identity, ///< [in] Identity value - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate, ///< [out] [warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the \p warp_prefix_op value). - WarpPrefixOp &warp_prefix_op) ///< [in-out] [warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs. - { - // Exclusive warp scan - ExclusiveScan(input, output, identity, scan_op, warp_aggregate); - - // Compute warp-wide prefix from aggregate, then broadcast to other lanes - T prefix = warp_prefix_op(warp_aggregate); - prefix = InternalWarpScan(temp_storage, warp_id, lane_id).Broadcast(prefix, 0); - - // Update output - output = (lane_id == 0) ? - prefix : - scan_op(prefix, output); - } - - - //@} end member group - /******************************************************************//** - * \name Identityless exclusive prefix scans - *********************************************************************/ - //@{ - - - /** - * \brief Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Because no identity value is supplied, the \p output computed for warp-lane0 is undefined. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute exclusive warp-wide prefix max scans - * WarpScan(temp_storage).ExclusiveScan(thread_data, thread_data, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. - * The corresponding output \p thread_data in the first warp would be - * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. - * (The output \p thread_data in each warp lane0 is undefined.) - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op) ///< [in] Binary scan operator - { - InternalWarpScan(temp_storage, warp_id, lane_id).ExclusiveScan(input, output, scan_op); - } - - - /** - * \brief Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Because no identity value is supplied, the \p output computed for warp-lane0 is undefined. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of - * 128 threads (one per each of the 32-thread warps). - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * // Specialize WarpScan for 4 warps on type int - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Obtain one input item per thread - * int thread_data = ... - * - * // Compute exclusive warp-wide prefix max scans - * WarpScan(temp_storage).ExclusiveScan(thread_data, thread_data, cub::Max()); - * - * \endcode - * \par - * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. - * The corresponding output \p thread_data in the first warp would be - * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. - * (The output \p thread_data in each warp lane0 is undefined.) Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads - * in the second warp, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - */ - template - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. - { - InternalWarpScan(temp_storage, warp_id, lane_id).ExclusiveScan(input, output, scan_op, warp_aggregate); - } - - - /** - * \brief Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. The \p warp_prefix_op value from thread-thread-lane0 is applied to all scan outputs. 
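[Editor's note] The identityless overloads above leave each warp-lane0 output undefined, whereas the identity-taking forms earlier seed lane0 with the supplied value (INT_MIN in the max-scan examples). A sketch contrasting the two, again with assumed <int, 4> template arguments and illustrative inputs:

#include <limits.h>
#include <cub/cub.cuh>

__global__ void ExclusiveMaxKernel(int *d_with_identity, int *d_without_identity)
{
    typedef cub::WarpScan<int, 4> WarpScan;   // template arguments assumed
    __shared__ typename WarpScan::TempStorage temp_storage;

    int a = threadIdx.x;   // illustrative inputs
    int b = threadIdx.x;

    // Identity form: warp-lane0 receives INT_MIN
    WarpScan(temp_storage).ExclusiveScan(a, a, INT_MIN, cub::Max());

    __syncthreads();       // barrier before reusing temp_storage

    // Identityless form: warp-lane0's output is undefined
    WarpScan(temp_storage).ExclusiveScan(b, b, cub::Max());

    d_with_identity[threadIdx.x] = a;
    d_without_identity[threadIdx.x] = b;
}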
Also computes the warp-wide \p warp_aggregate of all inputs for thread-thread-lane0. - * - * The \p warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). - * The functor's input parameter \p warp_aggregate is the same value also returned by the scan operation. - * The functor will be invoked by the entire warp of threads, however only the return value from - * lane0 is applied as the threadblock-wide prefix. Can be stateful. - * - * Supports non-commutative scan operators. - * - * \smemreuse - * - * The code snippet below illustrates a single thread block of 32 threads (one warp) that progressively - * computes an exclusive prefix max scan over multiple "tiles" of input using a - * prefix functor to maintain a running total between block-wide scans. Each tile consists - * of 32 integer items that are partitioned across the warp. - * \par - * \code - * #include - * - * // A stateful callback functor that maintains a running prefix to be applied - * // during consecutive scan operations. - * struct WarpPrefixOp - * { - * // Running prefix - * int running_total; - * - * // Constructor - * __device__ WarpPrefixOp(int running_total) : running_total(running_total) {} - * - * // Callback operator to be entered by the entire warp. Lane-0 is responsible - * // for returning a value for seeding the warp-wide scan. - * __device__ int operator()(int warp_aggregate) - * { - * int old_prefix = running_total; - * running_total = (warp_aggregate > old_prefix) ? warp_aggregate : old_prefix; - * return old_prefix; - * } - * }; - * - * __global__ void ExampleKernel(int *d_data, int num_items, ...) - * { - * // Specialize WarpScan for one warp - * typedef cub::WarpScan WarpScan; - * - * // Allocate shared memory for WarpScan - * __shared__ typename WarpScan::TempStorage temp_storage; - * - * // Initialize running total - * WarpPrefixOp prefix_op(INT_MIN); - * - * // Have the warp iterate over segments of items - * for (int block_offset = 0; block_offset < num_items; block_offset += 32) - * { - * // Load a segment of consecutive items - * int thread_data = d_data[block_offset]; - * - * // Collectively compute the warp-wide exclusive prefix max scan - * int warp_aggregate; - * WarpScan(temp_storage).ExclusiveScan( - * thread_data, thread_data, INT_MIN, cub::Max(), warp_aggregate, prefix_op); - * - * // Store scanned items to output segment - * d_data[block_offset] = thread_data; - * } - * \endcode - * \par - * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... - * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, ..., 28, 30. - * The output for the second segment will be 30, 32, 32, 34, ..., 60, 62. Furthermore, - * \p block_aggregate will be assigned \p 30 in all threads after the first scan, assigned \p 62 after the second - * scan, etc. - * - * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) - * \tparam WarpPrefixOp [inferred] Call-back functor type having member T operator()(T warp_aggregate) - */ - template < - typename ScanOp, - typename WarpPrefixOp> - __device__ __forceinline__ void ExclusiveScan( - T input, ///< [in] Calling thread's input item. - T &output, ///< [out] Calling thread's output item. May be aliased with \p input. - ScanOp scan_op, ///< [in] Binary scan operator - T &warp_aggregate, ///< [out] [warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the \p warp_prefix_op value). 
- WarpPrefixOp &warp_prefix_op) ///< [in-out] [warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs. - { - // Exclusive warp scan - ExclusiveScan(input, output, scan_op, warp_aggregate); - - // Compute warp-wide prefix from aggregate, then broadcast to other lanes - T prefix = warp_prefix_op(warp_aggregate); - prefix = InternalWarpScan(temp_storage, warp_id, lane_id).Broadcast(prefix, 0); - - // Update output with prefix - output = (lane_id == 0) ? - prefix : - scan_op(prefix, output); - } - - //@} end member group -}; - -/** @} */ // end group WarpModule - -} // CUB namespace -CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/kokkos/kokkos/TPL/gtest.readme b/kokkos/kokkos/TPL/gtest.readme deleted file mode 100644 index 91cc4c3..0000000 --- a/kokkos/kokkos/TPL/gtest.readme +++ /dev/null @@ -1,18 +0,0 @@ -These files were created using the python script included with the gtest distribution. - -# Fusing Google Test Source Files -# -# Google Test's implementation consists of ~30 files (excluding its own tests). -# Sometimes you may want them to be packaged up in two files (a .h and a .cc) instead, -# such that you can easily copy them to a new machine and start hacking there. For -# this we provide an experimental Python script fuse_gtest_files.py in the scripts/ -# directory (since release 1.3.0). Assuming you have Python 2.4 or above installed on -# your machine, just go to that directory and run -# -# python fuse_gtest_files.py OUTPUT_DIR -# -# and you should see an OUTPUT_DIR directory being created with files gtest/gtest.h -# and gtest/gtest-all.cc in it. These files contain everything you need to use Google -# Test. Just copy them to anywhere you want and you are ready to write tests. You can -# use the scripts/test/Makefile file as an example on how to compile your tests against -# them. diff --git a/kokkos/kokkos/TPL/gtest/GTEST_COPYING b/kokkos/kokkos/TPL/gtest/GTEST_COPYING deleted file mode 100644 index 1941a11..0000000 --- a/kokkos/kokkos/TPL/gtest/GTEST_COPYING +++ /dev/null @@ -1,28 +0,0 @@ -Copyright 2008, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
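[Editor's note] For context on the gtest.readme above: a test compiled against the fused gtest/gtest.h and gtest/gtest-all.cc looks no different from one compiled against a regular Google Test checkout. A minimal hedged sketch; the file name, directory, and compile line are illustrative only and not taken from this repository.

// minimal_test.cc -- a test compiled against the fused sources, e.g.
//   g++ -I OUTPUT_DIR minimal_test.cc OUTPUT_DIR/gtest/gtest-all.cc -lpthread
#include "gtest/gtest.h"

TEST(FusedGtestSmokeTest, BasicArithmetic)
{
    EXPECT_EQ(4, 2 + 2);
    EXPECT_LT(1, 2);
}

int main(int argc, char **argv)
{
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}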
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/kokkos/kokkos/TPL/gtest/gtest-all.cc b/kokkos/kokkos/TPL/gtest/gtest-all.cc deleted file mode 100644 index e0a578d..0000000 --- a/kokkos/kokkos/TPL/gtest/gtest-all.cc +++ /dev/null @@ -1,9118 +0,0 @@ -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: mheule@google.com (Markus Heule) -// -// Google C++ Testing Framework (Google Test) -// -// Sometimes it's desirable to build Google Test by compiling a single file. -// This file serves this purpose. - -// This line ensures that gtest.h can be compiled on its own, even -// when it's fused. -#include - -// The following lines pull in the real gtest *.cc files. -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) -// -// The Google C++ Testing Framework (Google Test) - -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) -// -// Utilities for testing Google Test itself and code that uses Google Test -// (e.g. frameworks built on top of Google Test). - -#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_ -#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_ - - -namespace testing { - -// This helper class can be used to mock out Google Test failure reporting -// so that we can test Google Test or code that builds on Google Test. -// -// An object of this class appends a TestPartResult object to the -// TestPartResultArray object given in the constructor whenever a Google Test -// failure is reported. It can either intercept only failures that are -// generated in the same thread that created this object or it can intercept -// all generated failures. The scope of this mock object can be controlled with -// the second argument to the two arguments constructor. -class GTEST_API_ ScopedFakeTestPartResultReporter - : public TestPartResultReporterInterface { - public: - // The two possible mocking modes of this object. 
- enum InterceptMode { - INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures. - INTERCEPT_ALL_THREADS // Intercepts all failures. - }; - - // The c'tor sets this object as the test part result reporter used - // by Google Test. The 'result' parameter specifies where to report the - // results. This reporter will only catch failures generated in the current - // thread. DEPRECATED - explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result); - - // Same as above, but you can choose the interception scope of this object. - ScopedFakeTestPartResultReporter(InterceptMode intercept_mode, - TestPartResultArray* result); - - // The d'tor restores the previous test part result reporter. - virtual ~ScopedFakeTestPartResultReporter(); - - // Appends the TestPartResult object to the TestPartResultArray - // received in the constructor. - // - // This method is from the TestPartResultReporterInterface - // interface. - virtual void ReportTestPartResult(const TestPartResult& result); - private: - void Init(); - - const InterceptMode intercept_mode_; - TestPartResultReporterInterface* old_reporter_; - TestPartResultArray* const result_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter); -}; - -namespace internal { - -// A helper class for implementing EXPECT_FATAL_FAILURE() and -// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given -// TestPartResultArray contains exactly one failure that has the given -// type and contains the given substring. If that's not the case, a -// non-fatal failure will be generated. -class GTEST_API_ SingleFailureChecker { - public: - // The constructor remembers the arguments. - SingleFailureChecker(const TestPartResultArray* results, - TestPartResult::Type type, - const string& substr); - ~SingleFailureChecker(); - private: - const TestPartResultArray* const results_; - const TestPartResult::Type type_; - const string substr_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker); -}; - -} // namespace internal - -} // namespace testing - -// A set of macros for testing Google Test assertions or code that's expected -// to generate Google Test fatal failures. It verifies that the given -// statement will cause exactly one fatal Google Test failure with 'substr' -// being part of the failure message. -// -// There are two different versions of this macro. EXPECT_FATAL_FAILURE only -// affects and considers failures generated in the current thread and -// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads. -// -// The verification of the assertion is done correctly even when the statement -// throws an exception or aborts the current function. -// -// Known restrictions: -// - 'statement' cannot reference local non-static variables or -// non-static members of the current object. -// - 'statement' cannot return a value. -// - You cannot stream a failure message to this macro. -// -// Note that even though the implementations of the following two -// macros are much alike, we cannot refactor them to use a common -// helper macro, due to some peculiarity in how the preprocessor -// works. The AcceptsMacroThatExpandsToUnprotectedComma test in -// gtest_unittest.cc will fail to compile if we do that. 
-#define EXPECT_FATAL_FAILURE(statement, substr) \ - do { \ - class GTestExpectFatalFailureHelper {\ - public:\ - static void Execute() { statement; }\ - };\ - ::testing::TestPartResultArray gtest_failures;\ - ::testing::internal::SingleFailureChecker gtest_checker(\ - >est_failures, ::testing::TestPartResult::kFatalFailure, (substr));\ - {\ - ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ - ::testing::ScopedFakeTestPartResultReporter:: \ - INTERCEPT_ONLY_CURRENT_THREAD, >est_failures);\ - GTestExpectFatalFailureHelper::Execute();\ - }\ - } while (::testing::internal::AlwaysFalse()) - -#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \ - do { \ - class GTestExpectFatalFailureHelper {\ - public:\ - static void Execute() { statement; }\ - };\ - ::testing::TestPartResultArray gtest_failures;\ - ::testing::internal::SingleFailureChecker gtest_checker(\ - >est_failures, ::testing::TestPartResult::kFatalFailure, (substr));\ - {\ - ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ - ::testing::ScopedFakeTestPartResultReporter:: \ - INTERCEPT_ALL_THREADS, >est_failures);\ - GTestExpectFatalFailureHelper::Execute();\ - }\ - } while (::testing::internal::AlwaysFalse()) - -// A macro for testing Google Test assertions or code that's expected to -// generate Google Test non-fatal failures. It asserts that the given -// statement will cause exactly one non-fatal Google Test failure with 'substr' -// being part of the failure message. -// -// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only -// affects and considers failures generated in the current thread and -// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads. -// -// 'statement' is allowed to reference local variables and members of -// the current object. -// -// The verification of the assertion is done correctly even when the statement -// throws an exception or aborts the current function. -// -// Known restrictions: -// - You cannot stream a failure message to this macro. -// -// Note that even though the implementations of the following two -// macros are much alike, we cannot refactor them to use a common -// helper macro, due to some peculiarity in how the preprocessor -// works. If we do that, the code won't compile when the user gives -// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that -// expands to code containing an unprotected comma. The -// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc -// catches that. -// -// For the same reason, we have to write -// if (::testing::internal::AlwaysTrue()) { statement; } -// instead of -// GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) -// to avoid an MSVC warning on unreachable code. 
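[Editor's note] A usage sketch for EXPECT_FATAL_FAILURE above and the EXPECT_NONFATAL_FAILURE variants defined immediately below. The test names are illustrative; in a regular (non-fused) checkout these macros come from gtest/gtest-spi.h. Note the documented restriction that the statement given to EXPECT_FATAL_FAILURE may not reference local non-static variables.

// Usage sketch for the failure-interception macros (test names illustrative).
#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"   // location in a regular, non-fused checkout

TEST(FailureInterceptionTest, CatchesOneFatalFailure)
{
    // The statement must produce exactly one fatal failure whose message
    // contains the given substring; it may not reference local non-static
    // variables (documented restriction above).
    EXPECT_FATAL_FAILURE(FAIL() << "boom", "boom");
}

TEST(FailureInterceptionTest, CatchesOneNonFatalFailure)
{
    // The non-fatal variant may reference locals.
    int value = 42;
    EXPECT_NONFATAL_FAILURE(EXPECT_EQ(0, value) << "bad value", "bad value");
}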
-#define EXPECT_NONFATAL_FAILURE(statement, substr) \ - do {\ - ::testing::TestPartResultArray gtest_failures;\ - ::testing::internal::SingleFailureChecker gtest_checker(\ - >est_failures, ::testing::TestPartResult::kNonFatalFailure, \ - (substr));\ - {\ - ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ - ::testing::ScopedFakeTestPartResultReporter:: \ - INTERCEPT_ONLY_CURRENT_THREAD, >est_failures);\ - if (::testing::internal::AlwaysTrue()) { statement; }\ - }\ - } while (::testing::internal::AlwaysFalse()) - -#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \ - do {\ - ::testing::TestPartResultArray gtest_failures;\ - ::testing::internal::SingleFailureChecker gtest_checker(\ - >est_failures, ::testing::TestPartResult::kNonFatalFailure, \ - (substr));\ - {\ - ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ - ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS,\ - >est_failures);\ - if (::testing::internal::AlwaysTrue()) { statement; }\ - }\ - } while (::testing::internal::AlwaysFalse()) - -#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include // NOLINT -#include -#include - -#if GTEST_OS_LINUX - -// TODO(kenton@google.com): Use autoconf to detect availability of -// gettimeofday(). -# define GTEST_HAS_GETTIMEOFDAY_ 1 - -# include // NOLINT -# include // NOLINT -# include // NOLINT -// Declares vsnprintf(). This header is not available on Windows. -# include // NOLINT -# include // NOLINT -# include // NOLINT -# include // NOLINT -# include - -#elif GTEST_OS_SYMBIAN -# define GTEST_HAS_GETTIMEOFDAY_ 1 -# include // NOLINT - -#elif GTEST_OS_ZOS -# define GTEST_HAS_GETTIMEOFDAY_ 1 -# include // NOLINT - -// On z/OS we additionally need strings.h for strcasecmp. -# include // NOLINT - -#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE. - -# include // NOLINT - -#elif GTEST_OS_WINDOWS // We are on Windows proper. - -# include // NOLINT -# include // NOLINT -# include // NOLINT -# include // NOLINT - -# if GTEST_OS_WINDOWS_MINGW -// MinGW has gettimeofday() but not _ftime64(). -// TODO(kenton@google.com): Use autoconf to detect availability of -// gettimeofday(). -// TODO(kenton@google.com): There are other ways to get the time on -// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW -// supports these. consider using them instead. -# define GTEST_HAS_GETTIMEOFDAY_ 1 -# include // NOLINT -# endif // GTEST_OS_WINDOWS_MINGW - -// cpplint thinks that the header is already included, so we want to -// silence it. -# include // NOLINT - -#else - -// Assume other platforms have gettimeofday(). -// TODO(kenton@google.com): Use autoconf to detect availability of -// gettimeofday(). -# define GTEST_HAS_GETTIMEOFDAY_ 1 - -// cpplint thinks that the header is already included, so we want to -// silence it. -# include // NOLINT -# include // NOLINT - -#endif // GTEST_OS_LINUX - -#if GTEST_HAS_EXCEPTIONS -# include -#endif - -#if GTEST_CAN_STREAM_RESULTS_ -# include // NOLINT -# include // NOLINT -#endif - -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick is to -// prevent a user from accidentally including gtest-internal-inl.h in -// his code. -#define GTEST_IMPLEMENTATION_ 1 -// Copyright 2005, Google Inc. -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Utility functions and classes used by the Google C++ testing framework. -// -// Author: wan@google.com (Zhanyong Wan) -// -// This file contains purely Google Test's internal implementation. Please -// DO NOT #INCLUDE IT IN A USER PROGRAM. - -#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_ -#define GTEST_SRC_GTEST_INTERNAL_INL_H_ - -// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is -// part of Google Test's implementation; otherwise it's undefined. -#if !GTEST_IMPLEMENTATION_ -// A user is trying to include this from his code - just say no. -# error "gtest-internal-inl.h is part of Google Test's internal implementation." -# error "It must not be included except by Google Test itself." -#endif // GTEST_IMPLEMENTATION_ - -#ifndef _WIN32_WCE -# include -#endif // !_WIN32_WCE -#include -#include // For strtoll/_strtoul64/malloc/free. -#include // For memmove. - -#include -#include -#include - - -#if GTEST_OS_WINDOWS -# include // NOLINT -#endif // GTEST_OS_WINDOWS - - -namespace testing { - -// Declares the flags. -// -// We don't want the users to modify this flag in the code, but want -// Google Test's own unit tests to be able to access it. Therefore we -// declare it here as opposed to in gtest.h. -GTEST_DECLARE_bool_(death_test_use_fork); - -namespace internal { - -// The value of GetTestTypeId() as seen from within the Google Test -// library. This is solely for testing GetTestTypeId(). -GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest; - -// Names of the flags (needed for parsing Google Test flags). 
-const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests"; -const char kBreakOnFailureFlag[] = "break_on_failure"; -const char kCatchExceptionsFlag[] = "catch_exceptions"; -const char kColorFlag[] = "color"; -const char kFilterFlag[] = "filter"; -const char kListTestsFlag[] = "list_tests"; -const char kOutputFlag[] = "output"; -const char kPrintTimeFlag[] = "print_time"; -const char kRandomSeedFlag[] = "random_seed"; -const char kRepeatFlag[] = "repeat"; -const char kShuffleFlag[] = "shuffle"; -const char kStackTraceDepthFlag[] = "stack_trace_depth"; -const char kStreamResultToFlag[] = "stream_result_to"; -const char kThrowOnFailureFlag[] = "throw_on_failure"; - -// A valid random seed must be in [1, kMaxRandomSeed]. -const int kMaxRandomSeed = 99999; - -// g_help_flag is true iff the --help flag or an equivalent form is -// specified on the command line. -GTEST_API_ extern bool g_help_flag; - -// Returns the current time in milliseconds. -GTEST_API_ TimeInMillis GetTimeInMillis(); - -// Returns true iff Google Test should use colors in the output. -GTEST_API_ bool ShouldUseColor(bool stdout_is_tty); - -// Formats the given time in milliseconds as seconds. -GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms); - -// Parses a string for an Int32 flag, in the form of "--flag=value". -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -GTEST_API_ bool ParseInt32Flag( - const char* str, const char* flag, Int32* value); - -// Returns a random seed in range [1, kMaxRandomSeed] based on the -// given --gtest_random_seed flag value. -inline int GetRandomSeedFromFlag(Int32 random_seed_flag) { - const unsigned int raw_seed = (random_seed_flag == 0) ? - static_cast(GetTimeInMillis()) : - static_cast(random_seed_flag); - - // Normalizes the actual seed to range [1, kMaxRandomSeed] such that - // it's easy to type. - const int normalized_seed = - static_cast((raw_seed - 1U) % - static_cast(kMaxRandomSeed)) + 1; - return normalized_seed; -} - -// Returns the first valid random seed after 'seed'. The behavior is -// undefined if 'seed' is invalid. The seed after kMaxRandomSeed is -// considered to be 1. -inline int GetNextRandomSeed(int seed) { - GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed) - << "Invalid random seed " << seed << " - must be in [1, " - << kMaxRandomSeed << "]."; - const int next_seed = seed + 1; - return (next_seed > kMaxRandomSeed) ? 1 : next_seed; -} - -// This class saves the values of all Google Test flags in its c'tor, and -// restores them in its d'tor. -class GTestFlagSaver { - public: - // The c'tor. - GTestFlagSaver() { - also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests); - break_on_failure_ = GTEST_FLAG(break_on_failure); - catch_exceptions_ = GTEST_FLAG(catch_exceptions); - color_ = GTEST_FLAG(color); - death_test_style_ = GTEST_FLAG(death_test_style); - death_test_use_fork_ = GTEST_FLAG(death_test_use_fork); - filter_ = GTEST_FLAG(filter); - internal_run_death_test_ = GTEST_FLAG(internal_run_death_test); - list_tests_ = GTEST_FLAG(list_tests); - output_ = GTEST_FLAG(output); - print_time_ = GTEST_FLAG(print_time); - random_seed_ = GTEST_FLAG(random_seed); - repeat_ = GTEST_FLAG(repeat); - shuffle_ = GTEST_FLAG(shuffle); - stack_trace_depth_ = GTEST_FLAG(stack_trace_depth); - stream_result_to_ = GTEST_FLAG(stream_result_to); - throw_on_failure_ = GTEST_FLAG(throw_on_failure); - } - - // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS. 
- ~GTestFlagSaver() { - GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_; - GTEST_FLAG(break_on_failure) = break_on_failure_; - GTEST_FLAG(catch_exceptions) = catch_exceptions_; - GTEST_FLAG(color) = color_; - GTEST_FLAG(death_test_style) = death_test_style_; - GTEST_FLAG(death_test_use_fork) = death_test_use_fork_; - GTEST_FLAG(filter) = filter_; - GTEST_FLAG(internal_run_death_test) = internal_run_death_test_; - GTEST_FLAG(list_tests) = list_tests_; - GTEST_FLAG(output) = output_; - GTEST_FLAG(print_time) = print_time_; - GTEST_FLAG(random_seed) = random_seed_; - GTEST_FLAG(repeat) = repeat_; - GTEST_FLAG(shuffle) = shuffle_; - GTEST_FLAG(stack_trace_depth) = stack_trace_depth_; - GTEST_FLAG(stream_result_to) = stream_result_to_; - GTEST_FLAG(throw_on_failure) = throw_on_failure_; - } - private: - // Fields for saving the original values of flags. - bool also_run_disabled_tests_; - bool break_on_failure_; - bool catch_exceptions_; - String color_; - String death_test_style_; - bool death_test_use_fork_; - String filter_; - String internal_run_death_test_; - bool list_tests_; - String output_; - bool print_time_; - bool pretty_; - internal::Int32 random_seed_; - internal::Int32 repeat_; - bool shuffle_; - internal::Int32 stack_trace_depth_; - String stream_result_to_; - bool throw_on_failure_; -} GTEST_ATTRIBUTE_UNUSED_; - -// Converts a Unicode code point to a narrow string in UTF-8 encoding. -// code_point parameter is of type UInt32 because wchar_t may not be -// wide enough to contain a code point. -// The output buffer str must containt at least 32 characters. -// The function returns the address of the output buffer. -// If the code_point is not a valid Unicode code point -// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output -// as '(Invalid Unicode 0xXXXXXXXX)'. -GTEST_API_ char* CodePointToUtf8(UInt32 code_point, char* str); - -// Converts a wide string to a narrow string in UTF-8 encoding. -// The wide string is assumed to have the following encoding: -// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS) -// UTF-32 if sizeof(wchar_t) == 4 (on Linux) -// Parameter str points to a null-terminated wide string. -// Parameter num_chars may additionally limit the number -// of wchar_t characters processed. -1 is used when the entire string -// should be processed. -// If the string contains code points that are not valid Unicode code points -// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output -// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding -// and contains invalid UTF-16 surrogate pairs, values in those pairs -// will be encoded as individual Unicode characters from Basic Normal Plane. -GTEST_API_ String WideStringToUtf8(const wchar_t* str, int num_chars); - -// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file -// if the variable is present. If a file already exists at this location, this -// function will write over it. If the variable is present, but the file cannot -// be created, prints an error and exits. -void WriteToShardStatusFileIfNeeded(); - -// Checks whether sharding is enabled by examining the relevant -// environment variable values. If the variables are present, -// but inconsistent (e.g., shard_index >= total_shards), prints -// an error and exits. If in_subprocess_for_death_test, sharding is -// disabled because it must only be applied to the original test -// process. Otherwise, we could filter out death tests we intended to execute. 
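The GTestFlagSaver defined above is meant to be used as a scope guard: snapshot every flag on entry, restore on exit. A hedged usage sketch (the function and flag choices are illustrative):

void RunWithTemporaryFlagsSketch() {
  testing::internal::GTestFlagSaver saver;  // constructor snapshots all flags
  GTEST_FLAG(repeat) = 3;                   // mutate flags freely...
  GTEST_FLAG(shuffle) = true;
  // ... exercise whatever needs the modified configuration ...
}                                           // destructor restores the snapshot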
-GTEST_API_ bool ShouldShard(const char* total_shards_str, - const char* shard_index_str, - bool in_subprocess_for_death_test); - -// Parses the environment variable var as an Int32. If it is unset, -// returns default_val. If it is not an Int32, prints an error and -// and aborts. -GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val); - -// Given the total number of shards, the shard index, and the test id, -// returns true iff the test should be run on this shard. The test id is -// some arbitrary but unique non-negative integer assigned to each test -// method. Assumes that 0 <= shard_index < total_shards. -GTEST_API_ bool ShouldRunTestOnShard( - int total_shards, int shard_index, int test_id); - -// STL container utilities. - -// Returns the number of elements in the given container that satisfy -// the given predicate. -template -inline int CountIf(const Container& c, Predicate predicate) { - // Implemented as an explicit loop since std::count_if() in libCstd on - // Solaris has a non-standard signature. - int count = 0; - for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) { - if (predicate(*it)) - ++count; - } - return count; -} - -// Applies a function/functor to each element in the container. -template -void ForEach(const Container& c, Functor functor) { - std::for_each(c.begin(), c.end(), functor); -} - -// Returns the i-th element of the vector, or default_value if i is not -// in range [0, v.size()). -template -inline E GetElementOr(const std::vector& v, int i, E default_value) { - return (i < 0 || i >= static_cast(v.size())) ? default_value : v[i]; -} - -// Performs an in-place shuffle of a range of the vector's elements. -// 'begin' and 'end' are element indices as an STL-style range; -// i.e. [begin, end) are shuffled, where 'end' == size() means to -// shuffle to the end of the vector. -template -void ShuffleRange(internal::Random* random, int begin, int end, - std::vector* v) { - const int size = static_cast(v->size()); - GTEST_CHECK_(0 <= begin && begin <= size) - << "Invalid shuffle range start " << begin << ": must be in range [0, " - << size << "]."; - GTEST_CHECK_(begin <= end && end <= size) - << "Invalid shuffle range finish " << end << ": must be in range [" - << begin << ", " << size << "]."; - - // Fisher-Yates shuffle, from - // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle - for (int range_width = end - begin; range_width >= 2; range_width--) { - const int last_in_range = begin + range_width - 1; - const int selected = begin + random->Generate(range_width); - std::swap((*v)[selected], (*v)[last_in_range]); - } -} - -// Performs an in-place shuffle of the vector's elements. -template -inline void Shuffle(internal::Random* random, std::vector* v) { - ShuffleRange(random, 0, static_cast(v->size()), v); -} - -// A function for deleting an object. Handy for being used as a -// functor. -template -static void Delete(T* x) { - delete x; -} - -// A predicate that checks the key of a TestProperty against a known key. -// -// TestPropertyKeyIs is copyable. -class TestPropertyKeyIs { - public: - // Constructor. - // - // TestPropertyKeyIs has NO default constructor. - explicit TestPropertyKeyIs(const char* key) - : key_(key) {} - - // Returns true iff the test name of test property matches on key_. - bool operator()(const TestProperty& test_property) const { - return String(test_property.key()).Compare(key_) == 0; - } - - private: - String key_; -}; - -// Class UnitTestOptions. 
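ShuffleRange above is a textbook Fisher-Yates pass driven by Google Test's internal Random. A standalone sketch of the same loop, substituting std::mt19937 for internal::Random so it compiles on its own:

#include <random>
#include <utility>
#include <vector>

// Mirrors ShuffleRange's Fisher-Yates loop over the index range [begin, end).
void ShuffleRangeSketch(std::mt19937* rng, int begin, int end,
                        std::vector<int>* v) {
  for (int range_width = end - begin; range_width >= 2; range_width--) {
    const int last_in_range = begin + range_width - 1;
    // Pick a random slot in [begin, begin + range_width)...
    const int selected = begin + static_cast<int>(
        (*rng)() % static_cast<unsigned int>(range_width));
    // ...and fix that element at the end of the not-yet-shuffled range.
    std::swap((*v)[selected], (*v)[last_in_range]);
  }
}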
-// -// This class contains functions for processing options the user -// specifies when running the tests. It has only static members. -// -// In most cases, the user can specify an option using either an -// environment variable or a command line flag. E.g. you can set the -// test filter using either GTEST_FILTER or --gtest_filter. If both -// the variable and the flag are present, the latter overrides the -// former. -class GTEST_API_ UnitTestOptions { - public: - // Functions for processing the gtest_output flag. - - // Returns the output format, or "" for normal printed output. - static String GetOutputFormat(); - - // Returns the absolute path of the requested output file, or the - // default (test_detail.xml in the original working directory) if - // none was explicitly specified. - static String GetAbsolutePathToOutputFile(); - - // Functions for processing the gtest_filter flag. - - // Returns true iff the wildcard pattern matches the string. The - // first ':' or '\0' character in pattern marks the end of it. - // - // This recursive algorithm isn't very efficient, but is clear and - // works well enough for matching test names, which are short. - static bool PatternMatchesString(const char *pattern, const char *str); - - // Returns true iff the user-specified filter matches the test case - // name and the test name. - static bool FilterMatchesTest(const String &test_case_name, - const String &test_name); - -#if GTEST_OS_WINDOWS - // Function for supporting the gtest_catch_exception flag. - - // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the - // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. - // This function is useful as an __except condition. - static int GTestShouldProcessSEH(DWORD exception_code); -#endif // GTEST_OS_WINDOWS - - // Returns true if "name" matches the ':' separated list of glob-style - // filters in "filter". - static bool MatchesFilter(const String& name, const char* filter); -}; - -// Returns the current application's name, removing directory path if that -// is present. Used by UnitTestOptions::GetOutputFile. -GTEST_API_ FilePath GetCurrentExecutableName(); - -// The role interface for getting the OS stack trace as a string. -class OsStackTraceGetterInterface { - public: - OsStackTraceGetterInterface() {} - virtual ~OsStackTraceGetterInterface() {} - - // Returns the current OS stack trace as a String. Parameters: - // - // max_depth - the maximum number of stack frames to be included - // in the trace. - // skip_count - the number of top frames to be skipped; doesn't count - // against max_depth. - virtual String CurrentStackTrace(int max_depth, int skip_count) = 0; - - // UponLeavingGTest() should be called immediately before Google Test calls - // user code. It saves some information about the current stack that - // CurrentStackTrace() will use to find and hide Google Test stack frames. - virtual void UponLeavingGTest() = 0; - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface); -}; - -// A working implementation of the OsStackTraceGetterInterface interface. -class OsStackTraceGetter : public OsStackTraceGetterInterface { - public: - OsStackTraceGetter() : caller_frame_(NULL) {} - virtual String CurrentStackTrace(int max_depth, int skip_count); - virtual void UponLeavingGTest(); - - // This string is inserted in place of stack frames that are part of - // Google Test's implementation. 
- static const char* const kElidedFramesMarker; - - private: - Mutex mutex_; // protects all internal state - - // We save the stack frame below the frame that calls user code. - // We do this because the address of the frame immediately below - // the user code changes between the call to UponLeavingGTest() - // and any calls to CurrentStackTrace() from within the user code. - void* caller_frame_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter); -}; - -// Information about a Google Test trace point. -struct TraceInfo { - const char* file; - int line; - String message; -}; - -// This is the default global test part result reporter used in UnitTestImpl. -// This class should only be used by UnitTestImpl. -class DefaultGlobalTestPartResultReporter - : public TestPartResultReporterInterface { - public: - explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test); - // Implements the TestPartResultReporterInterface. Reports the test part - // result in the current test. - virtual void ReportTestPartResult(const TestPartResult& result); - - private: - UnitTestImpl* const unit_test_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter); -}; - -// This is the default per thread test part result reporter used in -// UnitTestImpl. This class should only be used by UnitTestImpl. -class DefaultPerThreadTestPartResultReporter - : public TestPartResultReporterInterface { - public: - explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test); - // Implements the TestPartResultReporterInterface. The implementation just - // delegates to the current global test part result reporter of *unit_test_. - virtual void ReportTestPartResult(const TestPartResult& result); - - private: - UnitTestImpl* const unit_test_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter); -}; - -// The private implementation of the UnitTest class. We don't protect -// the methods under a mutex, as this class is not accessible by a -// user and the UnitTest class that delegates work to this class does -// proper locking. -class GTEST_API_ UnitTestImpl { - public: - explicit UnitTestImpl(UnitTest* parent); - virtual ~UnitTestImpl(); - - // There are two different ways to register your own TestPartResultReporter. - // You can register your own repoter to listen either only for test results - // from the current thread or for results from all threads. - // By default, each per-thread test result repoter just passes a new - // TestPartResult to the global test result reporter, which registers the - // test part result for the currently running test. - - // Returns the global test part result reporter. - TestPartResultReporterInterface* GetGlobalTestPartResultReporter(); - - // Sets the global test part result reporter. - void SetGlobalTestPartResultReporter( - TestPartResultReporterInterface* reporter); - - // Returns the test part result reporter for the current thread. - TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread(); - - // Sets the test part result reporter for the current thread. - void SetTestPartResultReporterForCurrentThread( - TestPartResultReporterInterface* reporter); - - // Gets the number of successful test cases. - int successful_test_case_count() const; - - // Gets the number of failed test cases. - int failed_test_case_count() const; - - // Gets the number of all test cases. - int total_test_case_count() const; - - // Gets the number of all test cases that contain at least one test - // that should run. 
- int test_case_to_run_count() const; - - // Gets the number of successful tests. - int successful_test_count() const; - - // Gets the number of failed tests. - int failed_test_count() const; - - // Gets the number of disabled tests. - int disabled_test_count() const; - - // Gets the number of all tests. - int total_test_count() const; - - // Gets the number of tests that should run. - int test_to_run_count() const; - - // Gets the elapsed time, in milliseconds. - TimeInMillis elapsed_time() const { return elapsed_time_; } - - // Returns true iff the unit test passed (i.e. all test cases passed). - bool Passed() const { return !Failed(); } - - // Returns true iff the unit test failed (i.e. some test case failed - // or something outside of all tests failed). - bool Failed() const { - return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed(); - } - - // Gets the i-th test case among all the test cases. i can range from 0 to - // total_test_case_count() - 1. If i is not in that range, returns NULL. - const TestCase* GetTestCase(int i) const { - const int index = GetElementOr(test_case_indices_, i, -1); - return index < 0 ? NULL : test_cases_[i]; - } - - // Gets the i-th test case among all the test cases. i can range from 0 to - // total_test_case_count() - 1. If i is not in that range, returns NULL. - TestCase* GetMutableTestCase(int i) { - const int index = GetElementOr(test_case_indices_, i, -1); - return index < 0 ? NULL : test_cases_[index]; - } - - // Provides access to the event listener list. - TestEventListeners* listeners() { return &listeners_; } - - // Returns the TestResult for the test that's currently running, or - // the TestResult for the ad hoc test if no test is running. - TestResult* current_test_result(); - - // Returns the TestResult for the ad hoc test. - const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; } - - // Sets the OS stack trace getter. - // - // Does nothing if the input and the current OS stack trace getter - // are the same; otherwise, deletes the old getter and makes the - // input the current getter. - void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter); - - // Returns the current OS stack trace getter if it is not NULL; - // otherwise, creates an OsStackTraceGetter, makes it the current - // getter, and returns it. - OsStackTraceGetterInterface* os_stack_trace_getter(); - - // Returns the current OS stack trace as a String. - // - // The maximum number of stack frames to be included is specified by - // the gtest_stack_trace_depth flag. The skip_count parameter - // specifies the number of top frames to be skipped, which doesn't - // count against the number of frames to be included. - // - // For example, if Foo() calls Bar(), which in turn calls - // CurrentOsStackTraceExceptTop(1), Foo() will be included in the - // trace but Bar() and CurrentOsStackTraceExceptTop() won't. - String CurrentOsStackTraceExceptTop(int skip_count); - - // Finds and returns a TestCase with the given name. If one doesn't - // exist, creates one and returns it. - // - // Arguments: - // - // test_case_name: name of the test case - // type_param: the name of the test's type parameter, or NULL if - // this is not a typed or a type-parameterized test. 
- // set_up_tc: pointer to the function that sets up the test case - // tear_down_tc: pointer to the function that tears down the test case - TestCase* GetTestCase(const char* test_case_name, - const char* type_param, - Test::SetUpTestCaseFunc set_up_tc, - Test::TearDownTestCaseFunc tear_down_tc); - - // Adds a TestInfo to the unit test. - // - // Arguments: - // - // set_up_tc: pointer to the function that sets up the test case - // tear_down_tc: pointer to the function that tears down the test case - // test_info: the TestInfo object - void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc, - Test::TearDownTestCaseFunc tear_down_tc, - TestInfo* test_info) { - // In order to support thread-safe death tests, we need to - // remember the original working directory when the test program - // was first invoked. We cannot do this in RUN_ALL_TESTS(), as - // the user may have changed the current directory before calling - // RUN_ALL_TESTS(). Therefore we capture the current directory in - // AddTestInfo(), which is called to register a TEST or TEST_F - // before main() is reached. - if (original_working_dir_.IsEmpty()) { - original_working_dir_.Set(FilePath::GetCurrentDir()); - GTEST_CHECK_(!original_working_dir_.IsEmpty()) - << "Failed to get the current working directory."; - } - - GetTestCase(test_info->test_case_name(), - test_info->type_param(), - set_up_tc, - tear_down_tc)->AddTestInfo(test_info); - } - -#if GTEST_HAS_PARAM_TEST - // Returns ParameterizedTestCaseRegistry object used to keep track of - // value-parameterized tests and instantiate and register them. - internal::ParameterizedTestCaseRegistry& parameterized_test_registry() { - return parameterized_test_registry_; - } -#endif // GTEST_HAS_PARAM_TEST - - // Sets the TestCase object for the test that's currently running. - void set_current_test_case(TestCase* a_current_test_case) { - current_test_case_ = a_current_test_case; - } - - // Sets the TestInfo object for the test that's currently running. If - // current_test_info is NULL, the assertion results will be stored in - // ad_hoc_test_result_. - void set_current_test_info(TestInfo* a_current_test_info) { - current_test_info_ = a_current_test_info; - } - - // Registers all parameterized tests defined using TEST_P and - // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter - // combination. This method can be called more then once; it has guards - // protecting from registering the tests more then once. If - // value-parameterized tests are disabled, RegisterParameterizedTests is - // present but does nothing. - void RegisterParameterizedTests(); - - // Runs all tests in this UnitTest object, prints the result, and - // returns true if all tests are successful. If any exception is - // thrown during a test, this test is considered to be failed, but - // the rest of the tests will still be run. - bool RunAllTests(); - - // Clears the results of all tests, except the ad hoc tests. - void ClearNonAdHocTestResult() { - ForEach(test_cases_, TestCase::ClearTestCaseResult); - } - - // Clears the results of ad-hoc test assertions. - void ClearAdHocTestResult() { - ad_hoc_test_result_.Clear(); - } - - enum ReactionToSharding { - HONOR_SHARDING_PROTOCOL, - IGNORE_SHARDING_PROTOCOL - }; - - // Matches the full name of each test against the user-specified - // filter to decide whether the test should run, then records the - // result in each TestCase and TestInfo object. 
- // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests - // based on sharding variables in the environment. - // Returns the number of tests that should run. - int FilterTests(ReactionToSharding shard_tests); - - // Prints the names of the tests matching the user-specified filter flag. - void ListTestsMatchingFilter(); - - const TestCase* current_test_case() const { return current_test_case_; } - TestInfo* current_test_info() { return current_test_info_; } - const TestInfo* current_test_info() const { return current_test_info_; } - - // Returns the vector of environments that need to be set-up/torn-down - // before/after the tests are run. - std::vector& environments() { return environments_; } - - // Getters for the per-thread Google Test trace stack. - std::vector& gtest_trace_stack() { - return *(gtest_trace_stack_.pointer()); - } - const std::vector& gtest_trace_stack() const { - return gtest_trace_stack_.get(); - } - -#if GTEST_HAS_DEATH_TEST - void InitDeathTestSubprocessControlInfo() { - internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag()); - } - // Returns a pointer to the parsed --gtest_internal_run_death_test - // flag, or NULL if that flag was not specified. - // This information is useful only in a death test child process. - // Must not be called before a call to InitGoogleTest. - const InternalRunDeathTestFlag* internal_run_death_test_flag() const { - return internal_run_death_test_flag_.get(); - } - - // Returns a pointer to the current death test factory. - internal::DeathTestFactory* death_test_factory() { - return death_test_factory_.get(); - } - - void SuppressTestEventsIfInSubprocess(); - - friend class ReplaceDeathTestFactory; -#endif // GTEST_HAS_DEATH_TEST - - // Initializes the event listener performing XML output as specified by - // UnitTestOptions. Must not be called before InitGoogleTest. - void ConfigureXmlOutput(); - -#if GTEST_CAN_STREAM_RESULTS_ - // Initializes the event listener for streaming test results to a socket. - // Must not be called before InitGoogleTest. - void ConfigureStreamingOutput(); -#endif - - // Performs initialization dependent upon flag values obtained in - // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to - // ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest - // this function is also called from RunAllTests. Since this function can be - // called more than once, it has to be idempotent. - void PostFlagParsingInit(); - - // Gets the random seed used at the start of the current test iteration. - int random_seed() const { return random_seed_; } - - // Gets the random number generator. - internal::Random* random() { return &random_; } - - // Shuffles all test cases, and the tests within each test case, - // making sure that death tests are still run first. - void ShuffleTests(); - - // Restores the test cases and tests to their order before the first shuffle. - void UnshuffleTests(); - - // Returns the value of GTEST_FLAG(catch_exceptions) at the moment - // UnitTest::Run() starts. - bool catch_exceptions() const { return catch_exceptions_; } - - private: - friend class ::testing::UnitTest; - - // Used by UnitTest::Run() to capture the state of - // GTEST_FLAG(catch_exceptions) at the moment it starts. - void set_catch_exceptions(bool value) { catch_exceptions_ = value; } - - // The UnitTest object that owns this implementation object. - UnitTest* const parent_; - - // The working directory when the first TEST() or TEST_F() was - // executed. 
- internal::FilePath original_working_dir_; - - // The default test part result reporters. - DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_; - DefaultPerThreadTestPartResultReporter - default_per_thread_test_part_result_reporter_; - - // Points to (but doesn't own) the global test part result reporter. - TestPartResultReporterInterface* global_test_part_result_repoter_; - - // Protects read and write access to global_test_part_result_reporter_. - internal::Mutex global_test_part_result_reporter_mutex_; - - // Points to (but doesn't own) the per-thread test part result reporter. - internal::ThreadLocal - per_thread_test_part_result_reporter_; - - // The vector of environments that need to be set-up/torn-down - // before/after the tests are run. - std::vector environments_; - - // The vector of TestCases in their original order. It owns the - // elements in the vector. - std::vector test_cases_; - - // Provides a level of indirection for the test case list to allow - // easy shuffling and restoring the test case order. The i-th - // element of this vector is the index of the i-th test case in the - // shuffled order. - std::vector test_case_indices_; - -#if GTEST_HAS_PARAM_TEST - // ParameterizedTestRegistry object used to register value-parameterized - // tests. - internal::ParameterizedTestCaseRegistry parameterized_test_registry_; - - // Indicates whether RegisterParameterizedTests() has been called already. - bool parameterized_tests_registered_; -#endif // GTEST_HAS_PARAM_TEST - - // Index of the last death test case registered. Initially -1. - int last_death_test_case_; - - // This points to the TestCase for the currently running test. It - // changes as Google Test goes through one test case after another. - // When no test is running, this is set to NULL and Google Test - // stores assertion results in ad_hoc_test_result_. Initially NULL. - TestCase* current_test_case_; - - // This points to the TestInfo for the currently running test. It - // changes as Google Test goes through one test after another. When - // no test is running, this is set to NULL and Google Test stores - // assertion results in ad_hoc_test_result_. Initially NULL. - TestInfo* current_test_info_; - - // Normally, a user only writes assertions inside a TEST or TEST_F, - // or inside a function called by a TEST or TEST_F. Since Google - // Test keeps track of which test is current running, it can - // associate such an assertion with the test it belongs to. - // - // If an assertion is encountered when no TEST or TEST_F is running, - // Google Test attributes the assertion result to an imaginary "ad hoc" - // test, and records the result in ad_hoc_test_result_. - TestResult ad_hoc_test_result_; - - // The list of event listeners that can be used to track events inside - // Google Test. - TestEventListeners listeners_; - - // The OS stack trace getter. Will be deleted when the UnitTest - // object is destructed. By default, an OsStackTraceGetter is used, - // but the user can set this field to use a custom getter if that is - // desired. - OsStackTraceGetterInterface* os_stack_trace_getter_; - - // True iff PostFlagParsingInit() has been called. - bool post_flag_parse_init_performed_; - - // The random number seed used at the beginning of the test run. - int random_seed_; - - // Our random number generator. - internal::Random random_; - - // How long the test took to run, in milliseconds. 
- TimeInMillis elapsed_time_; - -#if GTEST_HAS_DEATH_TEST - // The decomposed components of the gtest_internal_run_death_test flag, - // parsed when RUN_ALL_TESTS is called. - internal::scoped_ptr internal_run_death_test_flag_; - internal::scoped_ptr death_test_factory_; -#endif // GTEST_HAS_DEATH_TEST - - // A per-thread stack of traces created by the SCOPED_TRACE() macro. - internal::ThreadLocal > gtest_trace_stack_; - - // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests() - // starts. - bool catch_exceptions_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl); -}; // class UnitTestImpl - -// Convenience function for accessing the global UnitTest -// implementation object. -inline UnitTestImpl* GetUnitTestImpl() { - return UnitTest::GetInstance()->impl(); -} - -#if GTEST_USES_SIMPLE_RE - -// Internal helper functions for implementing the simple regular -// expression matcher. -GTEST_API_ bool IsInSet(char ch, const char* str); -GTEST_API_ bool IsAsciiDigit(char ch); -GTEST_API_ bool IsAsciiPunct(char ch); -GTEST_API_ bool IsRepeat(char ch); -GTEST_API_ bool IsAsciiWhiteSpace(char ch); -GTEST_API_ bool IsAsciiWordChar(char ch); -GTEST_API_ bool IsValidEscape(char ch); -GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch); -GTEST_API_ bool ValidateRegex(const char* regex); -GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str); -GTEST_API_ bool MatchRepetitionAndRegexAtHead( - bool escaped, char ch, char repeat, const char* regex, const char* str); -GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str); - -#endif // GTEST_USES_SIMPLE_RE - -// Parses the command line for Google Test flags, without initializing -// other parts of Google Test. -GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv); -GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv); - -#if GTEST_HAS_DEATH_TEST - -// Returns the message describing the last system error, regardless of the -// platform. -GTEST_API_ String GetLastErrnoDescription(); - -# if GTEST_OS_WINDOWS -// Provides leak-safe Windows kernel handle ownership. -class AutoHandle { - public: - AutoHandle() : handle_(INVALID_HANDLE_VALUE) {} - explicit AutoHandle(HANDLE handle) : handle_(handle) {} - - ~AutoHandle() { Reset(); } - - HANDLE Get() const { return handle_; } - void Reset() { Reset(INVALID_HANDLE_VALUE); } - void Reset(HANDLE handle) { - if (handle != handle_) { - if (handle_ != INVALID_HANDLE_VALUE) - ::CloseHandle(handle_); - handle_ = handle; - } - } - - private: - HANDLE handle_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle); -}; -# endif // GTEST_OS_WINDOWS - -// Attempts to parse a string into a positive integer pointed to by the -// number parameter. Returns true if that is possible. -// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use -// it here. -template -bool ParseNaturalNumber(const ::std::string& str, Integer* number) { - // Fail fast if the given string does not begin with a digit; - // this bypasses strtoXXX's "optional leading whitespace and plus - // or minus sign" semantics, which are undesirable here. - if (str.empty() || !IsDigit(str[0])) { - return false; - } - errno = 0; - - char* end; - // BiggestConvertible is the largest integer type that system-provided - // string-to-number conversion routines can return. - -# if GTEST_OS_WINDOWS && !defined(__GNUC__) - - // MSVC and C++ Builder define __int64 instead of the standard long long. 
- typedef unsigned __int64 BiggestConvertible; - const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10); - -# else - - typedef unsigned long long BiggestConvertible; // NOLINT - const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10); - -# endif // GTEST_OS_WINDOWS && !defined(__GNUC__) - - const bool parse_success = *end == '\0' && errno == 0; - - // TODO(vladl@google.com): Convert this to compile time assertion when it is - // available. - GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed)); - - const Integer result = static_cast(parsed); - if (parse_success && static_cast(result) == parsed) { - *number = result; - return true; - } - return false; -} -#endif // GTEST_HAS_DEATH_TEST - -// TestResult contains some private methods that should be hidden from -// Google Test user but are required for testing. This class allow our tests -// to access them. -// -// This class is supplied only for the purpose of testing Google Test's own -// constructs. Do not use it in user tests, either directly or indirectly. -class TestResultAccessor { - public: - static void RecordProperty(TestResult* test_result, - const TestProperty& property) { - test_result->RecordProperty(property); - } - - static void ClearTestPartResults(TestResult* test_result) { - test_result->ClearTestPartResults(); - } - - static const std::vector& test_part_results( - const TestResult& test_result) { - return test_result.test_part_results(); - } -}; - -} // namespace internal -} // namespace testing - -#endif // GTEST_SRC_GTEST_INTERNAL_INL_H_ -#undef GTEST_IMPLEMENTATION_ - -#if GTEST_OS_WINDOWS -# define vsnprintf _vsnprintf -#endif // GTEST_OS_WINDOWS - -namespace testing { - -using internal::CountIf; -using internal::ForEach; -using internal::GetElementOr; -using internal::Shuffle; - -// Constants. - -// A test whose test case name or test name matches this filter is -// disabled and not run. -static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*"; - -// A test case whose name matches this filter is considered a death -// test case and will be run before test cases whose name doesn't -// match this filter. -static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*"; - -// A test filter that matches everything. -static const char kUniversalFilter[] = "*"; - -// The default output file for XML output. -static const char kDefaultOutputFile[] = "test_detail.xml"; - -// The environment variable name for the test shard index. -static const char kTestShardIndex[] = "GTEST_SHARD_INDEX"; -// The environment variable name for the total number of test shards. -static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS"; -// The environment variable name for the test shard status file. -static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE"; - -namespace internal { - -// The text used in failure messages to indicate the start of the -// stack trace. -const char kStackTraceMarker[] = "\nStack trace:\n"; - -// g_help_flag is true iff the --help flag or an equivalent form is -// specified on the command line. 
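A hedged usage sketch for the ParseNaturalNumber template defined above (available in death-test builds): it accepts a clean, in-range run of digits and leaves the output untouched on any failure. The wrapper name is illustrative.

#include <string>

bool ReadPortSketch(const std::string& text, int* port) {
  return testing::internal::ParseNaturalNumber(text, port);
}
// Accepted:  "8080"  -> *port == 8080, returns true.
// Rejected (returns false, *port unchanged):
//   " 42"    leading whitespace is deliberately not skipped
//   "-7"     a sign character fails the leading-digit check
//   "12x"    trailing characters leave *end != '\0'
//   "999..9" (too many digits) overflows and sets errno to ERANGE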
-bool g_help_flag = false; - -} // namespace internal - -GTEST_DEFINE_bool_( - also_run_disabled_tests, - internal::BoolFromGTestEnv("also_run_disabled_tests", false), - "Run disabled tests too, in addition to the tests normally being run."); - -GTEST_DEFINE_bool_( - break_on_failure, - internal::BoolFromGTestEnv("break_on_failure", false), - "True iff a failed assertion should be a debugger break-point."); - -GTEST_DEFINE_bool_( - catch_exceptions, - internal::BoolFromGTestEnv("catch_exceptions", true), - "True iff " GTEST_NAME_ - " should catch exceptions and treat them as test failures."); - -GTEST_DEFINE_string_( - color, - internal::StringFromGTestEnv("color", "auto"), - "Whether to use colors in the output. Valid values: yes, no, " - "and auto. 'auto' means to use colors if the output is " - "being sent to a terminal and the TERM environment variable " - "is set to xterm, xterm-color, xterm-256color, linux or cygwin."); - -GTEST_DEFINE_string_( - filter, - internal::StringFromGTestEnv("filter", kUniversalFilter), - "A colon-separated list of glob (not regex) patterns " - "for filtering the tests to run, optionally followed by a " - "'-' and a : separated list of negative patterns (tests to " - "exclude). A test is run if it matches one of the positive " - "patterns and does not match any of the negative patterns."); - -GTEST_DEFINE_bool_(list_tests, false, - "List all tests without running them."); - -GTEST_DEFINE_string_( - output, - internal::StringFromGTestEnv("output", ""), - "A format (currently must be \"xml\"), optionally followed " - "by a colon and an output file name or directory. A directory " - "is indicated by a trailing pathname separator. " - "Examples: \"xml:filename.xml\", \"xml::directoryname/\". " - "If a directory is specified, output files will be created " - "within that directory, with file-names based on the test " - "executable's name and, if necessary, made unique by adding " - "digits."); - -GTEST_DEFINE_bool_( - print_time, - internal::BoolFromGTestEnv("print_time", true), - "True iff " GTEST_NAME_ - " should display elapsed time in text output."); - -GTEST_DEFINE_int32_( - random_seed, - internal::Int32FromGTestEnv("random_seed", 0), - "Random number seed to use when shuffling test orders. Must be in range " - "[1, 99999], or 0 to use a seed based on the current time."); - -GTEST_DEFINE_int32_( - repeat, - internal::Int32FromGTestEnv("repeat", 1), - "How many times to repeat each test. Specify a negative number " - "for repeating forever. Useful for shaking out flaky tests."); - -GTEST_DEFINE_bool_( - show_internal_stack_frames, false, - "True iff " GTEST_NAME_ " should include internal stack frames when " - "printing test failure stack traces."); - -GTEST_DEFINE_bool_( - shuffle, - internal::BoolFromGTestEnv("shuffle", false), - "True iff " GTEST_NAME_ - " should randomize tests' order on every run."); - -GTEST_DEFINE_int32_( - stack_trace_depth, - internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth), - "The maximum number of stack frames to print when an " - "assertion fails. The valid range is 0 through 100, inclusive."); - -GTEST_DEFINE_string_( - stream_result_to, - internal::StringFromGTestEnv("stream_result_to", ""), - "This flag specifies the host name and the port number on which to stream " - "test results. Example: \"localhost:555\". 
The flag is effective only on " - "Linux."); - -GTEST_DEFINE_bool_( - throw_on_failure, - internal::BoolFromGTestEnv("throw_on_failure", false), - "When this flag is specified, a failed assertion will throw an exception " - "if exceptions are enabled or exit the program with a non-zero code " - "otherwise."); - -namespace internal { - -// Generates a random number from [0, range), using a Linear -// Congruential Generator (LCG). Crashes if 'range' is 0 or greater -// than kMaxRange. -UInt32 Random::Generate(UInt32 range) { - // These constants are the same as are used in glibc's rand(3). - state_ = (1103515245U*state_ + 12345U) % kMaxRange; - - GTEST_CHECK_(range > 0) - << "Cannot generate a number in the range [0, 0)."; - GTEST_CHECK_(range <= kMaxRange) - << "Generation of a number in [0, " << range << ") was requested, " - << "but this can only generate numbers in [0, " << kMaxRange << ")."; - - // Converting via modulus introduces a bit of downward bias, but - // it's simple, and a linear congruential generator isn't too good - // to begin with. - return state_ % range; -} - -// GTestIsInitialized() returns true iff the user has initialized -// Google Test. Useful for catching the user mistake of not initializing -// Google Test before calling RUN_ALL_TESTS(). -// -// A user must call testing::InitGoogleTest() to initialize Google -// Test. g_init_gtest_count is set to the number of times -// InitGoogleTest() has been called. We don't protect this variable -// under a mutex as it is only accessed in the main thread. -int g_init_gtest_count = 0; -static bool GTestIsInitialized() { return g_init_gtest_count != 0; } - -// Iterates over a vector of TestCases, keeping a running sum of the -// results of calling a given int-returning method on each. -// Returns the sum. -static int SumOverTestCaseList(const std::vector& case_list, - int (TestCase::*method)() const) { - int sum = 0; - for (size_t i = 0; i < case_list.size(); i++) { - sum += (case_list[i]->*method)(); - } - return sum; -} - -// Returns true iff the test case passed. -static bool TestCasePassed(const TestCase* test_case) { - return test_case->should_run() && test_case->Passed(); -} - -// Returns true iff the test case failed. -static bool TestCaseFailed(const TestCase* test_case) { - return test_case->should_run() && test_case->Failed(); -} - -// Returns true iff test_case contains at least one test that should -// run. -static bool ShouldRunTestCase(const TestCase* test_case) { - return test_case->should_run(); -} - -// AssertHelper constructor. -AssertHelper::AssertHelper(TestPartResult::Type type, - const char* file, - int line, - const char* message) - : data_(new AssertHelperData(type, file, line, message)) { -} - -AssertHelper::~AssertHelper() { - delete data_; -} - -// Message assignment, for assertion streaming support. -void AssertHelper::operator=(const Message& message) const { - UnitTest::GetInstance()-> - AddTestPartResult(data_->type, data_->file, data_->line, - AppendUserMessage(data_->message, message), - UnitTest::GetInstance()->impl() - ->CurrentOsStackTraceExceptTop(1) - // Skips the stack frame for this function itself. - ); // NOLINT -} - -// Mutex for linked pointers. -GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex); - -// Application pathname gotten in InitGoogleTest. -String g_executable_path; - -// Returns the current application's name, removing directory path if that -// is present. 
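Random::Generate above is a plain linear congruential generator using glibc's rand(3) constants, reduced modulo the requested range. A standalone sketch of the same recurrence; taking the modulus to be 2^31 is an assumption here, since kMaxRange itself is defined elsewhere:

#include <cstdint>

// Same recurrence as testing::internal::Random, with a (slightly biased)
// modulo reduction into [0, range).
class LcgSketch {
 public:
  explicit LcgSketch(uint32_t seed) : state_(seed) {}
  uint32_t Generate(uint32_t range) {      // caller ensures 0 < range <= kMaxRange
    const uint32_t kMaxRange = 1u << 31;   // assumed to match gtest's Random
    state_ = (1103515245u * state_ + 12345u) % kMaxRange;
    return state_ % range;
  }
 private:
  uint32_t state_;
};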
-FilePath GetCurrentExecutableName() { - FilePath result; - -#if GTEST_OS_WINDOWS - result.Set(FilePath(g_executable_path).RemoveExtension("exe")); -#else - result.Set(FilePath(g_executable_path)); -#endif // GTEST_OS_WINDOWS - - return result.RemoveDirectoryName(); -} - -// Functions for processing the gtest_output flag. - -// Returns the output format, or "" for normal printed output. -String UnitTestOptions::GetOutputFormat() { - const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); - if (gtest_output_flag == NULL) return String(""); - - const char* const colon = strchr(gtest_output_flag, ':'); - return (colon == NULL) ? - String(gtest_output_flag) : - String(gtest_output_flag, colon - gtest_output_flag); -} - -// Returns the name of the requested output file, or the default if none -// was explicitly specified. -String UnitTestOptions::GetAbsolutePathToOutputFile() { - const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); - if (gtest_output_flag == NULL) - return String(""); - - const char* const colon = strchr(gtest_output_flag, ':'); - if (colon == NULL) - return String(internal::FilePath::ConcatPaths( - internal::FilePath( - UnitTest::GetInstance()->original_working_dir()), - internal::FilePath(kDefaultOutputFile)).ToString() ); - - internal::FilePath output_name(colon + 1); - if (!output_name.IsAbsolutePath()) - // TODO(wan@google.com): on Windows \some\path is not an absolute - // path (as its meaning depends on the current drive), yet the - // following logic for turning it into an absolute path is wrong. - // Fix it. - output_name = internal::FilePath::ConcatPaths( - internal::FilePath(UnitTest::GetInstance()->original_working_dir()), - internal::FilePath(colon + 1)); - - if (!output_name.IsDirectory()) - return output_name.ToString(); - - internal::FilePath result(internal::FilePath::GenerateUniqueFileName( - output_name, internal::GetCurrentExecutableName(), - GetOutputFormat().c_str())); - return result.ToString(); -} - -// Returns true iff the wildcard pattern matches the string. The -// first ':' or '\0' character in pattern marks the end of it. -// -// This recursive algorithm isn't very efficient, but is clear and -// works well enough for matching test names, which are short. -bool UnitTestOptions::PatternMatchesString(const char *pattern, - const char *str) { - switch (*pattern) { - case '\0': - case ':': // Either ':' or '\0' marks the end of the pattern. - return *str == '\0'; - case '?': // Matches any single character. - return *str != '\0' && PatternMatchesString(pattern + 1, str + 1); - case '*': // Matches any string (possibly empty) of characters. - return (*str != '\0' && PatternMatchesString(pattern, str + 1)) || - PatternMatchesString(pattern + 1, str); - default: // Non-special character. Matches itself. - return *pattern == *str && - PatternMatchesString(pattern + 1, str + 1); - } -} - -bool UnitTestOptions::MatchesFilter(const String& name, const char* filter) { - const char *cur_pattern = filter; - for (;;) { - if (PatternMatchesString(cur_pattern, name.c_str())) { - return true; - } - - // Finds the next pattern in the filter. - cur_pattern = strchr(cur_pattern, ':'); - - // Returns if no more pattern can be found. - if (cur_pattern == NULL) { - return false; - } - - // Skips the pattern separater (the ':' character). - cur_pattern++; - } -} - -// TODO(keithray): move String function implementations to gtest-string.cc. - -// Returns true iff the user-specified filter matches the test case -// name and the test name. 
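PatternMatchesString above implements a tiny glob language: '?' matches one character, '*' matches any run, and ':' or '\0' ends the pattern. A standalone copy of the recursion with a few worked examples:

#include <cassert>

// Standalone sketch of the recursion above, so the examples can actually run.
static bool GlobMatchSketch(const char* pattern, const char* str) {
  switch (*pattern) {
    case '\0':
    case ':':  return *str == '\0';
    case '?':  return *str != '\0' && GlobMatchSketch(pattern + 1, str + 1);
    case '*':  return (*str != '\0' && GlobMatchSketch(pattern, str + 1)) ||
                      GlobMatchSketch(pattern + 1, str);
    default:   return *pattern == *str && GlobMatchSketch(pattern + 1, str + 1);
  }
}

inline void GlobMatchSketchExamples() {
  assert(GlobMatchSketch("Foo*", "FooBar"));             // '*' swallows "Bar"
  assert(GlobMatchSketch("Foo?", "Foox"));               // '?' matches exactly one char
  assert(!GlobMatchSketch("Foo?", "Foo"));               // '?' needs a character
  assert(GlobMatchSketch("*Test*:Other", "MyTestCase")); // ':' ends the first pattern
  assert(!GlobMatchSketch("Abc", "Abcd"));               // pattern ran out first
}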
-bool UnitTestOptions::FilterMatchesTest(const String &test_case_name, - const String &test_name) { - const String& full_name = String::Format("%s.%s", - test_case_name.c_str(), - test_name.c_str()); - - // Split --gtest_filter at '-', if there is one, to separate into - // positive filter and negative filter portions - const char* const p = GTEST_FLAG(filter).c_str(); - const char* const dash = strchr(p, '-'); - String positive; - String negative; - if (dash == NULL) { - positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter - negative = String(""); - } else { - positive = String(p, dash - p); // Everything up to the dash - negative = String(dash+1); // Everything after the dash - if (positive.empty()) { - // Treat '-test1' as the same as '*-test1' - positive = kUniversalFilter; - } - } - - // A filter is a colon-separated list of patterns. It matches a - // test if any pattern in it matches the test. - return (MatchesFilter(full_name, positive.c_str()) && - !MatchesFilter(full_name, negative.c_str())); -} - -#if GTEST_HAS_SEH -// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the -// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. -// This function is useful as an __except condition. -int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) { - // Google Test should handle a SEH exception if: - // 1. the user wants it to, AND - // 2. this is not a breakpoint exception, AND - // 3. this is not a C++ exception (VC++ implements them via SEH, - // apparently). - // - // SEH exception code for C++ exceptions. - // (see http://support.microsoft.com/kb/185294 for more information). - const DWORD kCxxExceptionCode = 0xe06d7363; - - bool should_handle = true; - - if (!GTEST_FLAG(catch_exceptions)) - should_handle = false; - else if (exception_code == EXCEPTION_BREAKPOINT) - should_handle = false; - else if (exception_code == kCxxExceptionCode) - should_handle = false; - - return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH; -} -#endif // GTEST_HAS_SEH - -} // namespace internal - -// The c'tor sets this object as the test part result reporter used by -// Google Test. The 'result' parameter specifies where to report the -// results. Intercepts only failures from the current thread. -ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( - TestPartResultArray* result) - : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), - result_(result) { - Init(); -} - -// The c'tor sets this object as the test part result reporter used by -// Google Test. The 'result' parameter specifies where to report the -// results. -ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( - InterceptMode intercept_mode, TestPartResultArray* result) - : intercept_mode_(intercept_mode), - result_(result) { - Init(); -} - -void ScopedFakeTestPartResultReporter::Init() { - internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); - if (intercept_mode_ == INTERCEPT_ALL_THREADS) { - old_reporter_ = impl->GetGlobalTestPartResultReporter(); - impl->SetGlobalTestPartResultReporter(this); - } else { - old_reporter_ = impl->GetTestPartResultReporterForCurrentThread(); - impl->SetTestPartResultReporterForCurrentThread(this); - } -} - -// The d'tor restores the test part result reporter used by Google Test -// before. 
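FilterMatchesTest above splits --gtest_filter at the first '-' into a positive and a negative part, and an empty positive part means "everything". A standalone sketch of that split, with examples:

#include <string>
#include <utility>

// Sketch of the '-' split performed by FilterMatchesTest above.
static std::pair<std::string, std::string> SplitGTestFilterSketch(
    const std::string& filter) {
  const std::string::size_type dash = filter.find('-');
  std::string positive =
      (dash == std::string::npos) ? filter : filter.substr(0, dash);
  const std::string negative =
      (dash == std::string::npos) ? std::string() : filter.substr(dash + 1);
  if (positive.empty()) positive = "*";  // "-Foo.*" behaves like "*-Foo.*"
  return std::make_pair(positive, negative);
}

// Examples:
//   "Foo.*-Foo.Bar" -> positive "Foo.*",   negative "Foo.Bar"
//                      (run every Foo test except Foo.Bar)
//   "-*Slow*"       -> positive "*",       negative "*Slow*"
//   "A.*:B.*"       -> positive "A.*:B.*", negative ""  (':' only separates patterns)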
-ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() { - internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); - if (intercept_mode_ == INTERCEPT_ALL_THREADS) { - impl->SetGlobalTestPartResultReporter(old_reporter_); - } else { - impl->SetTestPartResultReporterForCurrentThread(old_reporter_); - } -} - -// Increments the test part result count and remembers the result. -// This method is from the TestPartResultReporterInterface interface. -void ScopedFakeTestPartResultReporter::ReportTestPartResult( - const TestPartResult& result) { - result_->Append(result); -} - -namespace internal { - -// Returns the type ID of ::testing::Test. We should always call this -// instead of GetTypeId< ::testing::Test>() to get the type ID of -// testing::Test. This is to work around a suspected linker bug when -// using Google Test as a framework on Mac OS X. The bug causes -// GetTypeId< ::testing::Test>() to return different values depending -// on whether the call is from the Google Test framework itself or -// from user test code. GetTestTypeId() is guaranteed to always -// return the same value, as it always calls GetTypeId<>() from the -// gtest.cc, which is within the Google Test framework. -TypeId GetTestTypeId() { - return GetTypeId(); -} - -// The value of GetTestTypeId() as seen from within the Google Test -// library. This is solely for testing GetTestTypeId(). -extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId(); - -// This predicate-formatter checks that 'results' contains a test part -// failure of the given type and that the failure message contains the -// given substring. -AssertionResult HasOneFailure(const char* /* results_expr */, - const char* /* type_expr */, - const char* /* substr_expr */, - const TestPartResultArray& results, - TestPartResult::Type type, - const string& substr) { - const String expected(type == TestPartResult::kFatalFailure ? - "1 fatal failure" : - "1 non-fatal failure"); - Message msg; - if (results.size() != 1) { - msg << "Expected: " << expected << "\n" - << " Actual: " << results.size() << " failures"; - for (int i = 0; i < results.size(); i++) { - msg << "\n" << results.GetTestPartResult(i); - } - return AssertionFailure() << msg; - } - - const TestPartResult& r = results.GetTestPartResult(0); - if (r.type() != type) { - return AssertionFailure() << "Expected: " << expected << "\n" - << " Actual:\n" - << r; - } - - if (strstr(r.message(), substr.c_str()) == NULL) { - return AssertionFailure() << "Expected: " << expected << " containing \"" - << substr << "\"\n" - << " Actual:\n" - << r; - } - - return AssertionSuccess(); -} - -// The constructor of SingleFailureChecker remembers where to look up -// test part results, what type of failure we expect, and what -// substring the failure message should contain. -SingleFailureChecker:: SingleFailureChecker( - const TestPartResultArray* results, - TestPartResult::Type type, - const string& substr) - : results_(results), - type_(type), - substr_(substr) {} - -// The destructor of SingleFailureChecker verifies that the given -// TestPartResultArray contains exactly one failure that has the given -// type and contains the given substring. If that's not the case, a -// non-fatal failure will be generated. 
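The reporter and checker above combine into the "expect exactly one failure" idiom used by Google Test's own tests (the same shape as the EXPECT_*_FAILURE macros in gtest-spi.h). A hedged sketch of that pattern inside some test body:

{
  ::testing::TestPartResultArray results;
  ::testing::internal::SingleFailureChecker checker(
      &results, ::testing::TestPartResult::kNonFatalFailure, "expected message");
  {
    ::testing::ScopedFakeTestPartResultReporter reporter(
        ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ONLY_CURRENT_THREAD,
        &results);
    ADD_FAILURE() << "expected message";  // intercepted into 'results',
  }                                       // reporter's d'tor restores the old reporter
}                                         // checker's d'tor runs HasOneFailure on 'results'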
-SingleFailureChecker::~SingleFailureChecker() { - EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_); -} - -DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter( - UnitTestImpl* unit_test) : unit_test_(unit_test) {} - -void DefaultGlobalTestPartResultReporter::ReportTestPartResult( - const TestPartResult& result) { - unit_test_->current_test_result()->AddTestPartResult(result); - unit_test_->listeners()->repeater()->OnTestPartResult(result); -} - -DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter( - UnitTestImpl* unit_test) : unit_test_(unit_test) {} - -void DefaultPerThreadTestPartResultReporter::ReportTestPartResult( - const TestPartResult& result) { - unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result); -} - -// Returns the global test part result reporter. -TestPartResultReporterInterface* -UnitTestImpl::GetGlobalTestPartResultReporter() { - internal::MutexLock lock(&global_test_part_result_reporter_mutex_); - return global_test_part_result_repoter_; -} - -// Sets the global test part result reporter. -void UnitTestImpl::SetGlobalTestPartResultReporter( - TestPartResultReporterInterface* reporter) { - internal::MutexLock lock(&global_test_part_result_reporter_mutex_); - global_test_part_result_repoter_ = reporter; -} - -// Returns the test part result reporter for the current thread. -TestPartResultReporterInterface* -UnitTestImpl::GetTestPartResultReporterForCurrentThread() { - return per_thread_test_part_result_reporter_.get(); -} - -// Sets the test part result reporter for the current thread. -void UnitTestImpl::SetTestPartResultReporterForCurrentThread( - TestPartResultReporterInterface* reporter) { - per_thread_test_part_result_reporter_.set(reporter); -} - -// Gets the number of successful test cases. -int UnitTestImpl::successful_test_case_count() const { - return CountIf(test_cases_, TestCasePassed); -} - -// Gets the number of failed test cases. -int UnitTestImpl::failed_test_case_count() const { - return CountIf(test_cases_, TestCaseFailed); -} - -// Gets the number of all test cases. -int UnitTestImpl::total_test_case_count() const { - return static_cast(test_cases_.size()); -} - -// Gets the number of all test cases that contain at least one test -// that should run. -int UnitTestImpl::test_case_to_run_count() const { - return CountIf(test_cases_, ShouldRunTestCase); -} - -// Gets the number of successful tests. -int UnitTestImpl::successful_test_count() const { - return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count); -} - -// Gets the number of failed tests. -int UnitTestImpl::failed_test_count() const { - return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count); -} - -// Gets the number of disabled tests. -int UnitTestImpl::disabled_test_count() const { - return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count); -} - -// Gets the number of all tests. -int UnitTestImpl::total_test_count() const { - return SumOverTestCaseList(test_cases_, &TestCase::total_test_count); -} - -// Gets the number of tests that should run. -int UnitTestImpl::test_to_run_count() const { - return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count); -} - -// Returns the current OS stack trace as a String. -// -// The maximum number of stack frames to be included is specified by -// the gtest_stack_trace_depth flag. 
The skip_count parameter -// specifies the number of top frames to be skipped, which doesn't -// count against the number of frames to be included. -// -// For example, if Foo() calls Bar(), which in turn calls -// CurrentOsStackTraceExceptTop(1), Foo() will be included in the -// trace but Bar() and CurrentOsStackTraceExceptTop() won't. -String UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) { - (void)skip_count; - return String(""); -} - -// Returns the current time in milliseconds. -TimeInMillis GetTimeInMillis() { -#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__) - // Difference between 1970-01-01 and 1601-01-01 in milliseconds. - // http://analogous.blogspot.com/2005/04/epoch.html - const TimeInMillis kJavaEpochToWinFileTimeDelta = - static_cast(116444736UL) * 100000UL; - const DWORD kTenthMicrosInMilliSecond = 10000; - - SYSTEMTIME now_systime; - FILETIME now_filetime; - ULARGE_INTEGER now_int64; - // TODO(kenton@google.com): Shouldn't this just use - // GetSystemTimeAsFileTime()? - GetSystemTime(&now_systime); - if (SystemTimeToFileTime(&now_systime, &now_filetime)) { - now_int64.LowPart = now_filetime.dwLowDateTime; - now_int64.HighPart = now_filetime.dwHighDateTime; - now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) - - kJavaEpochToWinFileTimeDelta; - return now_int64.QuadPart; - } - return 0; -#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_ - __timeb64 now; - -# ifdef _MSC_VER - - // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996 - // (deprecated function) there. - // TODO(kenton@google.com): Use GetTickCount()? Or use - // SystemTimeToFileTime() -# pragma warning(push) // Saves the current warning state. -# pragma warning(disable:4996) // Temporarily disables warning 4996. - _ftime64(&now); -# pragma warning(pop) // Restores the warning state. -# else - - _ftime64(&now); - -# endif // _MSC_VER - - return static_cast(now.time) * 1000 + now.millitm; -#elif GTEST_HAS_GETTIMEOFDAY_ - struct timeval now; - gettimeofday(&now, NULL); - return static_cast(now.tv_sec) * 1000 + now.tv_usec / 1000; -#else -# error "Don't know how to get the current time on your system." -#endif -} - -// Utilities - -// class String - -// Returns the input enclosed in double quotes if it's not NULL; -// otherwise returns "(null)". For example, "\"Hello\"" is returned -// for input "Hello". -// -// This is useful for printing a C string in the syntax of a literal. -// -// Known issue: escape sequences are not handled yet. -String String::ShowCStringQuoted(const char* c_str) { - return c_str ? String::Format("\"%s\"", c_str) : String("(null)"); -} - -// Copies at most length characters from str into a newly-allocated -// piece of memory of size length+1. The memory is allocated with new[]. -// A terminating null byte is written to the memory, and a pointer to it -// is returned. If str is NULL, NULL is returned. -static char* CloneString(const char* str, size_t length) { - if (str == NULL) { - return NULL; - } else { - char* const clone = new char[length + 1]; - posix::StrNCpy(clone, str, length); - clone[length] = '\0'; - return clone; - } -} - -// Clones a 0-terminated C string, allocating memory using new. The -// caller is responsible for deleting[] the return value. Returns the -// cloned string, or NULL if the input is NULL. -const char * String::CloneCString(const char* c_str) { - return (c_str == NULL) ? 
- NULL : CloneString(c_str, strlen(c_str)); -} - -#if GTEST_OS_WINDOWS_MOBILE -// Creates a UTF-16 wide string from the given ANSI string, allocating -// memory using new. The caller is responsible for deleting the return -// value using delete[]. Returns the wide string, or NULL if the -// input is NULL. -LPCWSTR String::AnsiToUtf16(const char* ansi) { - if (!ansi) return NULL; - const int length = strlen(ansi); - const int unicode_length = - MultiByteToWideChar(CP_ACP, 0, ansi, length, - NULL, 0); - WCHAR* unicode = new WCHAR[unicode_length + 1]; - MultiByteToWideChar(CP_ACP, 0, ansi, length, - unicode, unicode_length); - unicode[unicode_length] = 0; - return unicode; -} - -// Creates an ANSI string from the given wide string, allocating -// memory using new. The caller is responsible for deleting the return -// value using delete[]. Returns the ANSI string, or NULL if the -// input is NULL. -const char* String::Utf16ToAnsi(LPCWSTR utf16_str) { - if (!utf16_str) return NULL; - const int ansi_length = - WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, - NULL, 0, NULL, NULL); - char* ansi = new char[ansi_length + 1]; - WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, - ansi, ansi_length, NULL, NULL); - ansi[ansi_length] = 0; - return ansi; -} - -#endif // GTEST_OS_WINDOWS_MOBILE - -// Compares two C strings. Returns true iff they have the same content. -// -// Unlike strcmp(), this function can handle NULL argument(s). A NULL -// C string is considered different to any non-NULL C string, -// including the empty string. -bool String::CStringEquals(const char * lhs, const char * rhs) { - if ( lhs == NULL ) return rhs == NULL; - - if ( rhs == NULL ) return false; - - return strcmp(lhs, rhs) == 0; -} - -#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING - -// Converts an array of wide chars to a narrow string using the UTF-8 -// encoding, and streams the result to the given Message object. -static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length, - Message* msg) { - // TODO(wan): consider allowing a testing::String object to - // contain '\0'. This will make it behave more like std::string, - // and will allow ToUtf8String() to return the correct encoding - // for '\0' s.t. we can get rid of the conditional here (and in - // several other places). - for (size_t i = 0; i != length; ) { // NOLINT - if (wstr[i] != L'\0') { - *msg << WideStringToUtf8(wstr + i, static_cast(length - i)); - while (i != length && wstr[i] != L'\0') - i++; - } else { - *msg << '\0'; - i++; - } - } -} - -#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING - -} // namespace internal - -#if GTEST_HAS_STD_WSTRING -// Converts the given wide string to a narrow string using the UTF-8 -// encoding, and streams the result to this Message object. -Message& Message::operator <<(const ::std::wstring& wstr) { - internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); - return *this; -} -#endif // GTEST_HAS_STD_WSTRING - -#if GTEST_HAS_GLOBAL_WSTRING -// Converts the given wide string to a narrow string using the UTF-8 -// encoding, and streams the result to this Message object. -Message& Message::operator <<(const ::wstring& wstr) { - internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); - return *this; -} -#endif // GTEST_HAS_GLOBAL_WSTRING - -// AssertionResult constructors. -// Used in EXPECT_TRUE/FALSE(assertion_result). -AssertionResult::AssertionResult(const AssertionResult& other) - : success_(other.success_), - message_(other.message_.get() != NULL ? 
- new ::std::string(*other.message_) : - static_cast< ::std::string*>(NULL)) { -} - -// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. -AssertionResult AssertionResult::operator!() const { - AssertionResult negation(!success_); - if (message_.get() != NULL) - negation << *message_; - return negation; -} - -// Makes a successful assertion result. -AssertionResult AssertionSuccess() { - return AssertionResult(true); -} - -// Makes a failed assertion result. -AssertionResult AssertionFailure() { - return AssertionResult(false); -} - -// Makes a failed assertion result with the given failure message. -// Deprecated; use AssertionFailure() << message. -AssertionResult AssertionFailure(const Message& message) { - return AssertionFailure() << message; -} - -namespace internal { - -// Constructs and returns the message for an equality assertion -// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. -// -// The first four parameters are the expressions used in the assertion -// and their values, as strings. For example, for ASSERT_EQ(foo, bar) -// where foo is 5 and bar is 6, we have: -// -// expected_expression: "foo" -// actual_expression: "bar" -// expected_value: "5" -// actual_value: "6" -// -// The ignoring_case parameter is true iff the assertion is a -// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will -// be inserted into the message. -AssertionResult EqFailure(const char* expected_expression, - const char* actual_expression, - const String& expected_value, - const String& actual_value, - bool ignoring_case) { - Message msg; - msg << "Value of: " << actual_expression; - if (actual_value != actual_expression) { - msg << "\n Actual: " << actual_value; - } - - msg << "\nExpected: " << expected_expression; - if (ignoring_case) { - msg << " (ignoring case)"; - } - if (expected_value != expected_expression) { - msg << "\nWhich is: " << expected_value; - } - - return AssertionFailure() << msg; -} - -// Constructs a failure message for Boolean assertions such as EXPECT_TRUE. -String GetBoolAssertionFailureMessage(const AssertionResult& assertion_result, - const char* expression_text, - const char* actual_predicate_value, - const char* expected_predicate_value) { - const char* actual_message = assertion_result.message(); - Message msg; - msg << "Value of: " << expression_text - << "\n Actual: " << actual_predicate_value; - if (actual_message[0] != '\0') - msg << " (" << actual_message << ")"; - msg << "\nExpected: " << expected_predicate_value; - return msg.GetString(); -} - -// Helper function for implementing ASSERT_NEAR. -AssertionResult DoubleNearPredFormat(const char* expr1, - const char* expr2, - const char* abs_error_expr, - double val1, - double val2, - double abs_error) { - const double diff = fabs(val1 - val2); - if (diff <= abs_error) return AssertionSuccess(); - - // TODO(wan): do not print the value of an expression if it's - // already a literal. - return AssertionFailure() - << "The difference between " << expr1 << " and " << expr2 - << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n" - << expr1 << " evaluates to " << val1 << ",\n" - << expr2 << " evaluates to " << val2 << ", and\n" - << abs_error_expr << " evaluates to " << abs_error << "."; -} - - -// Helper template for implementing FloatLE() and DoubleLE(). 
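For orientation, a sketch of how the assertion helpers above surface in user code (the test names here are invented): EXPECT_NEAR routes through DoubleNearPredFormat via EXPECT_PRED_FORMAT3, and a failing EXPECT_EQ prints the "Value of / Actual / Expected / Which is" layout assembled by EqFailure().

#include "gtest/gtest.h"

TEST(AssertionSketch, NearAndEq) {
  // Roughly equivalent to
  //   EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat,
  //                       3.14159, 3.1416, 1e-4);
  EXPECT_NEAR(3.14159, 3.1416, 1e-4);  // passes: |diff| = 1e-5 <= 1e-4

  const int foo = 5, bar = 5;
  // If bar were 6, EqFailure() would produce:
  //   Value of: bar
  //     Actual: 6
  //   Expected: foo
  //   Which is: 5
  EXPECT_EQ(foo, bar);
}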
-template <typename RawType>
-AssertionResult FloatingPointLE(const char* expr1,
-                                const char* expr2,
-                                RawType val1,
-                                RawType val2) {
-  // Returns success if val1 is less than val2,
-  if (val1 < val2) {
-    return AssertionSuccess();
-  }
-
-  // or if val1 is almost equal to val2.
-  const FloatingPoint<RawType> lhs(val1), rhs(val2);
-  if (lhs.AlmostEquals(rhs)) {
-    return AssertionSuccess();
-  }
-
-  // Note that the above two checks will both fail if either val1 or
-  // val2 is NaN, as the IEEE floating-point standard requires that
-  // any predicate involving a NaN must return false.
-
-  ::std::stringstream val1_ss;
-  val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
-          << val1;
-
-  ::std::stringstream val2_ss;
-  val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
-          << val2;
-
-  return AssertionFailure()
-      << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
-      << "  Actual: " << StringStreamToString(&val1_ss) << " vs "
-      << StringStreamToString(&val2_ss);
-}
-
-}  // namespace internal
-
-// Asserts that val1 is less than, or almost equal to, val2.  Fails
-// otherwise.  In particular, it fails if either val1 or val2 is NaN.
-AssertionResult FloatLE(const char* expr1, const char* expr2,
-                        float val1, float val2) {
-  return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
-}
-
-// Asserts that val1 is less than, or almost equal to, val2.  Fails
-// otherwise.  In particular, it fails if either val1 or val2 is NaN.
-AssertionResult DoubleLE(const char* expr1, const char* expr2,
-                         double val1, double val2) {
-  return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
-}
-
-namespace internal {
-
-// The helper function for {ASSERT|EXPECT}_EQ with int or enum
-// arguments.
-AssertionResult CmpHelperEQ(const char* expected_expression,
-                            const char* actual_expression,
-                            BiggestInt expected,
-                            BiggestInt actual) {
-  if (expected == actual) {
-    return AssertionSuccess();
-  }
-
-  return EqFailure(expected_expression,
-                   actual_expression,
-                   FormatForComparisonFailureMessage(expected, actual),
-                   FormatForComparisonFailureMessage(actual, expected),
-                   false);
-}
-
-// A macro for implementing the helper functions needed to implement
-// ASSERT_?? and EXPECT_?? with integer or enum arguments.  It is here
-// just to avoid copy-and-paste of similar code.
-#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
-AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
-                                   BiggestInt val1, BiggestInt val2) {\
-  if (val1 op val2) {\
-    return AssertionSuccess();\
-  } else {\
-    return AssertionFailure() \
-        << "Expected: (" << expr1 << ") " #op " (" << expr2\
-        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
-        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
-  }\
-}
-
-// Implements the helper function for {ASSERT|EXPECT}_NE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(NE, !=)
-// Implements the helper function for {ASSERT|EXPECT}_LE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LE, <=)
-// Implements the helper function for {ASSERT|EXPECT}_LT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LT, < )
-// Implements the helper function for {ASSERT|EXPECT}_GE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GE, >=)
-// Implements the helper function for {ASSERT|EXPECT}_GT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GT, > )
-
-#undef GTEST_IMPL_CMP_HELPER_
-
-// The helper function for {ASSERT|EXPECT}_STREQ.
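FloatLE and DoubleLE are exposed in the ::testing namespace precisely so they can be plugged into the two-argument predicate-format macros; a minimal sketch (test name invented):

#include "gtest/gtest.h"

TEST(FloatingPointLESketch, LessOrAlmostEqual) {
  // Succeeds when val1 < val2, or when the two values are within 4 ULPs
  // of each other (FloatingPoint<RawType>::AlmostEquals); fails for NaN.
  EXPECT_PRED_FORMAT2(::testing::FloatLE, 1.0f, 1.000001f);
  EXPECT_PRED_FORMAT2(::testing::DoubleLE, 0.999999, 1.0);
}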
-AssertionResult CmpHelperSTREQ(const char* expected_expression, - const char* actual_expression, - const char* expected, - const char* actual) { - if (String::CStringEquals(expected, actual)) { - return AssertionSuccess(); - } - - return EqFailure(expected_expression, - actual_expression, - String::ShowCStringQuoted(expected), - String::ShowCStringQuoted(actual), - false); -} - -// The helper function for {ASSERT|EXPECT}_STRCASEEQ. -AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression, - const char* actual_expression, - const char* expected, - const char* actual) { - if (String::CaseInsensitiveCStringEquals(expected, actual)) { - return AssertionSuccess(); - } - - return EqFailure(expected_expression, - actual_expression, - String::ShowCStringQuoted(expected), - String::ShowCStringQuoted(actual), - true); -} - -// The helper function for {ASSERT|EXPECT}_STRNE. -AssertionResult CmpHelperSTRNE(const char* s1_expression, - const char* s2_expression, - const char* s1, - const char* s2) { - if (!String::CStringEquals(s1, s2)) { - return AssertionSuccess(); - } else { - return AssertionFailure() << "Expected: (" << s1_expression << ") != (" - << s2_expression << "), actual: \"" - << s1 << "\" vs \"" << s2 << "\""; - } -} - -// The helper function for {ASSERT|EXPECT}_STRCASENE. -AssertionResult CmpHelperSTRCASENE(const char* s1_expression, - const char* s2_expression, - const char* s1, - const char* s2) { - if (!String::CaseInsensitiveCStringEquals(s1, s2)) { - return AssertionSuccess(); - } else { - return AssertionFailure() - << "Expected: (" << s1_expression << ") != (" - << s2_expression << ") (ignoring case), actual: \"" - << s1 << "\" vs \"" << s2 << "\""; - } -} - -} // namespace internal - -namespace { - -// Helper functions for implementing IsSubString() and IsNotSubstring(). - -// This group of overloaded functions return true iff needle is a -// substring of haystack. NULL is considered a substring of itself -// only. - -bool IsSubstringPred(const char* needle, const char* haystack) { - if (needle == NULL || haystack == NULL) - return needle == haystack; - - return strstr(haystack, needle) != NULL; -} - -bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) { - if (needle == NULL || haystack == NULL) - return needle == haystack; - - return wcsstr(haystack, needle) != NULL; -} - -// StringType here can be either ::std::string or ::std::wstring. -template -bool IsSubstringPred(const StringType& needle, - const StringType& haystack) { - return haystack.find(needle) != StringType::npos; -} - -// This function implements either IsSubstring() or IsNotSubstring(), -// depending on the value of the expected_to_be_substring parameter. -// StringType here can be const char*, const wchar_t*, ::std::string, -// or ::std::wstring. -template -AssertionResult IsSubstringImpl( - bool expected_to_be_substring, - const char* needle_expr, const char* haystack_expr, - const StringType& needle, const StringType& haystack) { - if (IsSubstringPred(needle, haystack) == expected_to_be_substring) - return AssertionSuccess(); - - const bool is_wide_string = sizeof(needle[0]) > 1; - const char* const begin_string_quote = is_wide_string ? "L\"" : "\""; - return AssertionFailure() - << "Value of: " << needle_expr << "\n" - << " Actual: " << begin_string_quote << needle << "\"\n" - << "Expected: " << (expected_to_be_substring ? 
"" : "not ") - << "a substring of " << haystack_expr << "\n" - << "Which is: " << begin_string_quote << haystack << "\""; -} - -} // namespace - -// IsSubstring() and IsNotSubstring() check whether needle is a -// substring of haystack (NULL is considered a substring of itself -// only), and return an appropriate error message when they fail. - -AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const char* needle, const char* haystack) { - return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); -} - -AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const wchar_t* needle, const wchar_t* haystack) { - return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); -} - -AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const char* needle, const char* haystack) { - return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); -} - -AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const wchar_t* needle, const wchar_t* haystack) { - return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); -} - -AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::string& needle, const ::std::string& haystack) { - return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); -} - -AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::string& needle, const ::std::string& haystack) { - return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); -} - -#if GTEST_HAS_STD_WSTRING -AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::wstring& needle, const ::std::wstring& haystack) { - return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); -} - -AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::wstring& needle, const ::std::wstring& haystack) { - return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); -} -#endif // GTEST_HAS_STD_WSTRING - -namespace internal { - -#if GTEST_OS_WINDOWS - -namespace { - -// Helper function for IsHRESULT{SuccessFailure} predicates -AssertionResult HRESULTFailureHelper(const char* expr, - const char* expected, - long hr) { // NOLINT -# if GTEST_OS_WINDOWS_MOBILE - - // Windows CE doesn't support FormatMessage. - const char error_text[] = ""; - -# else - - // Looks up the human-readable system message for the HRESULT code - // and since we're not passing any params to FormatMessage, we don't - // want inserts expanded. - const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS; - const DWORD kBufSize = 4096; // String::Format can't exceed this length. - // Gets the system's human readable message string for this HRESULT. 
- char error_text[kBufSize] = { '\0' }; - DWORD message_length = ::FormatMessageA(kFlags, - 0, // no source, we're asking system - hr, // the error - 0, // no line width restrictions - error_text, // output buffer - kBufSize, // buf size - NULL); // no arguments for inserts - // Trims tailing white space (FormatMessage leaves a trailing cr-lf) - for (; message_length && IsSpace(error_text[message_length - 1]); - --message_length) { - error_text[message_length - 1] = '\0'; - } - -# endif // GTEST_OS_WINDOWS_MOBILE - - const String error_hex(String::Format("0x%08X ", hr)); - return ::testing::AssertionFailure() - << "Expected: " << expr << " " << expected << ".\n" - << " Actual: " << error_hex << error_text << "\n"; -} - -} // namespace - -AssertionResult IsHRESULTSuccess(const char* expr, long hr) { // NOLINT - if (SUCCEEDED(hr)) { - return AssertionSuccess(); - } - return HRESULTFailureHelper(expr, "succeeds", hr); -} - -AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT - if (FAILED(hr)) { - return AssertionSuccess(); - } - return HRESULTFailureHelper(expr, "fails", hr); -} - -#endif // GTEST_OS_WINDOWS - -// Utility functions for encoding Unicode text (wide strings) in -// UTF-8. - -// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8 -// like this: -// -// Code-point length Encoding -// 0 - 7 bits 0xxxxxxx -// 8 - 11 bits 110xxxxx 10xxxxxx -// 12 - 16 bits 1110xxxx 10xxxxxx 10xxxxxx -// 17 - 21 bits 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - -// The maximum code-point a one-byte UTF-8 sequence can represent. -const UInt32 kMaxCodePoint1 = (static_cast(1) << 7) - 1; - -// The maximum code-point a two-byte UTF-8 sequence can represent. -const UInt32 kMaxCodePoint2 = (static_cast(1) << (5 + 6)) - 1; - -// The maximum code-point a three-byte UTF-8 sequence can represent. -const UInt32 kMaxCodePoint3 = (static_cast(1) << (4 + 2*6)) - 1; - -// The maximum code-point a four-byte UTF-8 sequence can represent. -const UInt32 kMaxCodePoint4 = (static_cast(1) << (3 + 3*6)) - 1; - -// Chops off the n lowest bits from a bit pattern. Returns the n -// lowest bits. As a side effect, the original bit pattern will be -// shifted to the right by n bits. -inline UInt32 ChopLowBits(UInt32* bits, int n) { - const UInt32 low_bits = *bits & ((static_cast(1) << n) - 1); - *bits >>= n; - return low_bits; -} - -// Converts a Unicode code point to a narrow string in UTF-8 encoding. -// code_point parameter is of type UInt32 because wchar_t may not be -// wide enough to contain a code point. -// The output buffer str must containt at least 32 characters. -// The function returns the address of the output buffer. -// If the code_point is not a valid Unicode code point -// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output -// as '(Invalid Unicode 0xXXXXXXXX)'. 
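The two HRESULT predicates above back Google Test's Windows-only {ASSERT,EXPECT}_HRESULT_{SUCCEEDED,FAILED} macros. A Windows-only sketch; the COM call is merely an example of an expression returning an HRESULT:

#include "gtest/gtest.h"

#if GTEST_OS_WINDOWS
# include <objbase.h>

TEST(HresultSketch, ComInitialization) {
  // On failure, HRESULTFailureHelper() appends the hex code and the
  // FormatMessage() text to the assertion message.
  EXPECT_HRESULT_SUCCEEDED(::CoInitializeEx(NULL, COINIT_APARTMENTTHREADED));
  ::CoUninitialize();
}
#endif  // GTEST_OS_WINDOWS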
-char* CodePointToUtf8(UInt32 code_point, char* str) { - if (code_point <= kMaxCodePoint1) { - str[1] = '\0'; - str[0] = static_cast(code_point); // 0xxxxxxx - } else if (code_point <= kMaxCodePoint2) { - str[2] = '\0'; - str[1] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx - str[0] = static_cast(0xC0 | code_point); // 110xxxxx - } else if (code_point <= kMaxCodePoint3) { - str[3] = '\0'; - str[2] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx - str[1] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx - str[0] = static_cast(0xE0 | code_point); // 1110xxxx - } else if (code_point <= kMaxCodePoint4) { - str[4] = '\0'; - str[3] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx - str[2] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx - str[1] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx - str[0] = static_cast(0xF0 | code_point); // 11110xxx - } else { - // The longest string String::Format can produce when invoked - // with these parameters is 28 character long (not including - // the terminating nul character). We are asking for 32 character - // buffer just in case. This is also enough for strncpy to - // null-terminate the destination string. - posix::StrNCpy( - str, String::Format("(Invalid Unicode 0x%X)", code_point).c_str(), 32); - str[31] = '\0'; // Makes sure no change in the format to strncpy leaves - // the result unterminated. - } - return str; -} - -// The following two functions only make sense if the the system -// uses UTF-16 for wide string encoding. All supported systems -// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16. - -// Determines if the arguments constitute UTF-16 surrogate pair -// and thus should be combined into a single Unicode code point -// using CreateCodePointFromUtf16SurrogatePair. -inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) { - return sizeof(wchar_t) == 2 && - (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00; -} - -// Creates a Unicode code point from UTF16 surrogate pair. -inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first, - wchar_t second) { - const UInt32 mask = (1 << 10) - 1; - return (sizeof(wchar_t) == 2) ? - (((first & mask) << 10) | (second & mask)) + 0x10000 : - // This function should not be called when the condition is - // false, but we provide a sensible default in case it is. - static_cast(first); -} - -// Converts a wide string to a narrow string in UTF-8 encoding. -// The wide string is assumed to have the following encoding: -// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS) -// UTF-32 if sizeof(wchar_t) == 4 (on Linux) -// Parameter str points to a null-terminated wide string. -// Parameter num_chars may additionally limit the number -// of wchar_t characters processed. -1 is used when the entire string -// should be processed. -// If the string contains code points that are not valid Unicode code points -// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output -// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding -// and contains invalid UTF-16 surrogate pairs, values in those pairs -// will be encoded as individual Unicode characters from Basic Normal Plane. 
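A worked example of the two-byte branch above; CodePointToUtf8 is an internal helper (declared in gtest-internal.h), so treat this as a sketch for tracing the arithmetic rather than sanctioned API use:

#include "gtest/gtest.h"

TEST(CodePointToUtf8Sketch, TwoByteSequence) {
  // U+00E9 ('é', 0xE9 = 233) exceeds kMaxCodePoint1 (127) but fits in
  // kMaxCodePoint2 (2047), so the two-byte branch runs:
  //   ChopLowBits(&cp, 6) -> 0x29, leaving cp == 0x03
  //   str[1] = 0x80 | 0x29 = 0xA9;  str[0] = 0xC0 | 0x03 = 0xC3
  char buf[32];  // the documented minimum buffer size
  testing::internal::CodePointToUtf8(0xE9, buf);
  EXPECT_STREQ("\xC3\xA9", buf);
}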
-String WideStringToUtf8(const wchar_t* str, int num_chars) { - if (num_chars == -1) - num_chars = static_cast(wcslen(str)); - - ::std::stringstream stream; - for (int i = 0; i < num_chars; ++i) { - UInt32 unicode_code_point; - - if (str[i] == L'\0') { - break; - } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) { - unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i], - str[i + 1]); - i++; - } else { - unicode_code_point = static_cast(str[i]); - } - - char buffer[32]; // CodePointToUtf8 requires a buffer this big. - stream << CodePointToUtf8(unicode_code_point, buffer); - } - return StringStreamToString(&stream); -} - -// Converts a wide C string to a String using the UTF-8 encoding. -// NULL will be converted to "(null)". -String String::ShowWideCString(const wchar_t * wide_c_str) { - if (wide_c_str == NULL) return String("(null)"); - - return String(internal::WideStringToUtf8(wide_c_str, -1).c_str()); -} - -// Similar to ShowWideCString(), except that this function encloses -// the converted string in double quotes. -String String::ShowWideCStringQuoted(const wchar_t* wide_c_str) { - if (wide_c_str == NULL) return String("(null)"); - - return String::Format("L\"%s\"", - String::ShowWideCString(wide_c_str).c_str()); -} - -// Compares two wide C strings. Returns true iff they have the same -// content. -// -// Unlike wcscmp(), this function can handle NULL argument(s). A NULL -// C string is considered different to any non-NULL C string, -// including the empty string. -bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) { - if (lhs == NULL) return rhs == NULL; - - if (rhs == NULL) return false; - - return wcscmp(lhs, rhs) == 0; -} - -// Helper function for *_STREQ on wide strings. -AssertionResult CmpHelperSTREQ(const char* expected_expression, - const char* actual_expression, - const wchar_t* expected, - const wchar_t* actual) { - if (String::WideCStringEquals(expected, actual)) { - return AssertionSuccess(); - } - - return EqFailure(expected_expression, - actual_expression, - String::ShowWideCStringQuoted(expected), - String::ShowWideCStringQuoted(actual), - false); -} - -// Helper function for *_STRNE on wide strings. -AssertionResult CmpHelperSTRNE(const char* s1_expression, - const char* s2_expression, - const wchar_t* s1, - const wchar_t* s2) { - if (!String::WideCStringEquals(s1, s2)) { - return AssertionSuccess(); - } - - return AssertionFailure() << "Expected: (" << s1_expression << ") != (" - << s2_expression << "), actual: " - << String::ShowWideCStringQuoted(s1) - << " vs " << String::ShowWideCStringQuoted(s2); -} - -// Compares two C strings, ignoring case. Returns true iff they have -// the same content. -// -// Unlike strcasecmp(), this function can handle NULL argument(s). A -// NULL C string is considered different to any non-NULL C string, -// including the empty string. -bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) { - if (lhs == NULL) - return rhs == NULL; - if (rhs == NULL) - return false; - return posix::StrCaseCmp(lhs, rhs) == 0; -} - - // Compares two wide C strings, ignoring case. Returns true iff they - // have the same content. - // - // Unlike wcscasecmp(), this function can handle NULL argument(s). - // A NULL C string is considered different to any non-NULL wide C string, - // including the empty string. - // NB: The implementations on different platforms slightly differ. 
- // On windows, this method uses _wcsicmp which compares according to LC_CTYPE - // environment variable. On GNU platform this method uses wcscasecmp - // which compares according to LC_CTYPE category of the current locale. - // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the - // current locale. -bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs, - const wchar_t* rhs) { - if (lhs == NULL) return rhs == NULL; - - if (rhs == NULL) return false; - -#if GTEST_OS_WINDOWS - return _wcsicmp(lhs, rhs) == 0; -#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID - return wcscasecmp(lhs, rhs) == 0; -#else - // Android, Mac OS X and Cygwin don't define wcscasecmp. - // Other unknown OSes may not define it either. - wint_t left, right; - do { - left = towlower(*lhs++); - right = towlower(*rhs++); - } while (left && left == right); - return left == right; -#endif // OS selector -} - -// Compares this with another String. -// Returns < 0 if this is less than rhs, 0 if this is equal to rhs, or > 0 -// if this is greater than rhs. -int String::Compare(const String & rhs) const { - const char* const lhs_c_str = c_str(); - const char* const rhs_c_str = rhs.c_str(); - - if (lhs_c_str == NULL) { - return rhs_c_str == NULL ? 0 : -1; // NULL < anything except NULL - } else if (rhs_c_str == NULL) { - return 1; - } - - const size_t shorter_str_len = - length() <= rhs.length() ? length() : rhs.length(); - for (size_t i = 0; i != shorter_str_len; i++) { - if (lhs_c_str[i] < rhs_c_str[i]) { - return -1; - } else if (lhs_c_str[i] > rhs_c_str[i]) { - return 1; - } - } - return (length() < rhs.length()) ? -1 : - (length() > rhs.length()) ? 1 : 0; -} - -// Returns true iff this String ends with the given suffix. *Any* -// String is considered to end with a NULL or empty suffix. -bool String::EndsWith(const char* suffix) const { - if (suffix == NULL || CStringEquals(suffix, "")) return true; - - if (c_str() == NULL) return false; - - const size_t this_len = strlen(c_str()); - const size_t suffix_len = strlen(suffix); - return (this_len >= suffix_len) && - CStringEquals(c_str() + this_len - suffix_len, suffix); -} - -// Returns true iff this String ends with the given suffix, ignoring case. -// Any String is considered to end with a NULL or empty suffix. -bool String::EndsWithCaseInsensitive(const char* suffix) const { - if (suffix == NULL || CStringEquals(suffix, "")) return true; - - if (c_str() == NULL) return false; - - const size_t this_len = strlen(c_str()); - const size_t suffix_len = strlen(suffix); - return (this_len >= suffix_len) && - CaseInsensitiveCStringEquals(c_str() + this_len - suffix_len, suffix); -} - -// Formats a list of arguments to a String, using the same format -// spec string as for printf. -// -// We do not use the StringPrintf class as it is not universally -// available. -// -// The result is limited to 4096 characters (including the tailing 0). -// If 4096 characters are not enough to format the input, or if -// there's an error, "" is -// returned. -String String::Format(const char * format, ...) { - va_list args; - va_start(args, format); - - char buffer[4096]; - const int kBufferSize = sizeof(buffer)/sizeof(buffer[0]); - - // MSVC 8 deprecates vsnprintf(), so we want to suppress warning - // 4996 (deprecated function) there. -#ifdef _MSC_VER // We are using MSVC. -# pragma warning(push) // Saves the current warning state. -# pragma warning(disable:4996) // Temporarily disables warning 4996. 
- - const int size = vsnprintf(buffer, kBufferSize, format, args); - -# pragma warning(pop) // Restores the warning state. -#else // We are not using MSVC. - const int size = vsnprintf(buffer, kBufferSize, format, args); -#endif // _MSC_VER - va_end(args); - - // vsnprintf()'s behavior is not portable. When the buffer is not - // big enough, it returns a negative value in MSVC, and returns the - // needed buffer size on Linux. When there is an output error, it - // always returns a negative value. For simplicity, we lump the two - // error cases together. - if (size < 0 || size >= kBufferSize) { - return String(""); - } else { - return String(buffer, size); - } -} - -// Converts the buffer in a stringstream to a String, converting NUL -// bytes to "\\0" along the way. -String StringStreamToString(::std::stringstream* ss) { - const ::std::string& str = ss->str(); - const char* const start = str.c_str(); - const char* const end = start + str.length(); - - // We need to use a helper stringstream to do this transformation - // because String doesn't support push_back(). - ::std::stringstream helper; - for (const char* ch = start; ch != end; ++ch) { - if (*ch == '\0') { - helper << "\\0"; // Replaces NUL with "\\0"; - } else { - helper.put(*ch); - } - } - - return String(helper.str().c_str()); -} - -// Appends the user-supplied message to the Google-Test-generated message. -String AppendUserMessage(const String& gtest_msg, - const Message& user_msg) { - // Appends the user message if it's non-empty. - const String user_msg_string = user_msg.GetString(); - if (user_msg_string.empty()) { - return gtest_msg; - } - - Message msg; - msg << gtest_msg << "\n" << user_msg_string; - - return msg.GetString(); -} - -} // namespace internal - -// class TestResult - -// Creates an empty TestResult. -TestResult::TestResult() - : death_test_count_(0), - elapsed_time_(0) { -} - -// D'tor. -TestResult::~TestResult() { -} - -// Returns the i-th test part result among all the results. i can -// range from 0 to total_part_count() - 1. If i is not in that range, -// aborts the program. -const TestPartResult& TestResult::GetTestPartResult(int i) const { - if (i < 0 || i >= total_part_count()) - internal::posix::Abort(); - return test_part_results_.at(i); -} - -// Returns the i-th test property. i can range from 0 to -// test_property_count() - 1. If i is not in that range, aborts the -// program. -const TestProperty& TestResult::GetTestProperty(int i) const { - if (i < 0 || i >= test_property_count()) - internal::posix::Abort(); - return test_properties_.at(i); -} - -// Clears the test part results. -void TestResult::ClearTestPartResults() { - test_part_results_.clear(); -} - -// Adds a test part result to the list. -void TestResult::AddTestPartResult(const TestPartResult& test_part_result) { - test_part_results_.push_back(test_part_result); -} - -// Adds a test property to the list. If a property with the same key as the -// supplied property is already represented, the value of this test_property -// replaces the old value for that key. 
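A small sketch of the two string utilities above (both live in testing::internal and are used throughout this file; the file name and line number below are made up):

#include <cstdio>
#include <sstream>
#include "gtest/gtest.h"

void StringUtilitySketch() {
  using testing::internal::String;
  using testing::internal::StringStreamToString;

  // printf-style, but limited to a 4096-byte buffer; an oversized or
  // failed format yields an error placeholder instead of overflowing.
  const String where = String::Format("%s:%d", "widget_test.cc", 42);

  // Embedded NUL bytes survive as the two visible characters "\0".
  ::std::stringstream ss;
  ss << "ab" << '\0' << "cd";
  const String printable = StringStreamToString(&ss);  // a b \ 0 c d

  std::printf("%s %s\n", where.c_str(), printable.c_str());
}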
-void TestResult::RecordProperty(const TestProperty& test_property) { - if (!ValidateTestProperty(test_property)) { - return; - } - internal::MutexLock lock(&test_properites_mutex_); - const std::vector::iterator property_with_matching_key = - std::find_if(test_properties_.begin(), test_properties_.end(), - internal::TestPropertyKeyIs(test_property.key())); - if (property_with_matching_key == test_properties_.end()) { - test_properties_.push_back(test_property); - return; - } - property_with_matching_key->SetValue(test_property.value()); -} - -// Adds a failure if the key is a reserved attribute of Google Test -// testcase tags. Returns true if the property is valid. -bool TestResult::ValidateTestProperty(const TestProperty& test_property) { - internal::String key(test_property.key()); - if (key == "name" || key == "status" || key == "time" || key == "classname") { - ADD_FAILURE() - << "Reserved key used in RecordProperty(): " - << key - << " ('name', 'status', 'time', and 'classname' are reserved by " - << GTEST_NAME_ << ")"; - return false; - } - return true; -} - -// Clears the object. -void TestResult::Clear() { - test_part_results_.clear(); - test_properties_.clear(); - death_test_count_ = 0; - elapsed_time_ = 0; -} - -// Returns true iff the test failed. -bool TestResult::Failed() const { - for (int i = 0; i < total_part_count(); ++i) { - if (GetTestPartResult(i).failed()) - return true; - } - return false; -} - -// Returns true iff the test part fatally failed. -static bool TestPartFatallyFailed(const TestPartResult& result) { - return result.fatally_failed(); -} - -// Returns true iff the test fatally failed. -bool TestResult::HasFatalFailure() const { - return CountIf(test_part_results_, TestPartFatallyFailed) > 0; -} - -// Returns true iff the test part non-fatally failed. -static bool TestPartNonfatallyFailed(const TestPartResult& result) { - return result.nonfatally_failed(); -} - -// Returns true iff the test has a non-fatal failure. -bool TestResult::HasNonfatalFailure() const { - return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0; -} - -// Gets the number of all test parts. This is the sum of the number -// of successful test parts and the number of failed test parts. -int TestResult::total_part_count() const { - return static_cast(test_part_results_.size()); -} - -// Returns the number of the test properties. -int TestResult::test_property_count() const { - return static_cast(test_properties_.size()); -} - -// class Test - -// Creates a Test object. - -// The c'tor saves the values of all Google Test flags. -Test::Test() - : gtest_flag_saver_(new internal::GTestFlagSaver) { -} - -// The d'tor restores the values of all Google Test flags. -Test::~Test() { - delete gtest_flag_saver_; -} - -// Sets up the test fixture. -// -// A sub-class may override this. -void Test::SetUp() { -} - -// Tears down the test fixture. -// -// A sub-class may override this. -void Test::TearDown() { -} - -// Allows user supplied key value pairs to be recorded for later output. -void Test::RecordProperty(const char* key, const char* value) { - UnitTest::GetInstance()->RecordPropertyForCurrentTest(key, value); -} - -// Allows user supplied key value pairs to be recorded for later output. 
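The RecordProperty() plumbing above is reached from user code through Test::RecordProperty(); a sketch (key names invented) of what ends up as attributes on the testcase element in --gtest_output=xml:

#include "gtest/gtest.h"

TEST(RecordPropertySketch, EmitsXmlAttributes) {
  RecordProperty("build_flavor", "debug");  // const char* overload
  RecordProperty("retry_count", 2);         // int overload, defined just below

  // Reserved keys are rejected by ValidateTestProperty() with ADD_FAILURE():
  // RecordProperty("name", "x") would fail this test.
}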
-void Test::RecordProperty(const char* key, int value) { - Message value_message; - value_message << value; - RecordProperty(key, value_message.GetString().c_str()); -} - -namespace internal { - -void ReportFailureInUnknownLocation(TestPartResult::Type result_type, - const String& message) { - // This function is a friend of UnitTest and as such has access to - // AddTestPartResult. - UnitTest::GetInstance()->AddTestPartResult( - result_type, - NULL, // No info about the source file where the exception occurred. - -1, // We have no info on which line caused the exception. - message, - String()); // No stack trace, either. -} - -} // namespace internal - -// Google Test requires all tests in the same test case to use the same test -// fixture class. This function checks if the current test has the -// same fixture class as the first test in the current test case. If -// yes, it returns true; otherwise it generates a Google Test failure and -// returns false. -bool Test::HasSameFixtureClass() { - internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); - const TestCase* const test_case = impl->current_test_case(); - - // Info about the first test in the current test case. - const TestInfo* const first_test_info = test_case->test_info_list()[0]; - const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_; - const char* const first_test_name = first_test_info->name(); - - // Info about the current test. - const TestInfo* const this_test_info = impl->current_test_info(); - const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_; - const char* const this_test_name = this_test_info->name(); - - if (this_fixture_id != first_fixture_id) { - // Is the first test defined using TEST? - const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId(); - // Is this test defined using TEST? - const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId(); - - if (first_is_TEST || this_is_TEST) { - // The user mixed TEST and TEST_F in this test case - we'll tell - // him/her how to fix it. - - // Gets the name of the TEST and the name of the TEST_F. Note - // that first_is_TEST and this_is_TEST cannot both be true, as - // the fixture IDs are different for the two tests. - const char* const TEST_name = - first_is_TEST ? first_test_name : this_test_name; - const char* const TEST_F_name = - first_is_TEST ? this_test_name : first_test_name; - - ADD_FAILURE() - << "All tests in the same test case must use the same test fixture\n" - << "class, so mixing TEST_F and TEST in the same test case is\n" - << "illegal. In test case " << this_test_info->test_case_name() - << ",\n" - << "test " << TEST_F_name << " is defined using TEST_F but\n" - << "test " << TEST_name << " is defined using TEST. You probably\n" - << "want to change the TEST to TEST_F or move it to another test\n" - << "case."; - } else { - // The user defined two fixture classes with the same name in - // two namespaces - we'll tell him/her how to fix it. - ADD_FAILURE() - << "All tests in the same test case must use the same test fixture\n" - << "class. However, in test case " - << this_test_info->test_case_name() << ",\n" - << "you defined test " << first_test_name - << " and test " << this_test_name << "\n" - << "using two different test fixture classes. This can happen if\n" - << "the two classes are from different namespaces or translation\n" - << "units and have the same name. 
You should probably rename one\n" - << "of the classes to put the tests into different test cases."; - } - return false; - } - - return true; -} - -#if GTEST_HAS_SEH - -// Adds an "exception thrown" fatal failure to the current test. This -// function returns its result via an output parameter pointer because VC++ -// prohibits creation of objects with destructors on stack in functions -// using __try (see error C2712). -static internal::String* FormatSehExceptionMessage(DWORD exception_code, - const char* location) { - Message message; - message << "SEH exception with code 0x" << std::setbase(16) << - exception_code << std::setbase(10) << " thrown in " << location << "."; - - return new internal::String(message.GetString()); -} - -#endif // GTEST_HAS_SEH - -#if GTEST_HAS_EXCEPTIONS - -// Adds an "exception thrown" fatal failure to the current test. -static internal::String FormatCxxExceptionMessage(const char* description, - const char* location) { - Message message; - if (description != NULL) { - message << "C++ exception with description \"" << description << "\""; - } else { - message << "Unknown C++ exception"; - } - message << " thrown in " << location << "."; - - return message.GetString(); -} - -static internal::String PrintTestPartResultToString( - const TestPartResult& test_part_result); - -// A failed Google Test assertion will throw an exception of this type when -// GTEST_FLAG(throw_on_failure) is true (if exceptions are enabled). We -// derive it from std::runtime_error, which is for errors presumably -// detectable only at run time. Since std::runtime_error inherits from -// std::exception, many testing frameworks know how to extract and print the -// message inside it. -class GoogleTestFailureException : public ::std::runtime_error { - public: - explicit GoogleTestFailureException(const TestPartResult& failure) - : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {} -}; -#endif // GTEST_HAS_EXCEPTIONS - -namespace internal { -// We put these helper functions in the internal namespace as IBM's xlC -// compiler rejects the code if they were declared static. - -// Runs the given method and handles SEH exceptions it throws, when -// SEH is supported; returns the 0-value for type Result in case of an -// SEH exception. (Microsoft compilers cannot handle SEH and C++ -// exceptions in the same function. Therefore, we provide a separate -// wrapper function for handling SEH exceptions.) -template -Result HandleSehExceptionsInMethodIfSupported( - T* object, Result (T::*method)(), const char* location) { -#if GTEST_HAS_SEH - __try { - return (object->*method)(); - } __except (internal::UnitTestOptions::GTestShouldProcessSEH( // NOLINT - GetExceptionCode())) { - // We create the exception message on the heap because VC++ prohibits - // creation of objects with destructors on stack in functions using __try - // (see error C2712). - internal::String* exception_message = FormatSehExceptionMessage( - GetExceptionCode(), location); - internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure, - *exception_message); - delete exception_message; - return static_cast(0); - } -#else - (void)location; - return (object->*method)(); -#endif // GTEST_HAS_SEH -} - -// Runs the given method and catches and reports C++ and/or SEH-style -// exceptions, if they are supported; returns the 0-value for type -// Result in case of an SEH exception. 
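For context, a deliberately failing sketch of what the exception machinery above reports when a test body lets a C++ exception escape and --gtest_catch_exceptions is left at its default of 1:

#include <stdexcept>
#include "gtest/gtest.h"

TEST(EscapingExceptionSketch, ReportedAsFatalFailure) {
  // Caught by the wrapper defined just below and reported through
  // FormatCxxExceptionMessage() as, roughly:
  //   C++ exception with description "boom" thrown in the test body.
  throw std::runtime_error("boom");
}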
-template -Result HandleExceptionsInMethodIfSupported( - T* object, Result (T::*method)(), const char* location) { - // NOTE: The user code can affect the way in which Google Test handles - // exceptions by setting GTEST_FLAG(catch_exceptions), but only before - // RUN_ALL_TESTS() starts. It is technically possible to check the flag - // after the exception is caught and either report or re-throw the - // exception based on the flag's value: - // - // try { - // // Perform the test method. - // } catch (...) { - // if (GTEST_FLAG(catch_exceptions)) - // // Report the exception as failure. - // else - // throw; // Re-throws the original exception. - // } - // - // However, the purpose of this flag is to allow the program to drop into - // the debugger when the exception is thrown. On most platforms, once the - // control enters the catch block, the exception origin information is - // lost and the debugger will stop the program at the point of the - // re-throw in this function -- instead of at the point of the original - // throw statement in the code under test. For this reason, we perform - // the check early, sacrificing the ability to affect Google Test's - // exception handling in the method where the exception is thrown. - if (internal::GetUnitTestImpl()->catch_exceptions()) { -#if GTEST_HAS_EXCEPTIONS - try { - return HandleSehExceptionsInMethodIfSupported(object, method, location); - } catch (const GoogleTestFailureException&) { // NOLINT - // This exception doesn't originate in code under test. It makes no - // sense to report it as a test failure. - throw; - } catch (const std::exception& e) { // NOLINT - internal::ReportFailureInUnknownLocation( - TestPartResult::kFatalFailure, - FormatCxxExceptionMessage(e.what(), location)); - } catch (...) { // NOLINT - internal::ReportFailureInUnknownLocation( - TestPartResult::kFatalFailure, - FormatCxxExceptionMessage(NULL, location)); - } - return static_cast(0); -#else - return HandleSehExceptionsInMethodIfSupported(object, method, location); -#endif // GTEST_HAS_EXCEPTIONS - } else { - return (object->*method)(); - } -} - -} // namespace internal - -// Runs the test and updates the test result. -void Test::Run() { - if (!HasSameFixtureClass()) return; - - internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); - impl->os_stack_trace_getter()->UponLeavingGTest(); - internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()"); - // We will run the test only if SetUp() was successful. - if (!HasFatalFailure()) { - impl->os_stack_trace_getter()->UponLeavingGTest(); - internal::HandleExceptionsInMethodIfSupported( - this, &Test::TestBody, "the test body"); - } - - // However, we want to clean up as much as possible. Hence we will - // always call TearDown(), even if SetUp() or the test body has - // failed. - impl->os_stack_trace_getter()->UponLeavingGTest(); - internal::HandleExceptionsInMethodIfSupported( - this, &Test::TearDown, "TearDown()"); -} - -// Returns true iff the current test has a fatal failure. -bool Test::HasFatalFailure() { - return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure(); -} - -// Returns true iff the current test has a non-fatal failure. -bool Test::HasNonfatalFailure() { - return internal::GetUnitTestImpl()->current_test_result()-> - HasNonfatalFailure(); -} - -// class TestInfo - -// Constructs a TestInfo object. It assumes ownership of the test factory -// object. 
-// TODO(vladl@google.com): Make a_test_case_name and a_name const string&'s -// to signify they cannot be NULLs. -TestInfo::TestInfo(const char* a_test_case_name, - const char* a_name, - const char* a_type_param, - const char* a_value_param, - internal::TypeId fixture_class_id, - internal::TestFactoryBase* factory) - : test_case_name_(a_test_case_name), - name_(a_name), - type_param_(a_type_param ? new std::string(a_type_param) : NULL), - value_param_(a_value_param ? new std::string(a_value_param) : NULL), - fixture_class_id_(fixture_class_id), - should_run_(false), - is_disabled_(false), - matches_filter_(false), - factory_(factory), - result_() {} - -// Destructs a TestInfo object. -TestInfo::~TestInfo() { delete factory_; } - -namespace internal { - -// Creates a new TestInfo object and registers it with Google Test; -// returns the created object. -// -// Arguments: -// -// test_case_name: name of the test case -// name: name of the test -// type_param: the name of the test's type parameter, or NULL if -// this is not a typed or a type-parameterized test. -// value_param: text representation of the test's value parameter, -// or NULL if this is not a value-parameterized test. -// fixture_class_id: ID of the test fixture class -// set_up_tc: pointer to the function that sets up the test case -// tear_down_tc: pointer to the function that tears down the test case -// factory: pointer to the factory that creates a test object. -// The newly created TestInfo instance will assume -// ownership of the factory object. -TestInfo* MakeAndRegisterTestInfo( - const char* test_case_name, const char* name, - const char* type_param, - const char* value_param, - TypeId fixture_class_id, - SetUpTestCaseFunc set_up_tc, - TearDownTestCaseFunc tear_down_tc, - TestFactoryBase* factory) { - TestInfo* const test_info = - new TestInfo(test_case_name, name, type_param, value_param, - fixture_class_id, factory); - GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info); - return test_info; -} - -#if GTEST_HAS_PARAM_TEST -void ReportInvalidTestCaseType(const char* test_case_name, - const char* file, int line) { - Message errors; - errors - << "Attempted redefinition of test case " << test_case_name << ".\n" - << "All tests in the same test case must use the same test fixture\n" - << "class. However, in test case " << test_case_name << ", you tried\n" - << "to define a test using a fixture class different from the one\n" - << "used earlier. This can happen if the two fixture classes are\n" - << "from different namespaces and have the same name. You should\n" - << "probably rename one of the classes to put the tests into different\n" - << "test cases."; - - fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), - errors.GetString().c_str()); -} -#endif // GTEST_HAS_PARAM_TEST - -} // namespace internal - -namespace { - -// A predicate that checks the test name of a TestInfo against a known -// value. -// -// This is used for implementation of the TestCase class only. We put -// it in the anonymous namespace to prevent polluting the outer -// namespace. -// -// TestNameIs is copyable. -class TestNameIs { - public: - // Constructor. - // - // TestNameIs has NO default constructor. - explicit TestNameIs(const char* name) - : name_(name) {} - - // Returns true iff the test name of test_info matches name_. 
- bool operator()(const TestInfo * test_info) const { - return test_info && internal::String(test_info->name()).Compare(name_) == 0; - } - - private: - internal::String name_; -}; - -} // namespace - -namespace internal { - -// This method expands all parameterized tests registered with macros TEST_P -// and INSTANTIATE_TEST_CASE_P into regular tests and registers those. -// This will be done just once during the program runtime. -void UnitTestImpl::RegisterParameterizedTests() { -#if GTEST_HAS_PARAM_TEST - if (!parameterized_tests_registered_) { - parameterized_test_registry_.RegisterTests(); - parameterized_tests_registered_ = true; - } -#endif -} - -} // namespace internal - -// Creates the test object, runs it, records its result, and then -// deletes it. -void TestInfo::Run() { - if (!should_run_) return; - - // Tells UnitTest where to store test result. - internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); - impl->set_current_test_info(this); - - TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); - - // Notifies the unit test event listeners that a test is about to start. - repeater->OnTestStart(*this); - - const TimeInMillis start = internal::GetTimeInMillis(); - - impl->os_stack_trace_getter()->UponLeavingGTest(); - - // Creates the test object. - Test* const test = internal::HandleExceptionsInMethodIfSupported( - factory_, &internal::TestFactoryBase::CreateTest, - "the test fixture's constructor"); - - // Runs the test only if the test object was created and its - // constructor didn't generate a fatal failure. - if ((test != NULL) && !Test::HasFatalFailure()) { - // This doesn't throw as all user code that can throw are wrapped into - // exception handling code. - test->Run(); - } - - // Deletes the test object. - impl->os_stack_trace_getter()->UponLeavingGTest(); - internal::HandleExceptionsInMethodIfSupported( - test, &Test::DeleteSelf_, "the test fixture's destructor"); - - result_.set_elapsed_time(internal::GetTimeInMillis() - start); - - // Notifies the unit test event listener that a test has just finished. - repeater->OnTestEnd(*this); - - // Tells UnitTest to stop associating assertion results to this - // test. - impl->set_current_test_info(NULL); -} - -// class TestCase - -// Gets the number of successful tests in this test case. -int TestCase::successful_test_count() const { - return CountIf(test_info_list_, TestPassed); -} - -// Gets the number of failed tests in this test case. -int TestCase::failed_test_count() const { - return CountIf(test_info_list_, TestFailed); -} - -int TestCase::disabled_test_count() const { - return CountIf(test_info_list_, TestDisabled); -} - -// Get the number of tests in this test case that should run. -int TestCase::test_to_run_count() const { - return CountIf(test_info_list_, ShouldRunTest); -} - -// Gets the number of all tests. -int TestCase::total_test_count() const { - return static_cast(test_info_list_.size()); -} - -// Creates a TestCase with the given name. -// -// Arguments: -// -// name: name of the test case -// a_type_param: the name of the test case's type parameter, or NULL if -// this is not a typed or a type-parameterized test case. -// set_up_tc: pointer to the function that sets up the test case -// tear_down_tc: pointer to the function that tears down the test case -TestCase::TestCase(const char* a_name, const char* a_type_param, - Test::SetUpTestCaseFunc set_up_tc, - Test::TearDownTestCaseFunc tear_down_tc) - : name_(a_name), - type_param_(a_type_param ? 
new std::string(a_type_param) : NULL), - set_up_tc_(set_up_tc), - tear_down_tc_(tear_down_tc), - should_run_(false), - elapsed_time_(0) { -} - -// Destructor of TestCase. -TestCase::~TestCase() { - // Deletes every Test in the collection. - ForEach(test_info_list_, internal::Delete); -} - -// Returns the i-th test among all the tests. i can range from 0 to -// total_test_count() - 1. If i is not in that range, returns NULL. -const TestInfo* TestCase::GetTestInfo(int i) const { - const int index = GetElementOr(test_indices_, i, -1); - return index < 0 ? NULL : test_info_list_[index]; -} - -// Returns the i-th test among all the tests. i can range from 0 to -// total_test_count() - 1. If i is not in that range, returns NULL. -TestInfo* TestCase::GetMutableTestInfo(int i) { - const int index = GetElementOr(test_indices_, i, -1); - return index < 0 ? NULL : test_info_list_[index]; -} - -// Adds a test to this test case. Will delete the test upon -// destruction of the TestCase object. -void TestCase::AddTestInfo(TestInfo * test_info) { - test_info_list_.push_back(test_info); - test_indices_.push_back(static_cast(test_indices_.size())); -} - -// Runs every test in this TestCase. -void TestCase::Run() { - if (!should_run_) return; - - internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); - impl->set_current_test_case(this); - - TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); - - repeater->OnTestCaseStart(*this); - impl->os_stack_trace_getter()->UponLeavingGTest(); - internal::HandleExceptionsInMethodIfSupported( - this, &TestCase::RunSetUpTestCase, "SetUpTestCase()"); - - const internal::TimeInMillis start = internal::GetTimeInMillis(); - for (int i = 0; i < total_test_count(); i++) { - GetMutableTestInfo(i)->Run(); - } - elapsed_time_ = internal::GetTimeInMillis() - start; - - impl->os_stack_trace_getter()->UponLeavingGTest(); - internal::HandleExceptionsInMethodIfSupported( - this, &TestCase::RunTearDownTestCase, "TearDownTestCase()"); - - repeater->OnTestCaseEnd(*this); - impl->set_current_test_case(NULL); -} - -// Clears the results of all tests in this test case. -void TestCase::ClearResult() { - ForEach(test_info_list_, TestInfo::ClearTestResult); -} - -// Shuffles the tests in this test case. -void TestCase::ShuffleTests(internal::Random* random) { - Shuffle(random, &test_indices_); -} - -// Restores the test order to before the first shuffle. -void TestCase::UnshuffleTests() { - for (size_t i = 0; i < test_indices_.size(); i++) { - test_indices_[i] = static_cast(i); - } -} - -// Formats a countable noun. Depending on its quantity, either the -// singular form or the plural form is used. e.g. -// -// FormatCountableNoun(1, "formula", "formuli") returns "1 formula". -// FormatCountableNoun(5, "book", "books") returns "5 books". -static internal::String FormatCountableNoun(int count, - const char * singular_form, - const char * plural_form) { - return internal::String::Format("%d %s", count, - count == 1 ? singular_form : plural_form); -} - -// Formats the count of tests. -static internal::String FormatTestCount(int test_count) { - return FormatCountableNoun(test_count, "test", "tests"); -} - -// Formats the count of test cases. -static internal::String FormatTestCaseCount(int test_case_count) { - return FormatCountableNoun(test_case_count, "test case", "test cases"); -} - -// Converts a TestPartResult::Type enum to human-friendly string -// representation. 
Both kNonFatalFailure and kFatalFailure are translated -// to "Failure", as the user usually doesn't care about the difference -// between the two when viewing the test result. -static const char * TestPartResultTypeToString(TestPartResult::Type type) { - switch (type) { - case TestPartResult::kSuccess: - return "Success"; - - case TestPartResult::kNonFatalFailure: - case TestPartResult::kFatalFailure: -#ifdef _MSC_VER - return "error: "; -#else - return "Failure\n"; -#endif - default: - return "Unknown result type"; - } -} - -// Prints a TestPartResult to a String. -static internal::String PrintTestPartResultToString( - const TestPartResult& test_part_result) { - return (Message() - << internal::FormatFileLocation(test_part_result.file_name(), - test_part_result.line_number()) - << " " << TestPartResultTypeToString(test_part_result.type()) - << test_part_result.message()).GetString(); -} - -// Prints a TestPartResult. -static void PrintTestPartResult(const TestPartResult& test_part_result) { - const internal::String& result = - PrintTestPartResultToString(test_part_result); - printf("%s\n", result.c_str()); - fflush(stdout); - // If the test program runs in Visual Studio or a debugger, the - // following statements add the test part result message to the Output - // window such that the user can double-click on it to jump to the - // corresponding source code location; otherwise they do nothing. -#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE - // We don't call OutputDebugString*() on Windows Mobile, as printing - // to stdout is done by OutputDebugString() there already - we don't - // want the same message printed twice. - ::OutputDebugStringA(result.c_str()); - ::OutputDebugStringA("\n"); -#endif -} - -// class PrettyUnitTestResultPrinter - -namespace internal { - -enum GTestColor { - COLOR_DEFAULT, - COLOR_RED, - COLOR_GREEN, - COLOR_YELLOW -}; - -#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE - -// Returns the character attribute for the given color. -WORD GetColorAttribute(GTestColor color) { - switch (color) { - case COLOR_RED: return FOREGROUND_RED; - case COLOR_GREEN: return FOREGROUND_GREEN; - case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN; - default: return 0; - } -} - -#else - -// Returns the ANSI color code for the given color. COLOR_DEFAULT is -// an invalid input. -const char* GetAnsiColorCode(GTestColor color) { - switch (color) { - case COLOR_RED: return "1"; - case COLOR_GREEN: return "2"; - case COLOR_YELLOW: return "3"; - default: return NULL; - }; -} - -#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE - -// Returns true iff Google Test should use colors in the output. -bool ShouldUseColor(bool stdout_is_tty) { - const char* const gtest_color = GTEST_FLAG(color).c_str(); - - if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) { -#if GTEST_OS_WINDOWS - // On Windows the TERM variable is usually not set, but the - // console there does support colors. - return stdout_is_tty; -#else - // On non-Windows platforms, we rely on the TERM variable. 
- const char* const term = posix::GetEnv("TERM"); - const bool term_supports_color = - String::CStringEquals(term, "xterm") || - String::CStringEquals(term, "xterm-color") || - String::CStringEquals(term, "xterm-256color") || - String::CStringEquals(term, "screen") || - String::CStringEquals(term, "linux") || - String::CStringEquals(term, "cygwin"); - return stdout_is_tty && term_supports_color; -#endif // GTEST_OS_WINDOWS - } - - return String::CaseInsensitiveCStringEquals(gtest_color, "yes") || - String::CaseInsensitiveCStringEquals(gtest_color, "true") || - String::CaseInsensitiveCStringEquals(gtest_color, "t") || - String::CStringEquals(gtest_color, "1"); - // We take "yes", "true", "t", and "1" as meaning "yes". If the - // value is neither one of these nor "auto", we treat it as "no" to - // be conservative. -} - -// Helpers for printing colored strings to stdout. Note that on Windows, we -// cannot simply emit special characters and have the terminal change colors. -// This routine must actually emit the characters rather than return a string -// that would be colored when printed, as can be done on Linux. -void ColoredPrintf(GTestColor color, const char* fmt, ...) { - va_list args; - va_start(args, fmt); - -#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS - const bool use_color = false; -#else - static const bool in_color_mode = - ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0); - const bool use_color = in_color_mode && (color != COLOR_DEFAULT); -#endif // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS - // The '!= 0' comparison is necessary to satisfy MSVC 7.1. - - if (!use_color) { - vprintf(fmt, args); - va_end(args); - return; - } - -#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE - const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); - - // Gets the current text color. - CONSOLE_SCREEN_BUFFER_INFO buffer_info; - GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); - const WORD old_color_attrs = buffer_info.wAttributes; - - // We need to flush the stream buffers into the console before each - // SetConsoleTextAttribute call lest it affect the text that is already - // printed but has not yet reached the console. - fflush(stdout); - SetConsoleTextAttribute(stdout_handle, - GetColorAttribute(color) | FOREGROUND_INTENSITY); - vprintf(fmt, args); - - fflush(stdout); - // Restores the text color. - SetConsoleTextAttribute(stdout_handle, old_color_attrs); -#else - printf("\033[0;3%sm", GetAnsiColorCode(color)); - vprintf(fmt, args); - printf("\033[m"); // Resets the terminal to default. -#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE - va_end(args); -} - -void PrintFullTestCommentIfPresent(const TestInfo& test_info) { - const char* const type_param = test_info.type_param(); - const char* const value_param = test_info.value_param(); - - if (type_param != NULL || value_param != NULL) { - printf(", where "); - if (type_param != NULL) { - printf("TypeParam = %s", type_param); - if (value_param != NULL) - printf(" and "); - } - if (value_param != NULL) { - printf("GetParam() = %s", value_param); - } - } -} - -// This class implements the TestEventListener interface. -// -// Class PrettyUnitTestResultPrinter is copyable. -class PrettyUnitTestResultPrinter : public TestEventListener { - public: - PrettyUnitTestResultPrinter() {} - static void PrintTestName(const char * test_case, const char * test) { - printf("%s.%s", test_case, test); - } - - // The following methods override what's in the TestEventListener class. 
- virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} - virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); - virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); - virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} - virtual void OnTestCaseStart(const TestCase& test_case); - virtual void OnTestStart(const TestInfo& test_info); - virtual void OnTestPartResult(const TestPartResult& result); - virtual void OnTestEnd(const TestInfo& test_info); - virtual void OnTestCaseEnd(const TestCase& test_case); - virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); - virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} - virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); - virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} - - private: - static void PrintFailedTests(const UnitTest& unit_test); - - internal::String test_case_name_; -}; - - // Fired before each iteration of tests starts. -void PrettyUnitTestResultPrinter::OnTestIterationStart( - const UnitTest& unit_test, int iteration) { - if (GTEST_FLAG(repeat) != 1) - printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1); - - const char* const filter = GTEST_FLAG(filter).c_str(); - - // Prints the filter if it's not *. This reminds the user that some - // tests may be skipped. - if (!internal::String::CStringEquals(filter, kUniversalFilter)) { - ColoredPrintf(COLOR_YELLOW, - "Note: %s filter = %s\n", GTEST_NAME_, filter); - } - - if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) { - const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1); - ColoredPrintf(COLOR_YELLOW, - "Note: This is test shard %d of %s.\n", - static_cast(shard_index) + 1, - internal::posix::GetEnv(kTestTotalShards)); - } - - if (GTEST_FLAG(shuffle)) { - ColoredPrintf(COLOR_YELLOW, - "Note: Randomizing tests' orders with a seed of %d .\n", - unit_test.random_seed()); - } - - ColoredPrintf(COLOR_GREEN, "[==========] "); - printf("Running %s from %s.\n", - FormatTestCount(unit_test.test_to_run_count()).c_str(), - FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); - fflush(stdout); -} - -void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart( - const UnitTest& /*unit_test*/) { - ColoredPrintf(COLOR_GREEN, "[----------] "); - printf("Global test environment set-up.\n"); - fflush(stdout); -} - -void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) { - test_case_name_ = test_case.name(); - const internal::String counts = - FormatCountableNoun(test_case.test_to_run_count(), "test", "tests"); - ColoredPrintf(COLOR_GREEN, "[----------] "); - printf("%s from %s", counts.c_str(), test_case_name_.c_str()); - if (test_case.type_param() == NULL) { - printf("\n"); - } else { - printf(", where TypeParam = %s\n", test_case.type_param()); - } - fflush(stdout); -} - -void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) { - ColoredPrintf(COLOR_GREEN, "[ RUN ] "); - PrintTestName(test_case_name_.c_str(), test_info.name()); - printf("\n"); - fflush(stdout); -} - -// Called after an assertion failure. -void PrettyUnitTestResultPrinter::OnTestPartResult( - const TestPartResult& result) { - // If the test part succeeded, we don't need to do anything. - if (result.type() == TestPartResult::kSuccess) - return; - - // Print failure message from the assertion (e.g. expected this and got that). 
- PrintTestPartResult(result); - fflush(stdout); -} - -void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) { - if (test_info.result()->Passed()) { - ColoredPrintf(COLOR_GREEN, "[ OK ] "); - } else { - ColoredPrintf(COLOR_RED, "[ FAILED ] "); - } - PrintTestName(test_case_name_.c_str(), test_info.name()); - if (test_info.result()->Failed()) - PrintFullTestCommentIfPresent(test_info); - - if (GTEST_FLAG(print_time)) { - printf(" (%s ms)\n", internal::StreamableToString( - test_info.result()->elapsed_time()).c_str()); - } else { - printf("\n"); - } - fflush(stdout); -} - -void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) { - if (!GTEST_FLAG(print_time)) return; - - test_case_name_ = test_case.name(); - const internal::String counts = - FormatCountableNoun(test_case.test_to_run_count(), "test", "tests"); - ColoredPrintf(COLOR_GREEN, "[----------] "); - printf("%s from %s (%s ms total)\n\n", - counts.c_str(), test_case_name_.c_str(), - internal::StreamableToString(test_case.elapsed_time()).c_str()); - fflush(stdout); -} - -void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart( - const UnitTest& /*unit_test*/) { - ColoredPrintf(COLOR_GREEN, "[----------] "); - printf("Global test environment tear-down\n"); - fflush(stdout); -} - -// Internal helper for printing the list of failed tests. -void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) { - const int failed_test_count = unit_test.failed_test_count(); - if (failed_test_count == 0) { - return; - } - - for (int i = 0; i < unit_test.total_test_case_count(); ++i) { - const TestCase& test_case = *unit_test.GetTestCase(i); - if (!test_case.should_run() || (test_case.failed_test_count() == 0)) { - continue; - } - for (int j = 0; j < test_case.total_test_count(); ++j) { - const TestInfo& test_info = *test_case.GetTestInfo(j); - if (!test_info.should_run() || test_info.result()->Passed()) { - continue; - } - ColoredPrintf(COLOR_RED, "[ FAILED ] "); - printf("%s.%s", test_case.name(), test_info.name()); - PrintFullTestCommentIfPresent(test_info); - printf("\n"); - } - } -} - -void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, - int /*iteration*/) { - ColoredPrintf(COLOR_GREEN, "[==========] "); - printf("%s from %s ran.", - FormatTestCount(unit_test.test_to_run_count()).c_str(), - FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); - if (GTEST_FLAG(print_time)) { - printf(" (%s ms total)", - internal::StreamableToString(unit_test.elapsed_time()).c_str()); - } - printf("\n"); - ColoredPrintf(COLOR_GREEN, "[ PASSED ] "); - printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str()); - - int num_failures = unit_test.failed_test_count(); - if (!unit_test.Passed()) { - const int failed_test_count = unit_test.failed_test_count(); - ColoredPrintf(COLOR_RED, "[ FAILED ] "); - printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str()); - PrintFailedTests(unit_test); - printf("\n%2d FAILED %s\n", num_failures, - num_failures == 1 ? "TEST" : "TESTS"); - } - - int num_disabled = unit_test.disabled_test_count(); - if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) { - if (!num_failures) { - printf("\n"); // Add a spacer if no FAILURE banner is displayed. - } - ColoredPrintf(COLOR_YELLOW, - " YOU HAVE %d DISABLED %s\n\n", - num_disabled, - num_disabled == 1 ? "TEST" : "TESTS"); - } - // Ensure that Google Test output is printed before, e.g., heapchecker output. 
- fflush(stdout); -} - -// End PrettyUnitTestResultPrinter - -// class TestEventRepeater -// -// This class forwards events to other event listeners. -class TestEventRepeater : public TestEventListener { - public: - TestEventRepeater() : forwarding_enabled_(true) {} - virtual ~TestEventRepeater(); - void Append(TestEventListener *listener); - TestEventListener* Release(TestEventListener* listener); - - // Controls whether events will be forwarded to listeners_. Set to false - // in death test child processes. - bool forwarding_enabled() const { return forwarding_enabled_; } - void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; } - - virtual void OnTestProgramStart(const UnitTest& unit_test); - virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); - virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); - virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test); - virtual void OnTestCaseStart(const TestCase& test_case); - virtual void OnTestStart(const TestInfo& test_info); - virtual void OnTestPartResult(const TestPartResult& result); - virtual void OnTestEnd(const TestInfo& test_info); - virtual void OnTestCaseEnd(const TestCase& test_case); - virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); - virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test); - virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); - virtual void OnTestProgramEnd(const UnitTest& unit_test); - - private: - // Controls whether events will be forwarded to listeners_. Set to false - // in death test child processes. - bool forwarding_enabled_; - // The list of listeners that receive events. - std::vector listeners_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater); -}; - -TestEventRepeater::~TestEventRepeater() { - ForEach(listeners_, Delete); -} - -void TestEventRepeater::Append(TestEventListener *listener) { - listeners_.push_back(listener); -} - -// TODO(vladl@google.com): Factor the search functionality into Vector::Find. -TestEventListener* TestEventRepeater::Release(TestEventListener *listener) { - for (size_t i = 0; i < listeners_.size(); ++i) { - if (listeners_[i] == listener) { - listeners_.erase(listeners_.begin() + i); - return listener; - } - } - - return NULL; -} - -// Since most methods are very similar, use macros to reduce boilerplate. -// This defines a member that forwards the call to all listeners. -#define GTEST_REPEATER_METHOD_(Name, Type) \ -void TestEventRepeater::Name(const Type& parameter) { \ - if (forwarding_enabled_) { \ - for (size_t i = 0; i < listeners_.size(); i++) { \ - listeners_[i]->Name(parameter); \ - } \ - } \ -} -// This defines a member that forwards the call to all listeners in reverse -// order. 
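// ----------------------------------------------------------------------------
// [Editorial aside, not part of the deleted file] A minimal sketch of the
// forwarding pattern the GTEST_(REVERSE_)REPEATER_METHOD_ macros expand to:
// *Start events are forwarded in registration order, the matching *End events
// in reverse order, so listeners observe symmetric nesting (the first listener
// to see a Start is the last to see the corresponding End). DemoListener,
// ForwardStart and ForwardEnd are hypothetical names used only for this sketch.
#include <cstdio>
#include <vector>

struct DemoListener {
  const char* name;
  void OnStart() const { std::printf("%s: start\n", name); }
  void OnEnd() const   { std::printf("%s: end\n", name); }
};

static void ForwardStart(const std::vector<DemoListener>& listeners) {
  for (size_t i = 0; i < listeners.size(); i++)
    listeners[i].OnStart();
}

static void ForwardEnd(const std::vector<DemoListener>& listeners) {
  for (int i = static_cast<int>(listeners.size()) - 1; i >= 0; i--)
    listeners[i].OnEnd();
}

int main() {
  std::vector<DemoListener> listeners;
  const DemoListener printer = { "printer" };
  const DemoListener xml     = { "xml" };
  listeners.push_back(printer);
  listeners.push_back(xml);
  ForwardStart(listeners);  // printer: start, xml: start
  ForwardEnd(listeners);    // xml: end, printer: end
  return 0;
}
// ----------------------------------------------------------------------------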
-#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \ -void TestEventRepeater::Name(const Type& parameter) { \ - if (forwarding_enabled_) { \ - for (int i = static_cast(listeners_.size()) - 1; i >= 0; i--) { \ - listeners_[i]->Name(parameter); \ - } \ - } \ -} - -GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest) -GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest) -GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase) -GTEST_REPEATER_METHOD_(OnTestStart, TestInfo) -GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult) -GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest) -GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest) -GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest) -GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo) -GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase) -GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest) - -#undef GTEST_REPEATER_METHOD_ -#undef GTEST_REVERSE_REPEATER_METHOD_ - -void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test, - int iteration) { - if (forwarding_enabled_) { - for (size_t i = 0; i < listeners_.size(); i++) { - listeners_[i]->OnTestIterationStart(unit_test, iteration); - } - } -} - -void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test, - int iteration) { - if (forwarding_enabled_) { - for (int i = static_cast(listeners_.size()) - 1; i >= 0; i--) { - listeners_[i]->OnTestIterationEnd(unit_test, iteration); - } - } -} - -// End TestEventRepeater - -// This class generates an XML output file. -class XmlUnitTestResultPrinter : public EmptyTestEventListener { - public: - explicit XmlUnitTestResultPrinter(const char* output_file); - - virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); - - private: - // Is c a whitespace character that is normalized to a space character - // when it appears in an XML attribute value? - static bool IsNormalizableWhitespace(char c) { - return c == 0x9 || c == 0xA || c == 0xD; - } - - // May c appear in a well-formed XML document? - static bool IsValidXmlCharacter(char c) { - return IsNormalizableWhitespace(c) || c >= 0x20; - } - - // Returns an XML-escaped copy of the input string str. If - // is_attribute is true, the text is meant to appear as an attribute - // value, and normalizable whitespace is preserved by replacing it - // with character references. - static String EscapeXml(const char* str, bool is_attribute); - - // Returns the given string with all characters invalid in XML removed. - static string RemoveInvalidXmlCharacters(const string& str); - - // Convenience wrapper around EscapeXml when str is an attribute value. - static String EscapeXmlAttribute(const char* str) { - return EscapeXml(str, true); - } - - // Convenience wrapper around EscapeXml when str is not an attribute value. - static String EscapeXmlText(const char* str) { return EscapeXml(str, false); } - - // Streams an XML CDATA section, escaping invalid CDATA sequences as needed. - static void OutputXmlCDataSection(::std::ostream* stream, const char* data); - - // Streams an XML representation of a TestInfo object. - static void OutputXmlTestInfo(::std::ostream* stream, - const char* test_case_name, - const TestInfo& test_info); - - // Prints an XML representation of a TestCase object - static void PrintXmlTestCase(FILE* out, const TestCase& test_case); - - // Prints an XML summary of unit_test to output stream out. 
- static void PrintXmlUnitTest(FILE* out, const UnitTest& unit_test); - - // Produces a string representing the test properties in a result as space - // delimited XML attributes based on the property key="value" pairs. - // When the String is not empty, it includes a space at the beginning, - // to delimit this attribute from prior attributes. - static String TestPropertiesAsXmlAttributes(const TestResult& result); - - // The output file. - const String output_file_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter); -}; - -// Creates a new XmlUnitTestResultPrinter. -XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file) - : output_file_(output_file) { - if (output_file_.c_str() == NULL || output_file_.empty()) { - fprintf(stderr, "XML output file may not be null\n"); - fflush(stderr); - exit(EXIT_FAILURE); - } -} - -// Called after the unit test ends. -void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, - int /*iteration*/) { - FILE* xmlout = NULL; - FilePath output_file(output_file_); - FilePath output_dir(output_file.RemoveFileName()); - - if (output_dir.CreateDirectoriesRecursively()) { - xmlout = posix::FOpen(output_file_.c_str(), "w"); - } - if (xmlout == NULL) { - // TODO(wan): report the reason of the failure. - // - // We don't do it for now as: - // - // 1. There is no urgent need for it. - // 2. It's a bit involved to make the errno variable thread-safe on - // all three operating systems (Linux, Windows, and Mac OS). - // 3. To interpret the meaning of errno in a thread-safe way, - // we need the strerror_r() function, which is not available on - // Windows. - fprintf(stderr, - "Unable to open file \"%s\"\n", - output_file_.c_str()); - fflush(stderr); - exit(EXIT_FAILURE); - } - PrintXmlUnitTest(xmlout, unit_test); - fclose(xmlout); -} - -// Returns an XML-escaped copy of the input string str. If is_attribute -// is true, the text is meant to appear as an attribute value, and -// normalizable whitespace is preserved by replacing it with character -// references. -// -// Invalid XML characters in str, if any, are stripped from the output. -// It is expected that most, if not all, of the text processed by this -// module will consist of ordinary English text. -// If this module is ever modified to produce version 1.1 XML output, -// most invalid characters can be retained using character references. -// TODO(wan): It might be nice to have a minimally invasive, human-readable -// escaping scheme for invalid characters, rather than dropping them. -String XmlUnitTestResultPrinter::EscapeXml(const char* str, bool is_attribute) { - Message m; - - if (str != NULL) { - for (const char* src = str; *src; ++src) { - switch (*src) { - case '<': - m << "<"; - break; - case '>': - m << ">"; - break; - case '&': - m << "&"; - break; - case '\'': - if (is_attribute) - m << "'"; - else - m << '\''; - break; - case '"': - if (is_attribute) - m << """; - else - m << '"'; - break; - default: - if (IsValidXmlCharacter(*src)) { - if (is_attribute && IsNormalizableWhitespace(*src)) - m << String::Format("&#x%02X;", unsigned(*src)); - else - m << *src; - } - break; - } - } - } - - return m.GetString(); -} - -// Returns the given string with all characters invalid in XML removed. -// Currently invalid characters are dropped from the string. An -// alternative is to replace them with certain characters such as . or ?. 
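// ----------------------------------------------------------------------------
// [Editorial aside, not part of the deleted file] The entity strings inside
// EscapeXml() above did not survive the capture of this diff; the mapping it
// applies is the standard XML one sketched here. escape_xml_demo is a
// hypothetical name used only for illustration.
#include <string>

static std::string escape_xml_demo(const std::string& in, bool is_attribute) {
  std::string out;
  for (std::string::size_type i = 0; i < in.size(); ++i) {
    switch (in[i]) {
      case '<':  out += "&lt;";   break;
      case '>':  out += "&gt;";   break;
      case '&':  out += "&amp;";  break;
      case '\'': out += is_attribute ? "&apos;" : "'";  break;
      case '"':  out += is_attribute ? "&quot;" : "\""; break;
      default:   out += in[i];    break;
    }
  }
  // e.g. escape_xml_demo("a<b & \"c\"", true) == "a&lt;b &amp; &quot;c&quot;"
  return out;
}
// ----------------------------------------------------------------------------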
-string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(const string& str) { - string output; - output.reserve(str.size()); - for (string::const_iterator it = str.begin(); it != str.end(); ++it) - if (IsValidXmlCharacter(*it)) - output.push_back(*it); - - return output; -} - -// The following routines generate an XML representation of a UnitTest -// object. -// -// This is how Google Test concepts map to the DTD: -// -// <-- corresponds to a UnitTest object -// <-- corresponds to a TestCase object -// <-- corresponds to a TestInfo object -// ... -// ... -// ... -// <-- individual assertion failures -// -// -// - -// Formats the given time in milliseconds as seconds. -std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) { - ::std::stringstream ss; - ss << ms/1000.0; - return ss.str(); -} - -// Streams an XML CDATA section, escaping invalid CDATA sequences as needed. -void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream, - const char* data) { - const char* segment = data; - *stream << ""); - if (next_segment != NULL) { - stream->write( - segment, static_cast(next_segment - segment)); - *stream << "]]>]]>"); - } else { - *stream << segment; - break; - } - } - *stream << "]]>"; -} - -// Prints an XML representation of a TestInfo object. -// TODO(wan): There is also value in printing properties with the plain printer. -void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream, - const char* test_case_name, - const TestInfo& test_info) { - const TestResult& result = *test_info.result(); - *stream << " \n"; - *stream << " "; - const string location = internal::FormatCompilerIndependentFileLocation( - part.file_name(), part.line_number()); - const string message = location + "\n" + part.message(); - OutputXmlCDataSection(stream, - RemoveInvalidXmlCharacters(message).c_str()); - *stream << "\n"; - } - } - - if (failures == 0) - *stream << " />\n"; - else - *stream << " \n"; -} - -// Prints an XML representation of a TestCase object -void XmlUnitTestResultPrinter::PrintXmlTestCase(FILE* out, - const TestCase& test_case) { - fprintf(out, - " \n", - FormatTimeInMillisAsSeconds(test_case.elapsed_time()).c_str()); - for (int i = 0; i < test_case.total_test_count(); ++i) { - ::std::stringstream stream; - OutputXmlTestInfo(&stream, test_case.name(), *test_case.GetTestInfo(i)); - fprintf(out, "%s", StringStreamToString(&stream).c_str()); - } - fprintf(out, " \n"); -} - -// Prints an XML summary of unit_test to output stream out. -void XmlUnitTestResultPrinter::PrintXmlUnitTest(FILE* out, - const UnitTest& unit_test) { - fprintf(out, "\n"); - fprintf(out, - "\n"); - for (int i = 0; i < unit_test.total_test_case_count(); ++i) - PrintXmlTestCase(out, *unit_test.GetTestCase(i)); - fprintf(out, "\n"); -} - -// Produces a string representing the test properties in a result as space -// delimited XML attributes based on the property key="value" pairs. -String XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes( - const TestResult& result) { - Message attributes; - for (int i = 0; i < result.test_property_count(); ++i) { - const TestProperty& property = result.GetTestProperty(i); - attributes << " " << property.key() << "=" - << "\"" << EscapeXmlAttribute(property.value()) << "\""; - } - return attributes.GetString(); -} - -// End XmlUnitTestResultPrinter - -#if GTEST_CAN_STREAM_RESULTS_ - -// Streams test results to the given port on the given host machine. 
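// ----------------------------------------------------------------------------
// [Editorial aside, not part of the deleted file] The XML printer above is
// enabled with --gtest_output=xml:PATH (or the GTEST_OUTPUT environment
// variable). The body of OutputXmlCDataSection() was partly lost in this diff;
// the idea it implements is sketched below: a CDATA section cannot contain the
// literal terminator "]]>", so the data is split at each occurrence and the
// terminator is re-emitted, escaped, between two CDATA sections.
// write_cdata_demo is a hypothetical name used only for this sketch.
#include <cstring>
#include <ostream>

static void write_cdata_demo(std::ostream* stream, const char* data) {
  const char* segment = data;
  *stream << "<![CDATA[";
  for (;;) {
    const char* const next = std::strstr(segment, "]]>");
    if (next != NULL) {
      stream->write(segment, static_cast<std::streamsize>(next - segment));
      *stream << "]]>]]&gt;<![CDATA[";  // close, escape "]]>", reopen
      segment = next + std::strlen("]]>");
    } else {
      *stream << segment;
      break;
    }
  }
  *stream << "]]>";
}
// ----------------------------------------------------------------------------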
-class StreamingListener : public EmptyTestEventListener { - public: - // Escapes '=', '&', '%', and '\n' characters in str as "%xx". - static string UrlEncode(const char* str); - - StreamingListener(const string& host, const string& port) - : sockfd_(-1), host_name_(host), port_num_(port) { - MakeConnection(); - Send("gtest_streaming_protocol_version=1.0\n"); - } - - virtual ~StreamingListener() { - if (sockfd_ != -1) - CloseConnection(); - } - - void OnTestProgramStart(const UnitTest& /* unit_test */) { - Send("event=TestProgramStart\n"); - } - - void OnTestProgramEnd(const UnitTest& unit_test) { - // Note that Google Test current only report elapsed time for each - // test iteration, not for the entire test program. - Send(String::Format("event=TestProgramEnd&passed=%d\n", - unit_test.Passed())); - - // Notify the streaming server to stop. - CloseConnection(); - } - - void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) { - Send(String::Format("event=TestIterationStart&iteration=%d\n", - iteration)); - } - - void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) { - Send(String::Format("event=TestIterationEnd&passed=%d&elapsed_time=%sms\n", - unit_test.Passed(), - StreamableToString(unit_test.elapsed_time()).c_str())); - } - - void OnTestCaseStart(const TestCase& test_case) { - Send(String::Format("event=TestCaseStart&name=%s\n", test_case.name())); - } - - void OnTestCaseEnd(const TestCase& test_case) { - Send(String::Format("event=TestCaseEnd&passed=%d&elapsed_time=%sms\n", - test_case.Passed(), - StreamableToString(test_case.elapsed_time()).c_str())); - } - - void OnTestStart(const TestInfo& test_info) { - Send(String::Format("event=TestStart&name=%s\n", test_info.name())); - } - - void OnTestEnd(const TestInfo& test_info) { - Send(String::Format( - "event=TestEnd&passed=%d&elapsed_time=%sms\n", - (test_info.result())->Passed(), - StreamableToString((test_info.result())->elapsed_time()).c_str())); - } - - void OnTestPartResult(const TestPartResult& test_part_result) { - const char* file_name = test_part_result.file_name(); - if (file_name == NULL) - file_name = ""; - Send(String::Format("event=TestPartResult&file=%s&line=%d&message=", - UrlEncode(file_name).c_str(), - test_part_result.line_number())); - Send(UrlEncode(test_part_result.message()) + "\n"); - } - - private: - // Creates a client socket and connects to the server. - void MakeConnection(); - - // Closes the socket. - void CloseConnection() { - GTEST_CHECK_(sockfd_ != -1) - << "CloseConnection() can be called only when there is a connection."; - - close(sockfd_); - sockfd_ = -1; - } - - // Sends a string to the socket. - void Send(const string& message) { - GTEST_CHECK_(sockfd_ != -1) - << "Send() can be called only when there is a connection."; - - const int len = static_cast(message.length()); - if (write(sockfd_, message.c_str(), len) != len) { - GTEST_LOG_(WARNING) - << "stream_result_to: failed to stream to " - << host_name_ << ":" << port_num_; - } - } - - int sockfd_; // socket file descriptor - const string host_name_; - const string port_num_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener); -}; // class StreamingListener - -// Checks if str contains '=', '&', '%' or '\n' characters. If yes, -// replaces them by "%xx" where xx is their hexadecimal value. For -// example, replaces "=" with "%3D". This algorithm is O(strlen(str)) -// in both time and space -- important as the input str may contain an -// arbitrarily long test failure message and stack trace. 
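// ----------------------------------------------------------------------------
// [Editorial aside, not part of the deleted file] StreamingListener above
// writes one plain-text "key=value&..." line per test event over a TCP socket.
// It is enabled with --gtest_stream_result_to=HOST:PORT (consumed later in
// ConfigureStreamingOutput()); a trivial line-oriented server (for example,
// nc -l PORT) can capture the stream. The lines printed below simply mirror the
// Send() calls visible in the class and are illustration only.
#include <cstdio>

int main() {
  std::printf("gtest_streaming_protocol_version=1.0\n");
  std::printf("event=TestCaseStart&name=%s\n", "FooTest");
  std::printf("event=TestStart&name=%s\n", "DoesBar");
  std::printf("event=TestEnd&passed=%d&elapsed_time=%sms\n", 1, "12");
  std::printf("event=TestCaseEnd&passed=%d&elapsed_time=%sms\n", 1, "15");
  return 0;
}
// ----------------------------------------------------------------------------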
-string StreamingListener::UrlEncode(const char* str) { - string result; - result.reserve(strlen(str) + 1); - for (char ch = *str; ch != '\0'; ch = *++str) { - switch (ch) { - case '%': - case '=': - case '&': - case '\n': - result.append(String::Format("%%%02x", static_cast(ch))); - break; - default: - result.push_back(ch); - break; - } - } - return result; -} - -void StreamingListener::MakeConnection() { - GTEST_CHECK_(sockfd_ == -1) - << "MakeConnection() can't be called when there is already a connection."; - - addrinfo hints; - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses. - hints.ai_socktype = SOCK_STREAM; - addrinfo* servinfo = NULL; - - // Use the getaddrinfo() to get a linked list of IP addresses for - // the given host name. - const int error_num = getaddrinfo( - host_name_.c_str(), port_num_.c_str(), &hints, &servinfo); - if (error_num != 0) { - GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: " - << gai_strerror(error_num); - } - - // Loop through all the results and connect to the first we can. - for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL; - cur_addr = cur_addr->ai_next) { - sockfd_ = socket( - cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol); - if (sockfd_ != -1) { - // Connect the client socket to the server socket. - if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) { - close(sockfd_); - sockfd_ = -1; - } - } - } - - freeaddrinfo(servinfo); // all done with this structure - - if (sockfd_ == -1) { - GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to " - << host_name_ << ":" << port_num_; - } -} - -// End of class Streaming Listener -#endif // GTEST_CAN_STREAM_RESULTS__ - -// Class ScopedTrace - -// Pushes the given source file location and message onto a per-thread -// trace stack maintained by Google Test. -// L < UnitTest::mutex_ -ScopedTrace::ScopedTrace(const char* file, int line, const Message& message) { - TraceInfo trace; - trace.file = file; - trace.line = line; - trace.message = message.GetString(); - - UnitTest::GetInstance()->PushGTestTrace(trace); -} - -// Pops the info pushed by the c'tor. -// L < UnitTest::mutex_ -ScopedTrace::~ScopedTrace() { - UnitTest::GetInstance()->PopGTestTrace(); -} - - -// class OsStackTraceGetter - -// Returns the current OS stack trace as a String. Parameters: -// -// max_depth - the maximum number of stack frames to be included -// in the trace. -// skip_count - the number of top frames to be skipped; doesn't count -// against max_depth. -// -// L < mutex_ -// We use "L < mutex_" to denote that the function may acquire mutex_. -String OsStackTraceGetter::CurrentStackTrace(int, int) { - return String(""); -} - -// L < mutex_ -void OsStackTraceGetter::UponLeavingGTest() { -} - -const char* const -OsStackTraceGetter::kElidedFramesMarker = - "... " GTEST_NAME_ " internal frames ..."; - -} // namespace internal - -// class TestEventListeners - -TestEventListeners::TestEventListeners() - : repeater_(new internal::TestEventRepeater()), - default_result_printer_(NULL), - default_xml_generator_(NULL) { -} - -TestEventListeners::~TestEventListeners() { delete repeater_; } - -// Returns the standard listener responsible for the default console -// output. Can be removed from the listeners list to shut down default -// console output. Note that removing this object from the listener list -// with Release transfers its ownership to the user. 
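// ----------------------------------------------------------------------------
// [Editorial aside, not part of the deleted file] Typical, documented use of
// the TestEventListeners API defined here: Release() hands ownership of the
// default console printer back to the caller (which then deletes it), and
// Append() installs a custom listener that the UnitTest singleton owns from
// then on. MinimalPrinter is a hypothetical example class; the header path may
// differ in this source tree.
#include <cstdio>
#include "gtest/gtest.h"

class MinimalPrinter : public ::testing::EmptyTestEventListener {
  virtual void OnTestEnd(const ::testing::TestInfo& test_info) {
    std::printf("%s.%s %s\n", test_info.test_case_name(), test_info.name(),
                test_info.result()->Passed() ? "ok" : "FAILED");
  }
};

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  ::testing::TestEventListeners& listeners =
      ::testing::UnitTest::GetInstance()->listeners();
  delete listeners.Release(listeners.default_result_printer());
  listeners.Append(new MinimalPrinter);  // owned by UnitTest from here on
  return RUN_ALL_TESTS();
}
// ----------------------------------------------------------------------------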
-void TestEventListeners::Append(TestEventListener* listener) { - repeater_->Append(listener); -} - -// Removes the given event listener from the list and returns it. It then -// becomes the caller's responsibility to delete the listener. Returns -// NULL if the listener is not found in the list. -TestEventListener* TestEventListeners::Release(TestEventListener* listener) { - if (listener == default_result_printer_) - default_result_printer_ = NULL; - else if (listener == default_xml_generator_) - default_xml_generator_ = NULL; - return repeater_->Release(listener); -} - -// Returns repeater that broadcasts the TestEventListener events to all -// subscribers. -TestEventListener* TestEventListeners::repeater() { return repeater_; } - -// Sets the default_result_printer attribute to the provided listener. -// The listener is also added to the listener list and previous -// default_result_printer is removed from it and deleted. The listener can -// also be NULL in which case it will not be added to the list. Does -// nothing if the previous and the current listener objects are the same. -void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) { - if (default_result_printer_ != listener) { - // It is an error to pass this method a listener that is already in the - // list. - delete Release(default_result_printer_); - default_result_printer_ = listener; - if (listener != NULL) - Append(listener); - } -} - -// Sets the default_xml_generator attribute to the provided listener. The -// listener is also added to the listener list and previous -// default_xml_generator is removed from it and deleted. The listener can -// also be NULL in which case it will not be added to the list. Does -// nothing if the previous and the current listener objects are the same. -void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) { - if (default_xml_generator_ != listener) { - // It is an error to pass this method a listener that is already in the - // list. - delete Release(default_xml_generator_); - default_xml_generator_ = listener; - if (listener != NULL) - Append(listener); - } -} - -// Controls whether events will be forwarded by the repeater to the -// listeners in the list. -bool TestEventListeners::EventForwardingEnabled() const { - return repeater_->forwarding_enabled(); -} - -void TestEventListeners::SuppressEventForwarding() { - repeater_->set_forwarding_enabled(false); -} - -// class UnitTest - -// Gets the singleton UnitTest object. The first time this method is -// called, a UnitTest object is constructed and returned. Consecutive -// calls will return the same object. -// -// We don't protect this under mutex_ as a user is not supposed to -// call this before main() starts, from which point on the return -// value will never change. -UnitTest * UnitTest::GetInstance() { - // When compiled with MSVC 7.1 in optimized mode, destroying the - // UnitTest object upon exiting the program messes up the exit code, - // causing successful tests to appear failed. We have to use a - // different implementation in this case to bypass the compiler bug. - // This implementation makes the compiler happy, at the cost of - // leaking the UnitTest object. - - // CodeGear C++Builder insists on a public destructor for the - // default implementation. Use this implementation to keep good OO - // design with private destructor. 
- -#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) - static UnitTest* const instance = new UnitTest; - return instance; -#else - static UnitTest instance; - return &instance; -#endif // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) -} - -// Gets the number of successful test cases. -int UnitTest::successful_test_case_count() const { - return impl()->successful_test_case_count(); -} - -// Gets the number of failed test cases. -int UnitTest::failed_test_case_count() const { - return impl()->failed_test_case_count(); -} - -// Gets the number of all test cases. -int UnitTest::total_test_case_count() const { - return impl()->total_test_case_count(); -} - -// Gets the number of all test cases that contain at least one test -// that should run. -int UnitTest::test_case_to_run_count() const { - return impl()->test_case_to_run_count(); -} - -// Gets the number of successful tests. -int UnitTest::successful_test_count() const { - return impl()->successful_test_count(); -} - -// Gets the number of failed tests. -int UnitTest::failed_test_count() const { return impl()->failed_test_count(); } - -// Gets the number of disabled tests. -int UnitTest::disabled_test_count() const { - return impl()->disabled_test_count(); -} - -// Gets the number of all tests. -int UnitTest::total_test_count() const { return impl()->total_test_count(); } - -// Gets the number of tests that should run. -int UnitTest::test_to_run_count() const { return impl()->test_to_run_count(); } - -// Gets the elapsed time, in milliseconds. -internal::TimeInMillis UnitTest::elapsed_time() const { - return impl()->elapsed_time(); -} - -// Returns true iff the unit test passed (i.e. all test cases passed). -bool UnitTest::Passed() const { return impl()->Passed(); } - -// Returns true iff the unit test failed (i.e. some test case failed -// or something outside of all tests failed). -bool UnitTest::Failed() const { return impl()->Failed(); } - -// Gets the i-th test case among all the test cases. i can range from 0 to -// total_test_case_count() - 1. If i is not in that range, returns NULL. -const TestCase* UnitTest::GetTestCase(int i) const { - return impl()->GetTestCase(i); -} - -// Gets the i-th test case among all the test cases. i can range from 0 to -// total_test_case_count() - 1. If i is not in that range, returns NULL. -TestCase* UnitTest::GetMutableTestCase(int i) { - return impl()->GetMutableTestCase(i); -} - -// Returns the list of event listeners that can be used to track events -// inside Google Test. -TestEventListeners& UnitTest::listeners() { - return *impl()->listeners(); -} - -// Registers and returns a global test environment. When a test -// program is run, all global test environments will be set-up in the -// order they were registered. After all tests in the program have -// finished, all global test environments will be torn-down in the -// *reverse* order they were registered. -// -// The UnitTest object takes ownership of the given environment. -// -// We don't protect this under mutex_, as we only support calling it -// from the main thread. -Environment* UnitTest::AddEnvironment(Environment* env) { - if (env == NULL) { - return NULL; - } - - impl_->environments().push_back(env); - return env; -} - -// Adds a TestPartResult to the current TestResult object. All Google Test -// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call -// this to report their results. The user code should use the -// assertion macros instead of calling this directly. 
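// ----------------------------------------------------------------------------
// [Editorial aside, not part of the deleted file] AddEnvironment() above is
// normally reached through the documented ::testing::AddGlobalTestEnvironment()
// helper. Environments are set up in registration order before the first test
// and torn down in reverse order after the last one. DatabaseEnv is a
// hypothetical example; the header path may differ in this source tree.
#include "gtest/gtest.h"

class DatabaseEnv : public ::testing::Environment {
 public:
  virtual void SetUp()    { /* e.g. open a connection shared by every test */ }
  virtual void TearDown() { /* runs once, after the last test finishes */ }
};

// The UnitTest singleton takes ownership of the pointer; do not delete it.
static ::testing::Environment* const g_db_env =
    ::testing::AddGlobalTestEnvironment(new DatabaseEnv);
// ----------------------------------------------------------------------------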
-// L < mutex_ -void UnitTest::AddTestPartResult(TestPartResult::Type result_type, - const char* file_name, - int line_number, - const internal::String& message, - const internal::String& os_stack_trace) { - Message msg; - msg << message; - - internal::MutexLock lock(&mutex_); - if (impl_->gtest_trace_stack().size() > 0) { - msg << "\n" << GTEST_NAME_ << " trace:"; - - for (int i = static_cast(impl_->gtest_trace_stack().size()); - i > 0; --i) { - const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1]; - msg << "\n" << internal::FormatFileLocation(trace.file, trace.line) - << " " << trace.message; - } - } - - if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) { - msg << internal::kStackTraceMarker << os_stack_trace; - } - - const TestPartResult result = - TestPartResult(result_type, file_name, line_number, - msg.GetString().c_str()); - impl_->GetTestPartResultReporterForCurrentThread()-> - ReportTestPartResult(result); - - if (result_type != TestPartResult::kSuccess) { - // gtest_break_on_failure takes precedence over - // gtest_throw_on_failure. This allows a user to set the latter - // in the code (perhaps in order to use Google Test assertions - // with another testing framework) and specify the former on the - // command line for debugging. - if (GTEST_FLAG(break_on_failure)) { -#if GTEST_OS_WINDOWS - // Using DebugBreak on Windows allows gtest to still break into a debugger - // when a failure happens and both the --gtest_break_on_failure and - // the --gtest_catch_exceptions flags are specified. - DebugBreak(); -#else - // Dereference NULL through a volatile pointer to prevent the compiler - // from removing. We use this rather than abort() or __builtin_trap() for - // portability: Symbian doesn't implement abort() well, and some debuggers - // don't correctly trap abort(). - *static_cast(NULL) = 1; -#endif // GTEST_OS_WINDOWS - } else if (GTEST_FLAG(throw_on_failure)) { -#if GTEST_HAS_EXCEPTIONS - throw GoogleTestFailureException(result); -#else - // We cannot call abort() as it generates a pop-up in debug mode - // that cannot be suppressed in VC 7.1 or below. - exit(1); -#endif - } - } -} - -// Creates and adds a property to the current TestResult. If a property matching -// the supplied value already exists, updates its value instead. -void UnitTest::RecordPropertyForCurrentTest(const char* key, - const char* value) { - const TestProperty test_property(key, value); - impl_->current_test_result()->RecordProperty(test_property); -} - -// Runs all tests in this UnitTest object and prints the result. -// Returns 0 if successful, or 1 otherwise. -// -// We don't protect this under mutex_, as we only support calling it -// from the main thread. -int UnitTest::Run() { - // Captures the value of GTEST_FLAG(catch_exceptions). This value will be - // used for the duration of the program. - impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions)); - -#if GTEST_HAS_SEH - const bool in_death_test_child_process = - internal::GTEST_FLAG(internal_run_death_test).length() > 0; - - // Either the user wants Google Test to catch exceptions thrown by the - // tests or this is executing in the context of death test child - // process. In either case the user does not want to see pop-up dialogs - // about crashes - they are expected. - if (impl()->catch_exceptions() || in_death_test_child_process) { - -# if !GTEST_OS_WINDOWS_MOBILE - // SetErrorMode doesn't exist on CE. 
- SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT | - SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); -# endif // !GTEST_OS_WINDOWS_MOBILE - -# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE - // Death test children can be terminated with _abort(). On Windows, - // _abort() can show a dialog with a warning message. This forces the - // abort message to go to stderr instead. - _set_error_mode(_OUT_TO_STDERR); -# endif - -# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE - // In the debug version, Visual Studio pops up a separate dialog - // offering a choice to debug the aborted program. We need to suppress - // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement - // executed. Google Test will notify the user of any unexpected - // failure via stderr. - // - // VC++ doesn't define _set_abort_behavior() prior to the version 8.0. - // Users of prior VC versions shall suffer the agony and pain of - // clicking through the countless debug dialogs. - // TODO(vladl@google.com): find a way to suppress the abort dialog() in the - // debug mode when compiled with VC 7.1 or lower. - if (!GTEST_FLAG(break_on_failure)) - _set_abort_behavior( - 0x0, // Clear the following flags: - _WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump. -# endif - - } -#endif // GTEST_HAS_SEH - - return internal::HandleExceptionsInMethodIfSupported( - impl(), - &internal::UnitTestImpl::RunAllTests, - "auxiliary test code (environments or event listeners)") ? 0 : 1; -} - -// Returns the working directory when the first TEST() or TEST_F() was -// executed. -const char* UnitTest::original_working_dir() const { - return impl_->original_working_dir_.c_str(); -} - -// Returns the TestCase object for the test that's currently running, -// or NULL if no test is running. -// L < mutex_ -const TestCase* UnitTest::current_test_case() const { - internal::MutexLock lock(&mutex_); - return impl_->current_test_case(); -} - -// Returns the TestInfo object for the test that's currently running, -// or NULL if no test is running. -// L < mutex_ -const TestInfo* UnitTest::current_test_info() const { - internal::MutexLock lock(&mutex_); - return impl_->current_test_info(); -} - -// Returns the random seed used at the start of the current test run. -int UnitTest::random_seed() const { return impl_->random_seed(); } - -#if GTEST_HAS_PARAM_TEST -// Returns ParameterizedTestCaseRegistry object used to keep track of -// value-parameterized tests and instantiate and register them. -// L < mutex_ -internal::ParameterizedTestCaseRegistry& - UnitTest::parameterized_test_registry() { - return impl_->parameterized_test_registry(); -} -#endif // GTEST_HAS_PARAM_TEST - -// Creates an empty UnitTest. -UnitTest::UnitTest() { - impl_ = new internal::UnitTestImpl(this); -} - -// Destructor of UnitTest. -UnitTest::~UnitTest() { - delete impl_; -} - -// Pushes a trace defined by SCOPED_TRACE() on to the per-thread -// Google Test trace stack. -// L < mutex_ -void UnitTest::PushGTestTrace(const internal::TraceInfo& trace) { - internal::MutexLock lock(&mutex_); - impl_->gtest_trace_stack().push_back(trace); -} - -// Pops a trace from the per-thread Google Test trace stack. -// L < mutex_ -void UnitTest::PopGTestTrace() { - internal::MutexLock lock(&mutex_); - impl_->gtest_trace_stack().pop_back(); -} - -namespace internal { - -UnitTestImpl::UnitTestImpl(UnitTest* parent) - : parent_(parent), -#ifdef _MSC_VER -# pragma warning(push) // Saves the current warning state. 
-# pragma warning(disable:4355) // Temporarily disables warning 4355 - // (using this in initializer). - default_global_test_part_result_reporter_(this), - default_per_thread_test_part_result_reporter_(this), -# pragma warning(pop) // Restores the warning state again. -#else - default_global_test_part_result_reporter_(this), - default_per_thread_test_part_result_reporter_(this), -#endif // _MSC_VER - global_test_part_result_repoter_( - &default_global_test_part_result_reporter_), - per_thread_test_part_result_reporter_( - &default_per_thread_test_part_result_reporter_), -#if GTEST_HAS_PARAM_TEST - parameterized_test_registry_(), - parameterized_tests_registered_(false), -#endif // GTEST_HAS_PARAM_TEST - last_death_test_case_(-1), - current_test_case_(NULL), - current_test_info_(NULL), - ad_hoc_test_result_(), - os_stack_trace_getter_(NULL), - post_flag_parse_init_performed_(false), - random_seed_(0), // Will be overridden by the flag before first use. - random_(0), // Will be reseeded before first use. - elapsed_time_(0), -#if GTEST_HAS_DEATH_TEST - internal_run_death_test_flag_(NULL), - death_test_factory_(new DefaultDeathTestFactory), -#endif - // Will be overridden by the flag before first use. - catch_exceptions_(false) { - listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter); -} - -UnitTestImpl::~UnitTestImpl() { - // Deletes every TestCase. - ForEach(test_cases_, internal::Delete); - - // Deletes every Environment. - ForEach(environments_, internal::Delete); - - delete os_stack_trace_getter_; -} - -#if GTEST_HAS_DEATH_TEST -// Disables event forwarding if the control is currently in a death test -// subprocess. Must not be called before InitGoogleTest. -void UnitTestImpl::SuppressTestEventsIfInSubprocess() { - if (internal_run_death_test_flag_.get() != NULL) - listeners()->SuppressEventForwarding(); -} -#endif // GTEST_HAS_DEATH_TEST - -// Initializes event listeners performing XML output as specified by -// UnitTestOptions. Must not be called before InitGoogleTest. -void UnitTestImpl::ConfigureXmlOutput() { - const String& output_format = UnitTestOptions::GetOutputFormat(); - if (output_format == "xml") { - listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter( - UnitTestOptions::GetAbsolutePathToOutputFile().c_str())); - } else if (output_format != "") { - printf("WARNING: unrecognized output format \"%s\" ignored.\n", - output_format.c_str()); - fflush(stdout); - } -} - -#if GTEST_CAN_STREAM_RESULTS_ -// Initializes event listeners for streaming test results in String form. -// Must not be called before InitGoogleTest. -void UnitTestImpl::ConfigureStreamingOutput() { - const string& target = GTEST_FLAG(stream_result_to); - if (!target.empty()) { - const size_t pos = target.find(':'); - if (pos != string::npos) { - listeners()->Append(new StreamingListener(target.substr(0, pos), - target.substr(pos+1))); - } else { - printf("WARNING: unrecognized streaming target \"%s\" ignored.\n", - target.c_str()); - fflush(stdout); - } - } -} -#endif // GTEST_CAN_STREAM_RESULTS_ - -// Performs initialization dependent upon flag values obtained in -// ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to -// ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest -// this function is also called from RunAllTests. Since this function can be -// called more than once, it has to be idempotent. -void UnitTestImpl::PostFlagParsingInit() { - // Ensures that this function does not execute more than once. 
- if (!post_flag_parse_init_performed_) { - post_flag_parse_init_performed_ = true; - -#if GTEST_HAS_DEATH_TEST - InitDeathTestSubprocessControlInfo(); - SuppressTestEventsIfInSubprocess(); -#endif // GTEST_HAS_DEATH_TEST - - // Registers parameterized tests. This makes parameterized tests - // available to the UnitTest reflection API without running - // RUN_ALL_TESTS. - RegisterParameterizedTests(); - - // Configures listeners for XML output. This makes it possible for users - // to shut down the default XML output before invoking RUN_ALL_TESTS. - ConfigureXmlOutput(); - -#if GTEST_CAN_STREAM_RESULTS_ - // Configures listeners for streaming test results to the specified server. - ConfigureStreamingOutput(); -#endif // GTEST_CAN_STREAM_RESULTS_ - } -} - -// A predicate that checks the name of a TestCase against a known -// value. -// -// This is used for implementation of the UnitTest class only. We put -// it in the anonymous namespace to prevent polluting the outer -// namespace. -// -// TestCaseNameIs is copyable. -class TestCaseNameIs { - public: - // Constructor. - explicit TestCaseNameIs(const String& name) - : name_(name) {} - - // Returns true iff the name of test_case matches name_. - bool operator()(const TestCase* test_case) const { - return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0; - } - - private: - String name_; -}; - -// Finds and returns a TestCase with the given name. If one doesn't -// exist, creates one and returns it. It's the CALLER'S -// RESPONSIBILITY to ensure that this function is only called WHEN THE -// TESTS ARE NOT SHUFFLED. -// -// Arguments: -// -// test_case_name: name of the test case -// type_param: the name of the test case's type parameter, or NULL if -// this is not a typed or a type-parameterized test case. -// set_up_tc: pointer to the function that sets up the test case -// tear_down_tc: pointer to the function that tears down the test case -TestCase* UnitTestImpl::GetTestCase(const char* test_case_name, - const char* type_param, - Test::SetUpTestCaseFunc set_up_tc, - Test::TearDownTestCaseFunc tear_down_tc) { - // Can we find a TestCase with the given name? - const std::vector::const_iterator test_case = - std::find_if(test_cases_.begin(), test_cases_.end(), - TestCaseNameIs(test_case_name)); - - if (test_case != test_cases_.end()) - return *test_case; - - // No. Let's create one. - TestCase* const new_test_case = - new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc); - - // Is this a death test case? - if (internal::UnitTestOptions::MatchesFilter(String(test_case_name), - kDeathTestCaseFilter)) { - // Yes. Inserts the test case after the last death test case - // defined so far. This only works when the test cases haven't - // been shuffled. Otherwise we may end up running a death test - // after a non-death test. - ++last_death_test_case_; - test_cases_.insert(test_cases_.begin() + last_death_test_case_, - new_test_case); - } else { - // No. Appends to the end of the list. - test_cases_.push_back(new_test_case); - } - - test_case_indices_.push_back(static_cast(test_case_indices_.size())); - return new_test_case; -} - -// Helpers for setting up / tearing down the given environment. They -// are for use in the ForEach() function. -static void SetUpEnvironment(Environment* env) { env->SetUp(); } -static void TearDownEnvironment(Environment* env) { env->TearDown(); } - -// Runs all tests in this UnitTest object, prints the result, and -// returns true if all tests are successful. 
If any exception is -// thrown during a test, the test is considered to be failed, but the -// rest of the tests will still be run. -// -// When parameterized tests are enabled, it expands and registers -// parameterized tests first in RegisterParameterizedTests(). -// All other functions called from RunAllTests() may safely assume that -// parameterized tests are ready to be counted and run. -bool UnitTestImpl::RunAllTests() { - // Makes sure InitGoogleTest() was called. - if (!GTestIsInitialized()) { - printf("%s", - "\nThis test program did NOT call ::testing::InitGoogleTest " - "before calling RUN_ALL_TESTS(). Please fix it.\n"); - return false; - } - - // Do not run any test if the --help flag was specified. - if (g_help_flag) - return true; - - // Repeats the call to the post-flag parsing initialization in case the - // user didn't call InitGoogleTest. - PostFlagParsingInit(); - - // Even if sharding is not on, test runners may want to use the - // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding - // protocol. - internal::WriteToShardStatusFileIfNeeded(); - - // True iff we are in a subprocess for running a thread-safe-style - // death test. - bool in_subprocess_for_death_test = false; - -#if GTEST_HAS_DEATH_TEST - in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL); -#endif // GTEST_HAS_DEATH_TEST - - const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex, - in_subprocess_for_death_test); - - // Compares the full test names with the filter to decide which - // tests to run. - const bool has_tests_to_run = FilterTests(should_shard - ? HONOR_SHARDING_PROTOCOL - : IGNORE_SHARDING_PROTOCOL) > 0; - - // Lists the tests and exits if the --gtest_list_tests flag was specified. - if (GTEST_FLAG(list_tests)) { - // This must be called *after* FilterTests() has been called. - ListTestsMatchingFilter(); - return true; - } - - random_seed_ = GTEST_FLAG(shuffle) ? - GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0; - - // True iff at least one test has failed. - bool failed = false; - - TestEventListener* repeater = listeners()->repeater(); - - repeater->OnTestProgramStart(*parent_); - - // How many times to repeat the tests? We don't want to repeat them - // when we are inside the subprocess of a death test. - const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat); - // Repeats forever if the repeat count is negative. - const bool forever = repeat < 0; - for (int i = 0; forever || i != repeat; i++) { - // We want to preserve failures generated by ad-hoc test - // assertions executed before RUN_ALL_TESTS(). - ClearNonAdHocTestResult(); - - const TimeInMillis start = GetTimeInMillis(); - - // Shuffles test cases and tests if requested. - if (has_tests_to_run && GTEST_FLAG(shuffle)) { - random()->Reseed(random_seed_); - // This should be done before calling OnTestIterationStart(), - // such that a test event listener can see the actual test order - // in the event. - ShuffleTests(); - } - - // Tells the unit test event listeners that the tests are about to start. - repeater->OnTestIterationStart(*parent_, i); - - // Runs each test case if there is at least one test to run. - if (has_tests_to_run) { - // Sets up all environments beforehand. - repeater->OnEnvironmentsSetUpStart(*parent_); - ForEach(environments_, SetUpEnvironment); - repeater->OnEnvironmentsSetUpEnd(*parent_); - - // Runs the tests only if there was no fatal failure during global - // set-up. 
- if (!Test::HasFatalFailure()) { - for (int test_index = 0; test_index < total_test_case_count(); - test_index++) { - GetMutableTestCase(test_index)->Run(); - } - } - - // Tears down all environments in reverse order afterwards. - repeater->OnEnvironmentsTearDownStart(*parent_); - std::for_each(environments_.rbegin(), environments_.rend(), - TearDownEnvironment); - repeater->OnEnvironmentsTearDownEnd(*parent_); - } - - elapsed_time_ = GetTimeInMillis() - start; - - // Tells the unit test event listener that the tests have just finished. - repeater->OnTestIterationEnd(*parent_, i); - - // Gets the result and clears it. - if (!Passed()) { - failed = true; - } - - // Restores the original test order after the iteration. This - // allows the user to quickly repro a failure that happens in the - // N-th iteration without repeating the first (N - 1) iterations. - // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in - // case the user somehow changes the value of the flag somewhere - // (it's always safe to unshuffle the tests). - UnshuffleTests(); - - if (GTEST_FLAG(shuffle)) { - // Picks a new random seed for each iteration. - random_seed_ = GetNextRandomSeed(random_seed_); - } - } - - repeater->OnTestProgramEnd(*parent_); - - return !failed; -} - -// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file -// if the variable is present. If a file already exists at this location, this -// function will write over it. If the variable is present, but the file cannot -// be created, prints an error and exits. -void WriteToShardStatusFileIfNeeded() { - const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile); - if (test_shard_file != NULL) { - FILE* const file = posix::FOpen(test_shard_file, "w"); - if (file == NULL) { - ColoredPrintf(COLOR_RED, - "Could not write to the test shard status file \"%s\" " - "specified by the %s environment variable.\n", - test_shard_file, kTestShardStatusFile); - fflush(stdout); - exit(EXIT_FAILURE); - } - fclose(file); - } -} - -// Checks whether sharding is enabled by examining the relevant -// environment variable values. If the variables are present, -// but inconsistent (i.e., shard_index >= total_shards), prints -// an error and exits. If in_subprocess_for_death_test, sharding is -// disabled because it must only be applied to the original test -// process. Otherwise, we could filter out death tests we intended to execute. 
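// ----------------------------------------------------------------------------
// [Editorial aside, not part of the deleted file] Sharding is driven by the
// GTEST_TOTAL_SHARDS / GTEST_SHARD_INDEX environment variables checked below;
// a test runner launches one copy of the binary per shard, for example
//   GTEST_TOTAL_SHARDS=3 GTEST_SHARD_INDEX=0 ./my_test   (and likewise 1, 2).
// Runnable tests are assigned round-robin, the same rule ShouldRunTestOnShard()
// applies below. Hypothetical demo only:
#include <cstdio>

int main() {
  const int total_shards = 3;
  for (int test_id = 0; test_id < 7; ++test_id) {
    const int shard = test_id % total_shards;  // as in ShouldRunTestOnShard()
    std::printf("test %d runs on shard %d\n", test_id, shard);
  }
  return 0;
}
// ----------------------------------------------------------------------------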
-bool ShouldShard(const char* total_shards_env, - const char* shard_index_env, - bool in_subprocess_for_death_test) { - if (in_subprocess_for_death_test) { - return false; - } - - const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1); - const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1); - - if (total_shards == -1 && shard_index == -1) { - return false; - } else if (total_shards == -1 && shard_index != -1) { - const Message msg = Message() - << "Invalid environment variables: you have " - << kTestShardIndex << " = " << shard_index - << ", but have left " << kTestTotalShards << " unset.\n"; - ColoredPrintf(COLOR_RED, msg.GetString().c_str()); - fflush(stdout); - exit(EXIT_FAILURE); - } else if (total_shards != -1 && shard_index == -1) { - const Message msg = Message() - << "Invalid environment variables: you have " - << kTestTotalShards << " = " << total_shards - << ", but have left " << kTestShardIndex << " unset.\n"; - ColoredPrintf(COLOR_RED, msg.GetString().c_str()); - fflush(stdout); - exit(EXIT_FAILURE); - } else if (shard_index < 0 || shard_index >= total_shards) { - const Message msg = Message() - << "Invalid environment variables: we require 0 <= " - << kTestShardIndex << " < " << kTestTotalShards - << ", but you have " << kTestShardIndex << "=" << shard_index - << ", " << kTestTotalShards << "=" << total_shards << ".\n"; - ColoredPrintf(COLOR_RED, msg.GetString().c_str()); - fflush(stdout); - exit(EXIT_FAILURE); - } - - return total_shards > 1; -} - -// Parses the environment variable var as an Int32. If it is unset, -// returns default_val. If it is not an Int32, prints an error -// and aborts. -Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) { - const char* str_val = posix::GetEnv(var); - if (str_val == NULL) { - return default_val; - } - - Int32 result; - if (!ParseInt32(Message() << "The value of environment variable " << var, - str_val, &result)) { - exit(EXIT_FAILURE); - } - return result; -} - -// Given the total number of shards, the shard index, and the test id, -// returns true iff the test should be run on this shard. The test id is -// some arbitrary but unique non-negative integer assigned to each test -// method. Assumes that 0 <= shard_index < total_shards. -bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) { - return (test_id % total_shards) == shard_index; -} - -// Compares the name of each test with the user-specified filter to -// decide whether the test should be run, then records the result in -// each TestCase and TestInfo object. -// If shard_tests == true, further filters tests based on sharding -// variables in the environment - see -// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide. -// Returns the number of tests that should run. -int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) { - const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ? - Int32FromEnvOrDie(kTestTotalShards, -1) : -1; - const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ? - Int32FromEnvOrDie(kTestShardIndex, -1) : -1; - - // num_runnable_tests are the number of tests that will - // run across all shards (i.e., match filter and are not disabled). - // num_selected_tests are the number of tests to be run on - // this shard. 
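
// [Illustrative sketch -- not part of the original source] The filtering
// described above compares each full test name ("TestCaseName.TestName")
// against --gtest_filter patterns: '?' matches one character, '*' matches any
// substring, ':' separates patterns, and an optional '-' introduces negative
// patterns. Below is a minimal stand-in for the single-pattern match only;
// splitting on ':' and '-' is omitted for brevity.
#include <cstdio>

static bool PatternMatches(const char* pattern, const char* name) {
  switch (*pattern) {
    case '\0': return *name == '\0';
    case '?':  return *name != '\0' && PatternMatches(pattern + 1, name + 1);
    case '*':  return (*name != '\0' && PatternMatches(pattern, name + 1)) ||
                      PatternMatches(pattern + 1, name);
    default:   return *pattern == *name &&
                      PatternMatches(pattern + 1, name + 1);
  }
}

int main() {
  std::printf("%d\n", PatternMatches("FooTest.*", "FooTest.Bar"));          // 1
  std::printf("%d\n", PatternMatches("*DISABLED_*", "Foo.DISABLED_Baz"));   // 1
  std::printf("%d\n", PatternMatches("FooTest.?ar", "FooTest.Car"));        // 1
  std::printf("%d\n", PatternMatches("FooTest.*", "BarTest.Foo"));          // 0
  return 0;
}
// [end of sketch]
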
- int num_runnable_tests = 0; - int num_selected_tests = 0; - for (size_t i = 0; i < test_cases_.size(); i++) { - TestCase* const test_case = test_cases_[i]; - const String &test_case_name = test_case->name(); - test_case->set_should_run(false); - - for (size_t j = 0; j < test_case->test_info_list().size(); j++) { - TestInfo* const test_info = test_case->test_info_list()[j]; - const String test_name(test_info->name()); - // A test is disabled if test case name or test name matches - // kDisableTestFilter. - const bool is_disabled = - internal::UnitTestOptions::MatchesFilter(test_case_name, - kDisableTestFilter) || - internal::UnitTestOptions::MatchesFilter(test_name, - kDisableTestFilter); - test_info->is_disabled_ = is_disabled; - - const bool matches_filter = - internal::UnitTestOptions::FilterMatchesTest(test_case_name, - test_name); - test_info->matches_filter_ = matches_filter; - - const bool is_runnable = - (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) && - matches_filter; - - const bool is_selected = is_runnable && - (shard_tests == IGNORE_SHARDING_PROTOCOL || - ShouldRunTestOnShard(total_shards, shard_index, - num_runnable_tests)); - - num_runnable_tests += is_runnable; - num_selected_tests += is_selected; - - test_info->should_run_ = is_selected; - test_case->set_should_run(test_case->should_run() || is_selected); - } - } - return num_selected_tests; -} - -// Prints the names of the tests matching the user-specified filter flag. -void UnitTestImpl::ListTestsMatchingFilter() { - for (size_t i = 0; i < test_cases_.size(); i++) { - const TestCase* const test_case = test_cases_[i]; - bool printed_test_case_name = false; - - for (size_t j = 0; j < test_case->test_info_list().size(); j++) { - const TestInfo* const test_info = - test_case->test_info_list()[j]; - if (test_info->matches_filter_) { - if (!printed_test_case_name) { - printed_test_case_name = true; - printf("%s.\n", test_case->name()); - } - printf(" %s\n", test_info->name()); - } - } - } - fflush(stdout); -} - -// Sets the OS stack trace getter. -// -// Does nothing if the input and the current OS stack trace getter are -// the same; otherwise, deletes the old getter and makes the input the -// current getter. -void UnitTestImpl::set_os_stack_trace_getter( - OsStackTraceGetterInterface* getter) { - if (os_stack_trace_getter_ != getter) { - delete os_stack_trace_getter_; - os_stack_trace_getter_ = getter; - } -} - -// Returns the current OS stack trace getter if it is not NULL; -// otherwise, creates an OsStackTraceGetter, makes it the current -// getter, and returns it. -OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() { - if (os_stack_trace_getter_ == NULL) { - os_stack_trace_getter_ = new OsStackTraceGetter; - } - - return os_stack_trace_getter_; -} - -// Returns the TestResult for the test that's currently running, or -// the TestResult for the ad hoc test if no test is running. -TestResult* UnitTestImpl::current_test_result() { - return current_test_info_ ? - &(current_test_info_->result_) : &ad_hoc_test_result_; -} - -// Shuffles all test cases, and the tests within each test case, -// making sure that death tests are still run first. -void UnitTestImpl::ShuffleTests() { - // Shuffles the death test cases. - ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_); - - // Shuffles the non-death test cases. - ShuffleRange(random(), last_death_test_case_ + 1, - static_cast(test_cases_.size()), &test_case_indices_); - - // Shuffles the tests inside each test case. 
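
// [Illustrative sketch -- not part of the original source] The shuffle /
// unshuffle pair above boils down to: permute an index vector with a seeded
// RNG (so --gtest_random_seed makes the order reproducible), then restore the
// identity permutation afterwards. A standalone illustration, not Google
// Test's ShuffleRange():
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <vector>

static void SeededShuffle(unsigned int seed, std::vector<int>* indices) {
  std::srand(seed);
  // Fisher-Yates: swap each position with a not-yet-fixed position.
  for (int i = static_cast<int>(indices->size()) - 1; i > 0; --i) {
    const int j = std::rand() % (i + 1);
    std::swap((*indices)[i], (*indices)[j]);
  }
}

int main() {
  std::vector<int> order;
  for (int i = 0; i < 5; ++i) order.push_back(i);
  SeededShuffle(12345, &order);                       // Same seed, same order.
  for (size_t i = 0; i < order.size(); ++i) std::printf("%d ", order[i]);
  std::printf("\n");
  // "Unshuffle": reset to the identity order, as UnshuffleTests() does.
  for (size_t i = 0; i < order.size(); ++i) order[i] = static_cast<int>(i);
  return 0;
}
// [end of sketch]
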
- for (size_t i = 0; i < test_cases_.size(); i++) { - test_cases_[i]->ShuffleTests(random()); - } -} - -// Restores the test cases and tests to their order before the first shuffle. -void UnitTestImpl::UnshuffleTests() { - for (size_t i = 0; i < test_cases_.size(); i++) { - // Unshuffles the tests in each test case. - test_cases_[i]->UnshuffleTests(); - // Resets the index of each test case. - test_case_indices_[i] = static_cast(i); - } -} - -// Returns the current OS stack trace as a String. -// -// The maximum number of stack frames to be included is specified by -// the gtest_stack_trace_depth flag. The skip_count parameter -// specifies the number of top frames to be skipped, which doesn't -// count against the number of frames to be included. -// -// For example, if Foo() calls Bar(), which in turn calls -// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in -// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't. -String GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/, - int skip_count) { - // We pass skip_count + 1 to skip this wrapper function in addition - // to what the user really wants to skip. - return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1); -} - -// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to -// suppress unreachable code warnings. -namespace { -class ClassUniqueToAlwaysTrue {}; -} - -bool IsTrue(bool condition) { return condition; } - -bool AlwaysTrue() { -#if GTEST_HAS_EXCEPTIONS - // This condition is always false so AlwaysTrue() never actually throws, - // but it makes the compiler think that it may throw. - if (IsTrue(false)) - throw ClassUniqueToAlwaysTrue(); -#endif // GTEST_HAS_EXCEPTIONS - return true; -} - -// If *pstr starts with the given prefix, modifies *pstr to be right -// past the prefix and returns true; otherwise leaves *pstr unchanged -// and returns false. None of pstr, *pstr, and prefix can be NULL. -bool SkipPrefix(const char* prefix, const char** pstr) { - const size_t prefix_len = strlen(prefix); - if (strncmp(*pstr, prefix, prefix_len) == 0) { - *pstr += prefix_len; - return true; - } - return false; -} - -// Parses a string as a command line flag. The string should have -// the format "--flag=value". When def_optional is true, the "=value" -// part can be omitted. -// -// Returns the value of the flag, or NULL if the parsing failed. -const char* ParseFlagValue(const char* str, - const char* flag, - bool def_optional) { - // str and flag must not be NULL. - if (str == NULL || flag == NULL) return NULL; - - // The flag must start with "--" followed by GTEST_FLAG_PREFIX_. - const String flag_str = String::Format("--%s%s", GTEST_FLAG_PREFIX_, flag); - const size_t flag_len = flag_str.length(); - if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL; - - // Skips the flag name. - const char* flag_end = str + flag_len; - - // When def_optional is true, it's OK to not have a "=value" part. - if (def_optional && (flag_end[0] == '\0')) { - return flag_end; - } - - // If def_optional is true and there are more characters after the - // flag name, or if def_optional is false, there must be a '=' after - // the flag name. - if (flag_end[0] != '=') return NULL; - - // Returns the string after "=". - return flag_end + 1; -} - -// Parses a string for a bool flag, in the form of either -// "--flag=value" or "--flag". -// -// In the former case, the value is taken as true as long as it does -// not start with '0', 'f', or 'F'. 
-// -// In the latter case, the value is taken as true. -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -bool ParseBoolFlag(const char* str, const char* flag, bool* value) { - // Gets the value of the flag as a string. - const char* const value_str = ParseFlagValue(str, flag, true); - - // Aborts if the parsing failed. - if (value_str == NULL) return false; - - // Converts the string value to a bool. - *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); - return true; -} - -// Parses a string for an Int32 flag, in the form of -// "--flag=value". -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -bool ParseInt32Flag(const char* str, const char* flag, Int32* value) { - // Gets the value of the flag as a string. - const char* const value_str = ParseFlagValue(str, flag, false); - - // Aborts if the parsing failed. - if (value_str == NULL) return false; - - // Sets *value to the value of the flag. - return ParseInt32(Message() << "The value of flag --" << flag, - value_str, value); -} - -// Parses a string for a string flag, in the form of -// "--flag=value". -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -bool ParseStringFlag(const char* str, const char* flag, String* value) { - // Gets the value of the flag as a string. - const char* const value_str = ParseFlagValue(str, flag, false); - - // Aborts if the parsing failed. - if (value_str == NULL) return false; - - // Sets *value to the value of the flag. - *value = value_str; - return true; -} - -// Determines whether a string has a prefix that Google Test uses for its -// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_. -// If Google Test detects that a command line flag has its prefix but is not -// recognized, it will print its help message. Flags starting with -// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test -// internal flags and do not trigger the help message. -static bool HasGoogleTestFlagPrefix(const char* str) { - return (SkipPrefix("--", &str) || - SkipPrefix("-", &str) || - SkipPrefix("/", &str)) && - !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) && - (SkipPrefix(GTEST_FLAG_PREFIX_, &str) || - SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str)); -} - -// Prints a string containing code-encoded text. The following escape -// sequences can be used in the string to control the text color: -// -// @@ prints a single '@' character. -// @R changes the color to red. -// @G changes the color to green. -// @Y changes the color to yellow. -// @D changes to the default terminal text color. -// -// TODO(wan@google.com): Write tests for this once we add stdout -// capturing to Google Test. -static void PrintColorEncoded(const char* str) { - GTestColor color = COLOR_DEFAULT; // The current color. - - // Conceptually, we split the string into segments divided by escape - // sequences. Then we print one segment at a time. At the end of - // each iteration, the str pointer advances to the beginning of the - // next segment. 
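
// [Illustrative sketch -- not part of the original source] A simplified,
// POSIX-terminal-only illustration of the '@' colour encoding described
// above. The real implementation delegates to ColoredPrintf(), which also
// handles the Windows console and the --gtest_color flag; here the escapes
// are mapped straight to ANSI codes.
#include <cstdio>

static void PrintAtEncoded(const char* str) {
  for (const char* p = str; *p != '\0'; ++p) {
    if (*p != '@') { std::putchar(*p); continue; }
    ++p;                                    // Look at the character after '@'.
    switch (*p) {
      case '@':  std::putchar('@');          break;  // "@@" -> literal '@'.
      case 'R':  std::printf("\033[0;31m");  break;  // red
      case 'G':  std::printf("\033[0;32m");  break;  // green
      case 'Y':  std::printf("\033[0;33m");  break;  // yellow
      case 'D':  std::printf("\033[m");      break;  // default colour
      case '\0': return;                             // trailing '@': stop
      default:   break;                              // unknown escape: drop it
    }
  }
  std::printf("\033[m");  // Always restore the default colour at the end.
}

int main() {
  PrintAtEncoded("@GPASS@D  ordinary text  @RFAIL@D\n");
  return 0;
}
// [end of sketch]
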
- for (;;) { - const char* p = strchr(str, '@'); - if (p == NULL) { - ColoredPrintf(color, "%s", str); - return; - } - - ColoredPrintf(color, "%s", String(str, p - str).c_str()); - - const char ch = p[1]; - str = p + 2; - if (ch == '@') { - ColoredPrintf(color, "@"); - } else if (ch == 'D') { - color = COLOR_DEFAULT; - } else if (ch == 'R') { - color = COLOR_RED; - } else if (ch == 'G') { - color = COLOR_GREEN; - } else if (ch == 'Y') { - color = COLOR_YELLOW; - } else { - --str; - } - } -} - -static const char kColorEncodedHelpMessage[] = -"This program contains tests written using " GTEST_NAME_ ". You can use the\n" -"following command line flags to control its behavior:\n" -"\n" -"Test Selection:\n" -" @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n" -" List the names of all tests instead of running them. The name of\n" -" TEST(Foo, Bar) is \"Foo.Bar\".\n" -" @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS" - "[@G-@YNEGATIVE_PATTERNS]@D\n" -" Run only the tests whose name matches one of the positive patterns but\n" -" none of the negative patterns. '?' matches any single character; '*'\n" -" matches any substring; ':' separates two patterns.\n" -" @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n" -" Run all disabled tests too.\n" -"\n" -"Test Execution:\n" -" @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n" -" Run the tests repeatedly; use a negative count to repeat forever.\n" -" @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n" -" Randomize tests' orders on every iteration.\n" -" @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n" -" Random number seed to use for shuffling test orders (between 1 and\n" -" 99999, or 0 to use a seed based on the current time).\n" -"\n" -"Test Output:\n" -" @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n" -" Enable/disable colored output. The default is @Gauto@D.\n" -" -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n" -" Don't print the elapsed time of each test.\n" -" @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G" - GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n" -" Generate an XML report in the given directory or with the given file\n" -" name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n" -#if GTEST_CAN_STREAM_RESULTS_ -" @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n" -" Stream test results to the given server.\n" -#endif // GTEST_CAN_STREAM_RESULTS_ -"\n" -"Assertion Behavior:\n" -#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS -" @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n" -" Set the default death test style.\n" -#endif // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS -" @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n" -" Turn assertion failures into debugger break-points.\n" -" @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n" -" Turn assertion failures into C++ exceptions.\n" -" @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n" -" Do not report exceptions as test failures. Instead, allow them\n" -" to crash the program or throw a pop-up (on Windows).\n" -"\n" -"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set " - "the corresponding\n" -"environment variable of a flag (all letters in upper-case). For example, to\n" -"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_ - "color=no@D or set\n" -"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n" -"\n" -"For more information, please read the " GTEST_NAME_ " documentation at\n" -"@G" GTEST_PROJECT_URL_ "@D. 
If you find a bug in " GTEST_NAME_ "\n"
-"(not one in your own code or tests), please report it to\n"
-"@G<" GTEST_DEV_EMAIL_ ">@D.\n";
-
-// Parses the command line for Google Test flags, without initializing
-// other parts of Google Test. The type parameter CharType can be
-// instantiated to either char or wchar_t.
-template <typename CharType>
-void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
-  for (int i = 1; i < *argc; i++) {
-    const String arg_string = StreamableToString(argv[i]);
-    const char* const arg = arg_string.c_str();
-
-    using internal::ParseBoolFlag;
-    using internal::ParseInt32Flag;
-    using internal::ParseStringFlag;
-
-    // Do we see a Google Test flag?
-    if (ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
-                      &GTEST_FLAG(also_run_disabled_tests)) ||
-        ParseBoolFlag(arg, kBreakOnFailureFlag,
-                      &GTEST_FLAG(break_on_failure)) ||
-        ParseBoolFlag(arg, kCatchExceptionsFlag,
-                      &GTEST_FLAG(catch_exceptions)) ||
-        ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
-        ParseStringFlag(arg, kDeathTestStyleFlag,
-                        &GTEST_FLAG(death_test_style)) ||
-        ParseBoolFlag(arg, kDeathTestUseFork,
-                      &GTEST_FLAG(death_test_use_fork)) ||
-        ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
-        ParseStringFlag(arg, kInternalRunDeathTestFlag,
-                        &GTEST_FLAG(internal_run_death_test)) ||
-        ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
-        ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
-        ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
-        ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
-        ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
-        ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
-        ParseInt32Flag(arg, kStackTraceDepthFlag,
-                       &GTEST_FLAG(stack_trace_depth)) ||
-        ParseStringFlag(arg, kStreamResultToFlag,
-                        &GTEST_FLAG(stream_result_to)) ||
-        ParseBoolFlag(arg, kThrowOnFailureFlag,
-                      &GTEST_FLAG(throw_on_failure))
-        ) {
-      // Yes. Shift the remainder of the argv list left by one. Note
-      // that argv has (*argc + 1) elements, the last one always being
-      // NULL. The following loop moves the trailing NULL element as
-      // well.
-      for (int j = i; j != *argc; j++) {
-        argv[j] = argv[j + 1];
-      }
-
-      // Decrements the argument count.
-      (*argc)--;
-
-      // We also need to decrement the iterator as we just removed
-      // an element.
-      i--;
-    } else if (arg_string == "--help" || arg_string == "-h" ||
-               arg_string == "-?" || arg_string == "/?" ||
-               HasGoogleTestFlagPrefix(arg)) {
-      // Both help flag and unrecognized Google Test flags (excluding
-      // internal ones) trigger help display.
-      g_help_flag = true;
-    }
-  }
-
-  if (g_help_flag) {
-    // We print the help here instead of in RUN_ALL_TESTS(), as the
-    // latter may not be called at all if the user is using Google
-    // Test with another testing framework.
-    PrintColorEncoded(kColorEncodedHelpMessage);
-  }
-}
-
-// Parses the command line for Google Test flags, without initializing
-// other parts of Google Test.
-void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
-  ParseGoogleTestFlagsOnlyImpl(argc, argv);
-}
-void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
-  ParseGoogleTestFlagsOnlyImpl(argc, argv);
-}
-
-// The internal implementation of InitGoogleTest().
-//
-// The type parameter CharType can be instantiated to either char or
-// wchar_t.
-template <typename CharType>
-void InitGoogleTestImpl(int* argc, CharType** argv) {
-  g_init_gtest_count++;
-
-  // We don't want to run the initialization code twice.
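
// [Illustrative sketch -- not part of the original source] The net effect of
// the parsing loop above: every recognised --gtest_* argument is consumed,
// the rest of argv (including the trailing NULL) is shifted left, and *argc
// shrinks, leaving the program's own flags untouched. A standalone
// demonstration of the same shifting idiom with a made-up "--demo_" prefix
// (not a Google Test flag):
#include <cstdio>
#include <cstring>

static void ConsumeDemoFlags(int* argc, char** argv) {
  for (int i = 1; i < *argc; ++i) {
    if (std::strncmp(argv[i], "--demo_", 7) == 0) {
      // argv has (*argc + 1) elements; the last one is NULL and is moved too.
      for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
      --(*argc);
      --i;  // Re-examine the element that just slid into position i.
    }
  }
}

int main() {
  char prog[] = "./a.out", eat[] = "--demo_repeat=3", keep[] = "--verbose";
  char* argv[] = { prog, eat, keep, NULL };
  int argc = 3;
  ConsumeDemoFlags(&argc, argv);
  for (int i = 0; i < argc; ++i) std::printf("argv[%d] = %s\n", i, argv[i]);
  // Prints "./a.out" and "--verbose"; the --demo_ flag has been removed.
  return 0;
}
// [end of sketch]
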
- if (g_init_gtest_count != 1) return; - - if (*argc <= 0) return; - - internal::g_executable_path = internal::StreamableToString(argv[0]); - -#if GTEST_HAS_DEATH_TEST - - g_argvs.clear(); - for (int i = 0; i != *argc; i++) { - g_argvs.push_back(StreamableToString(argv[i])); - } - -#endif // GTEST_HAS_DEATH_TEST - - ParseGoogleTestFlagsOnly(argc, argv); - GetUnitTestImpl()->PostFlagParsingInit(); -} - -} // namespace internal - -// Initializes Google Test. This must be called before calling -// RUN_ALL_TESTS(). In particular, it parses a command line for the -// flags that Google Test recognizes. Whenever a Google Test flag is -// seen, it is removed from argv, and *argc is decremented. -// -// No value is returned. Instead, the Google Test flag variables are -// updated. -// -// Calling the function for the second time has no user-visible effect. -void InitGoogleTest(int* argc, char** argv) { - internal::InitGoogleTestImpl(argc, argv); -} - -// This overloaded version can be used in Windows programs compiled in -// UNICODE mode. -void InitGoogleTest(int* argc, wchar_t** argv) { - internal::InitGoogleTestImpl(argc, argv); -} - -} // namespace testing -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev) -// -// This file implements death tests. - - -#if GTEST_HAS_DEATH_TEST - -# if GTEST_OS_MAC -# include -# endif // GTEST_OS_MAC - -# include -# include -# include -# include - -# if GTEST_OS_WINDOWS -# include -# else -# include -# include -# endif // GTEST_OS_WINDOWS - -#endif // GTEST_HAS_DEATH_TEST - - -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick is to -// prevent a user from accidentally including gtest-internal-inl.h in -// his code. -#define GTEST_IMPLEMENTATION_ 1 -#undef GTEST_IMPLEMENTATION_ - -namespace testing { - -// Constants. - -// The default death test style. 
-static const char kDefaultDeathTestStyle[] = "fast"; - -GTEST_DEFINE_string_( - death_test_style, - internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle), - "Indicates how to run a death test in a forked child process: " - "\"threadsafe\" (child process re-executes the test binary " - "from the beginning, running only the specific death test) or " - "\"fast\" (child process runs the death test immediately " - "after forking)."); - -GTEST_DEFINE_bool_( - death_test_use_fork, - internal::BoolFromGTestEnv("death_test_use_fork", false), - "Instructs to use fork()/_exit() instead of clone() in death tests. " - "Ignored and always uses fork() on POSIX systems where clone() is not " - "implemented. Useful when running under valgrind or similar tools if " - "those do not support clone(). Valgrind 3.3.1 will just fail if " - "it sees an unsupported combination of clone() flags. " - "It is not recommended to use this flag w/o valgrind though it will " - "work in 99% of the cases. Once valgrind is fixed, this flag will " - "most likely be removed."); - -namespace internal { -GTEST_DEFINE_string_( - internal_run_death_test, "", - "Indicates the file, line number, temporal index of " - "the single death test to run, and a file descriptor to " - "which a success code may be sent, all separated by " - "colons. This flag is specified if and only if the current " - "process is a sub-process launched for running a thread-safe " - "death test. FOR INTERNAL USE ONLY."); -} // namespace internal - -#if GTEST_HAS_DEATH_TEST - -// ExitedWithCode constructor. -ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) { -} - -// ExitedWithCode function-call operator. -bool ExitedWithCode::operator()(int exit_status) const { -# if GTEST_OS_WINDOWS - - return exit_status == exit_code_; - -# else - - return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_; - -# endif // GTEST_OS_WINDOWS -} - -# if !GTEST_OS_WINDOWS -// KilledBySignal constructor. -KilledBySignal::KilledBySignal(int signum) : signum_(signum) { -} - -// KilledBySignal function-call operator. -bool KilledBySignal::operator()(int exit_status) const { - return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_; -} -# endif // !GTEST_OS_WINDOWS - -namespace internal { - -// Utilities needed for death tests. - -// Generates a textual description of a given exit code, in the format -// specified by wait(2). -static String ExitSummary(int exit_code) { - Message m; - -# if GTEST_OS_WINDOWS - - m << "Exited with exit status " << exit_code; - -# else - - if (WIFEXITED(exit_code)) { - m << "Exited with exit status " << WEXITSTATUS(exit_code); - } else if (WIFSIGNALED(exit_code)) { - m << "Terminated by signal " << WTERMSIG(exit_code); - } -# ifdef WCOREDUMP - if (WCOREDUMP(exit_code)) { - m << " (core dumped)"; - } -# endif -# endif // GTEST_OS_WINDOWS - - return m.GetString(); -} - -// Returns true if exit_status describes a process that was terminated -// by a signal, or exited normally with a nonzero exit code. -bool ExitedUnsuccessfully(int exit_status) { - return !ExitedWithCode(0)(exit_status); -} - -# if !GTEST_OS_WINDOWS -// Generates a textual failure message when a death test finds more than -// one thread running, or cannot determine the number of threads, prior -// to executing the given statement. It is the responsibility of the -// caller not to pass a thread_count of 1. 
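
// [Illustrative sketch -- not part of the original source] Typical use of the
// ExitedWithCode and KilledBySignal predicates defined above from user test
// code, via the documented public macros; the header path and test names are
// illustrative:
#include <csignal>
#include <cstdlib>
#include "gtest/gtest.h"

static void ExitCleanly()   { std::exit(2); }
static void CrashWithSegv() { std::raise(SIGSEGV); }

TEST(MyDeathTest, Examples) {
  // Passes if the statement terminates with exit status 2; the empty regular
  // expression places no constraint on the captured stderr output.
  EXPECT_EXIT(ExitCleanly(), ::testing::ExitedWithCode(2), "");
#if !GTEST_OS_WINDOWS
  // KilledBySignal is only available on POSIX systems (see the #if above).
  EXPECT_EXIT(CrashWithSegv(), ::testing::KilledBySignal(SIGSEGV), "");
#endif
  // EXPECT_DEATH is shorthand for "died with a non-zero status or a signal,
  // and stderr matches the given regular expression".
  EXPECT_DEATH(CrashWithSegv(), "");
}
// [end of sketch]
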
-static String DeathTestThreadWarning(size_t thread_count) { - Message msg; - msg << "Death tests use fork(), which is unsafe particularly" - << " in a threaded context. For this test, " << GTEST_NAME_ << " "; - if (thread_count == 0) - msg << "couldn't detect the number of threads."; - else - msg << "detected " << thread_count << " threads."; - return msg.GetString(); -} -# endif // !GTEST_OS_WINDOWS - -// Flag characters for reporting a death test that did not die. -static const char kDeathTestLived = 'L'; -static const char kDeathTestReturned = 'R'; -static const char kDeathTestThrew = 'T'; -static const char kDeathTestInternalError = 'I'; - -// An enumeration describing all of the possible ways that a death test can -// conclude. DIED means that the process died while executing the test -// code; LIVED means that process lived beyond the end of the test code; -// RETURNED means that the test statement attempted to execute a return -// statement, which is not allowed; THREW means that the test statement -// returned control by throwing an exception. IN_PROGRESS means the test -// has not yet concluded. -// TODO(vladl@google.com): Unify names and possibly values for -// AbortReason, DeathTestOutcome, and flag characters above. -enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW }; - -// Routine for aborting the program which is safe to call from an -// exec-style death test child process, in which case the error -// message is propagated back to the parent process. Otherwise, the -// message is simply printed to stderr. In either case, the program -// then exits with status 1. -void DeathTestAbort(const String& message) { - // On a POSIX system, this function may be called from a threadsafe-style - // death test child process, which operates on a very small stack. Use - // the heap for any additional non-minuscule memory requirements. - const InternalRunDeathTestFlag* const flag = - GetUnitTestImpl()->internal_run_death_test_flag(); - if (flag != NULL) { - FILE* parent = posix::FDOpen(flag->write_fd(), "w"); - fputc(kDeathTestInternalError, parent); - fprintf(parent, "%s", message.c_str()); - fflush(parent); - _exit(1); - } else { - fprintf(stderr, "%s", message.c_str()); - fflush(stderr); - posix::Abort(); - } -} - -// A replacement for CHECK that calls DeathTestAbort if the assertion -// fails. -# define GTEST_DEATH_TEST_CHECK_(expression) \ - do { \ - if (!::testing::internal::IsTrue(expression)) { \ - DeathTestAbort(::testing::internal::String::Format( \ - "CHECK failed: File %s, line %d: %s", \ - __FILE__, __LINE__, #expression)); \ - } \ - } while (::testing::internal::AlwaysFalse()) - -// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for -// evaluating any system call that fulfills two conditions: it must return -// -1 on failure, and set errno to EINTR when it is interrupted and -// should be tried again. The macro expands to a loop that repeatedly -// evaluates the expression as long as it evaluates to -1 and sets -// errno to EINTR. If the expression evaluates to -1 but errno is -// something other than EINTR, DeathTestAbort is called. 
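
// [Illustrative sketch -- not part of the original source] The macro described
// above wraps the classic POSIX EINTR-retry idiom. A standalone, non-macro
// version of the same pattern applied to read(); the function name is
// illustrative:
#include <cerrno>
#include <unistd.h>

static ssize_t ReadRetryingOnEintr(int fd, void* buffer, size_t count) {
  ssize_t result;
  do {
    result = read(fd, buffer, count);        // May be interrupted by a signal.
  } while (result == -1 && errno == EINTR);  // Retry only for EINTR.
  return result;  // -1 with any other errno is a real error for the caller.
}
// [end of sketch]
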
-# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \ - do { \ - int gtest_retval; \ - do { \ - gtest_retval = (expression); \ - } while (gtest_retval == -1 && errno == EINTR); \ - if (gtest_retval == -1) { \ - DeathTestAbort(::testing::internal::String::Format( \ - "CHECK failed: File %s, line %d: %s != -1", \ - __FILE__, __LINE__, #expression)); \ - } \ - } while (::testing::internal::AlwaysFalse()) - -// Returns the message describing the last system error in errno. -String GetLastErrnoDescription() { - return String(errno == 0 ? "" : posix::StrError(errno)); -} - -// This is called from a death test parent process to read a failure -// message from the death test child process and log it with the FATAL -// severity. On Windows, the message is read from a pipe handle. On other -// platforms, it is read from a file descriptor. -static void FailFromInternalError(int fd) { - Message error; - char buffer[256]; - int num_read; - - do { - while ((num_read = posix::Read(fd, buffer, 255)) > 0) { - buffer[num_read] = '\0'; - error << buffer; - } - } while (num_read == -1 && errno == EINTR); - - if (num_read == 0) { - GTEST_LOG_(FATAL) << error.GetString(); - } else { - const int last_error = errno; - GTEST_LOG_(FATAL) << "Error while reading death test internal: " - << GetLastErrnoDescription() << " [" << last_error << "]"; - } -} - -// Death test constructor. Increments the running death test count -// for the current test. -DeathTest::DeathTest() { - TestInfo* const info = GetUnitTestImpl()->current_test_info(); - if (info == NULL) { - DeathTestAbort("Cannot run a death test outside of a TEST or " - "TEST_F construct"); - } -} - -// Creates and returns a death test by dispatching to the current -// death test factory. -bool DeathTest::Create(const char* statement, const RE* regex, - const char* file, int line, DeathTest** test) { - return GetUnitTestImpl()->death_test_factory()->Create( - statement, regex, file, line, test); -} - -const char* DeathTest::LastMessage() { - return last_death_test_message_.c_str(); -} - -void DeathTest::set_last_death_test_message(const String& message) { - last_death_test_message_ = message; -} - -String DeathTest::last_death_test_message_; - -// Provides cross platform implementation for some death functionality. -class DeathTestImpl : public DeathTest { - protected: - DeathTestImpl(const char* a_statement, const RE* a_regex) - : statement_(a_statement), - regex_(a_regex), - spawned_(false), - status_(-1), - outcome_(IN_PROGRESS), - read_fd_(-1), - write_fd_(-1) {} - - // read_fd_ is expected to be closed and cleared by a derived class. - ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); } - - void Abort(AbortReason reason); - virtual bool Passed(bool status_ok); - - const char* statement() const { return statement_; } - const RE* regex() const { return regex_; } - bool spawned() const { return spawned_; } - void set_spawned(bool is_spawned) { spawned_ = is_spawned; } - int status() const { return status_; } - void set_status(int a_status) { status_ = a_status; } - DeathTestOutcome outcome() const { return outcome_; } - void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; } - int read_fd() const { return read_fd_; } - void set_read_fd(int fd) { read_fd_ = fd; } - int write_fd() const { return write_fd_; } - void set_write_fd(int fd) { write_fd_ = fd; } - - // Called in the parent process only. Reads the result code of the death - // test child process via a pipe, interprets it to set the outcome_ - // member, and closes read_fd_. 
Outputs diagnostics and terminates in - // case of unexpected codes. - void ReadAndInterpretStatusByte(); - - private: - // The textual content of the code this object is testing. This class - // doesn't own this string and should not attempt to delete it. - const char* const statement_; - // The regular expression which test output must match. DeathTestImpl - // doesn't own this object and should not attempt to delete it. - const RE* const regex_; - // True if the death test child process has been successfully spawned. - bool spawned_; - // The exit status of the child process. - int status_; - // How the death test concluded. - DeathTestOutcome outcome_; - // Descriptor to the read end of the pipe to the child process. It is - // always -1 in the child process. The child keeps its write end of the - // pipe in write_fd_. - int read_fd_; - // Descriptor to the child's write end of the pipe to the parent process. - // It is always -1 in the parent process. The parent keeps its end of the - // pipe in read_fd_. - int write_fd_; -}; - -// Called in the parent process only. Reads the result code of the death -// test child process via a pipe, interprets it to set the outcome_ -// member, and closes read_fd_. Outputs diagnostics and terminates in -// case of unexpected codes. -void DeathTestImpl::ReadAndInterpretStatusByte() { - char flag; - int bytes_read; - - // The read() here blocks until data is available (signifying the - // failure of the death test) or until the pipe is closed (signifying - // its success), so it's okay to call this in the parent before - // the child process has exited. - do { - bytes_read = posix::Read(read_fd(), &flag, 1); - } while (bytes_read == -1 && errno == EINTR); - - if (bytes_read == 0) { - set_outcome(DIED); - } else if (bytes_read == 1) { - switch (flag) { - case kDeathTestReturned: - set_outcome(RETURNED); - break; - case kDeathTestThrew: - set_outcome(THREW); - break; - case kDeathTestLived: - set_outcome(LIVED); - break; - case kDeathTestInternalError: - FailFromInternalError(read_fd()); // Does not return. - break; - default: - GTEST_LOG_(FATAL) << "Death test child process reported " - << "unexpected status byte (" - << static_cast(flag) << ")"; - } - } else { - GTEST_LOG_(FATAL) << "Read from death test child process failed: " - << GetLastErrnoDescription(); - } - GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd())); - set_read_fd(-1); -} - -// Signals that the death test code which should have exited, didn't. -// Should be called only in a death test child process. -// Writes a status byte to the child's status file descriptor, then -// calls _exit(1). -void DeathTestImpl::Abort(AbortReason reason) { - // The parent process considers the death test to be a failure if - // it finds any data in our pipe. So, here we write a single flag byte - // to the pipe, then exit. - const char status_ch = - reason == TEST_DID_NOT_DIE ? kDeathTestLived : - reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned; - - GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1)); - // We are leaking the descriptor here because on some platforms (i.e., - // when built as Windows DLL), destructors of global objects will still - // run after calling _exit(). On such systems, write_fd_ will be - // indirectly closed from the destructor of UnitTestImpl, causing double - // close if it is also closed here. On debug configurations, double close - // may assert. 
As there are no in-process buffers to flush here, we are - // relying on the OS to close the descriptor after the process terminates - // when the destructors are not run. - _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash) -} - -// Returns an indented copy of stderr output for a death test. -// This makes distinguishing death test output lines from regular log lines -// much easier. -static ::std::string FormatDeathTestOutput(const ::std::string& output) { - ::std::string ret; - for (size_t at = 0; ; ) { - const size_t line_end = output.find('\n', at); - ret += "[ DEATH ] "; - if (line_end == ::std::string::npos) { - ret += output.substr(at); - break; - } - ret += output.substr(at, line_end + 1 - at); - at = line_end + 1; - } - return ret; -} - -// Assesses the success or failure of a death test, using both private -// members which have previously been set, and one argument: -// -// Private data members: -// outcome: An enumeration describing how the death test -// concluded: DIED, LIVED, THREW, or RETURNED. The death test -// fails in the latter three cases. -// status: The exit status of the child process. On *nix, it is in the -// in the format specified by wait(2). On Windows, this is the -// value supplied to the ExitProcess() API or a numeric code -// of the exception that terminated the program. -// regex: A regular expression object to be applied to -// the test's captured standard error output; the death test -// fails if it does not match. -// -// Argument: -// status_ok: true if exit_status is acceptable in the context of -// this particular death test, which fails if it is false -// -// Returns true iff all of the above conditions are met. Otherwise, the -// first failing condition, in the order given above, is the one that is -// reported. Also sets the last death test message string. -bool DeathTestImpl::Passed(bool status_ok) { - if (!spawned()) - return false; - - const String error_message = GetCapturedStderr(); - - bool success = false; - Message buffer; - - buffer << "Death test: " << statement() << "\n"; - switch (outcome()) { - case LIVED: - buffer << " Result: failed to die.\n" - << " Error msg:\n" << FormatDeathTestOutput(error_message); - break; - case THREW: - buffer << " Result: threw an exception.\n" - << " Error msg:\n" << FormatDeathTestOutput(error_message); - break; - case RETURNED: - buffer << " Result: illegal return in test statement.\n" - << " Error msg:\n" << FormatDeathTestOutput(error_message); - break; - case DIED: - if (status_ok) { - const bool matched = RE::PartialMatch(error_message.c_str(), *regex()); - if (matched) { - success = true; - } else { - buffer << " Result: died but not with expected error.\n" - << " Expected: " << regex()->pattern() << "\n" - << "Actual msg:\n" << FormatDeathTestOutput(error_message); - } - } else { - buffer << " Result: died but not with expected exit code:\n" - << " " << ExitSummary(status()) << "\n" - << "Actual msg:\n" << FormatDeathTestOutput(error_message); - } - break; - case IN_PROGRESS: - default: - GTEST_LOG_(FATAL) - << "DeathTest::Passed somehow called before conclusion of test"; - } - - DeathTest::set_last_death_test_message(buffer.GetString()); - return success; -} - -# if GTEST_OS_WINDOWS -// WindowsDeathTest implements death tests on Windows. 
Due to the -// specifics of starting new processes on Windows, death tests there are -// always threadsafe, and Google Test considers the -// --gtest_death_test_style=fast setting to be equivalent to -// --gtest_death_test_style=threadsafe there. -// -// A few implementation notes: Like the Linux version, the Windows -// implementation uses pipes for child-to-parent communication. But due to -// the specifics of pipes on Windows, some extra steps are required: -// -// 1. The parent creates a communication pipe and stores handles to both -// ends of it. -// 2. The parent starts the child and provides it with the information -// necessary to acquire the handle to the write end of the pipe. -// 3. The child acquires the write end of the pipe and signals the parent -// using a Windows event. -// 4. Now the parent can release the write end of the pipe on its side. If -// this is done before step 3, the object's reference count goes down to -// 0 and it is destroyed, preventing the child from acquiring it. The -// parent now has to release it, or read operations on the read end of -// the pipe will not return when the child terminates. -// 5. The parent reads child's output through the pipe (outcome code and -// any possible error messages) from the pipe, and its stderr and then -// determines whether to fail the test. -// -// Note: to distinguish Win32 API calls from the local method and function -// calls, the former are explicitly resolved in the global namespace. -// -class WindowsDeathTest : public DeathTestImpl { - public: - WindowsDeathTest(const char* a_statement, - const RE* a_regex, - const char* file, - int line) - : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {} - - // All of these virtual functions are inherited from DeathTest. - virtual int Wait(); - virtual TestRole AssumeRole(); - - private: - // The name of the file in which the death test is located. - const char* const file_; - // The line number on which the death test is located. - const int line_; - // Handle to the write end of the pipe to the child process. - AutoHandle write_handle_; - // Child process handle. - AutoHandle child_handle_; - // Event the child process uses to signal the parent that it has - // acquired the handle to the write end of the pipe. After seeing this - // event the parent can release its own handles to make sure its - // ReadFile() calls return when the child terminates. - AutoHandle event_handle_; -}; - -// Waits for the child in a death test to exit, returning its exit -// status, or 0 if no child process exists. As a side effect, sets the -// outcome data member. -int WindowsDeathTest::Wait() { - if (!spawned()) - return 0; - - // Wait until the child either signals that it has acquired the write end - // of the pipe or it dies. - const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() }; - switch (::WaitForMultipleObjects(2, - wait_handles, - FALSE, // Waits for any of the handles. - INFINITE)) { - case WAIT_OBJECT_0: - case WAIT_OBJECT_0 + 1: - break; - default: - GTEST_DEATH_TEST_CHECK_(false); // Should not get here. - } - - // The child has acquired the write end of the pipe or exited. - // We release the handle on our side and continue. - write_handle_.Reset(); - event_handle_.Reset(); - - ReadAndInterpretStatusByte(); - - // Waits for the child process to exit if it haven't already. This - // returns immediately if the child has already exited, regardless of - // whether previous calls to WaitForMultipleObjects synchronized on this - // handle or not. 
- GTEST_DEATH_TEST_CHECK_( - WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(), - INFINITE)); - DWORD status_code; - GTEST_DEATH_TEST_CHECK_( - ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE); - child_handle_.Reset(); - set_status(static_cast(status_code)); - return status(); -} - -// The AssumeRole process for a Windows death test. It creates a child -// process with the same executable as the current process to run the -// death test. The child process is given the --gtest_filter and -// --gtest_internal_run_death_test flags such that it knows to run the -// current death test only. -DeathTest::TestRole WindowsDeathTest::AssumeRole() { - const UnitTestImpl* const impl = GetUnitTestImpl(); - const InternalRunDeathTestFlag* const flag = - impl->internal_run_death_test_flag(); - const TestInfo* const info = impl->current_test_info(); - const int death_test_index = info->result()->death_test_count(); - - if (flag != NULL) { - // ParseInternalRunDeathTestFlag() has performed all the necessary - // processing. - set_write_fd(flag->write_fd()); - return EXECUTE_TEST; - } - - // WindowsDeathTest uses an anonymous pipe to communicate results of - // a death test. - SECURITY_ATTRIBUTES handles_are_inheritable = { - sizeof(SECURITY_ATTRIBUTES), NULL, TRUE }; - HANDLE read_handle, write_handle; - GTEST_DEATH_TEST_CHECK_( - ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable, - 0) // Default buffer size. - != FALSE); - set_read_fd(::_open_osfhandle(reinterpret_cast(read_handle), - O_RDONLY)); - write_handle_.Reset(write_handle); - event_handle_.Reset(::CreateEvent( - &handles_are_inheritable, - TRUE, // The event will automatically reset to non-signaled state. - FALSE, // The initial state is non-signalled. - NULL)); // The even is unnamed. - GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL); - const String filter_flag = String::Format("--%s%s=%s.%s", - GTEST_FLAG_PREFIX_, kFilterFlag, - info->test_case_name(), - info->name()); - const String internal_flag = String::Format( - "--%s%s=%s|%d|%d|%u|%Iu|%Iu", - GTEST_FLAG_PREFIX_, - kInternalRunDeathTestFlag, - file_, line_, - death_test_index, - static_cast(::GetCurrentProcessId()), - // size_t has the same with as pointers on both 32-bit and 64-bit - // Windows platforms. - // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx. - reinterpret_cast(write_handle), - reinterpret_cast(event_handle_.Get())); - - char executable_path[_MAX_PATH + 1]; // NOLINT - GTEST_DEATH_TEST_CHECK_( - _MAX_PATH + 1 != ::GetModuleFileNameA(NULL, - executable_path, - _MAX_PATH)); - - String command_line = String::Format("%s %s \"%s\"", - ::GetCommandLineA(), - filter_flag.c_str(), - internal_flag.c_str()); - - DeathTest::set_last_death_test_message(""); - - CaptureStderr(); - // Flush the log buffers since the log streams are shared with the child. - FlushInfoLog(); - - // The child process will share the standard handles with the parent. - STARTUPINFOA startup_info; - memset(&startup_info, 0, sizeof(STARTUPINFO)); - startup_info.dwFlags = STARTF_USESTDHANDLES; - startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE); - startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE); - startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE); - - PROCESS_INFORMATION process_info; - GTEST_DEATH_TEST_CHECK_(::CreateProcessA( - executable_path, - const_cast(command_line.c_str()), - NULL, // Retuned process handle is not inheritable. - NULL, // Retuned thread handle is not inheritable. 
- TRUE, // Child inherits all inheritable handles (for write_handle_). - 0x0, // Default creation flags. - NULL, // Inherit the parent's environment. - UnitTest::GetInstance()->original_working_dir(), - &startup_info, - &process_info) != FALSE); - child_handle_.Reset(process_info.hProcess); - ::CloseHandle(process_info.hThread); - set_spawned(true); - return OVERSEE_TEST; -} -# else // We are not on Windows. - -// ForkingDeathTest provides implementations for most of the abstract -// methods of the DeathTest interface. Only the AssumeRole method is -// left undefined. -class ForkingDeathTest : public DeathTestImpl { - public: - ForkingDeathTest(const char* statement, const RE* regex); - - // All of these virtual functions are inherited from DeathTest. - virtual int Wait(); - - protected: - void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; } - - private: - // PID of child process during death test; 0 in the child process itself. - pid_t child_pid_; -}; - -// Constructs a ForkingDeathTest. -ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex) - : DeathTestImpl(a_statement, a_regex), - child_pid_(-1) {} - -// Waits for the child in a death test to exit, returning its exit -// status, or 0 if no child process exists. As a side effect, sets the -// outcome data member. -int ForkingDeathTest::Wait() { - if (!spawned()) - return 0; - - ReadAndInterpretStatusByte(); - - int status_value; - GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0)); - set_status(status_value); - return status_value; -} - -// A concrete death test class that forks, then immediately runs the test -// in the child process. -class NoExecDeathTest : public ForkingDeathTest { - public: - NoExecDeathTest(const char* a_statement, const RE* a_regex) : - ForkingDeathTest(a_statement, a_regex) { } - virtual TestRole AssumeRole(); -}; - -// The AssumeRole process for a fork-and-run death test. It implements a -// straightforward fork, with a simple pipe to transmit the status byte. -DeathTest::TestRole NoExecDeathTest::AssumeRole() { - const size_t thread_count = GetThreadCount(); - if (thread_count != 1) { - GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count); - } - - int pipe_fd[2]; - GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); - - DeathTest::set_last_death_test_message(""); - CaptureStderr(); - // When we fork the process below, the log file buffers are copied, but the - // file descriptors are shared. We flush all log files here so that closing - // the file descriptors in the child process doesn't throw off the - // synchronization between descriptors and buffers in the parent process. - // This is as close to the fork as possible to avoid a race condition in case - // there are multiple threads running before the death test, and another - // thread writes to the log file. - FlushInfoLog(); - - const pid_t child_pid = fork(); - GTEST_DEATH_TEST_CHECK_(child_pid != -1); - set_child_pid(child_pid); - if (child_pid == 0) { - GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0])); - set_write_fd(pipe_fd[1]); - // Redirects all logging to stderr in the child process to prevent - // concurrent writes to the log files. We capture stderr in the parent - // process and append the child process' output to a log. - LogToStderr(); - // Event forwarding to the listeners of event listener API mush be shut - // down in death test subprocesses. 
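
// [Illustrative sketch -- not part of the original source] The parent/child
// plumbing used by the "fast" style above, reduced to its essentials: make a
// pipe, fork, let the child keep the write end and the parent the read end,
// and treat "child wrote a byte" as "the guarded code failed to die". This
// standalone sketch is not the Google Test implementation; error handling is
// minimal.
#include <cstdio>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main() {
  int pipe_fd[2];
  if (pipe(pipe_fd) == -1) return 1;

  const pid_t child = fork();
  if (child == 0) {                       // Child: run the guarded statement.
    close(pipe_fd[0]);
    // ... run the statement that is expected to die here ...
    // If we are still alive afterwards, report it, just as Abort() above
    // writes kDeathTestLived before calling _exit(1).
    const char lived = 'L';
    write(pipe_fd[1], &lived, 1);
    _exit(1);
  }

  close(pipe_fd[1]);                      // Parent: keep only the read end.
  char flag;
  const ssize_t n = read(pipe_fd[0], &flag, 1);  // 0 bytes => child died first.
  int status = 0;
  waitpid(child, &status, 0);
  std::printf(n == 0 ? "child died (death test passes)\n"
                     : "child survived (death test fails)\n");
  close(pipe_fd[0]);
  return 0;
}
// [end of sketch]
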
- GetUnitTestImpl()->listeners()->SuppressEventForwarding(); - return EXECUTE_TEST; - } else { - GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); - set_read_fd(pipe_fd[0]); - set_spawned(true); - return OVERSEE_TEST; - } -} - -// A concrete death test class that forks and re-executes the main -// program from the beginning, with command-line flags set that cause -// only this specific death test to be run. -class ExecDeathTest : public ForkingDeathTest { - public: - ExecDeathTest(const char* a_statement, const RE* a_regex, - const char* file, int line) : - ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { } - virtual TestRole AssumeRole(); - private: - // The name of the file in which the death test is located. - const char* const file_; - // The line number on which the death test is located. - const int line_; -}; - -// Utility class for accumulating command-line arguments. -class Arguments { - public: - Arguments() { - args_.push_back(NULL); - } - - ~Arguments() { - for (std::vector::iterator i = args_.begin(); i != args_.end(); - ++i) { - free(*i); - } - } - void AddArgument(const char* argument) { - args_.insert(args_.end() - 1, posix::StrDup(argument)); - } - - template - void AddArguments(const ::std::vector& arguments) { - for (typename ::std::vector::const_iterator i = arguments.begin(); - i != arguments.end(); - ++i) { - args_.insert(args_.end() - 1, posix::StrDup(i->c_str())); - } - } - char* const* Argv() { - return &args_[0]; - } - private: - std::vector args_; -}; - -// A struct that encompasses the arguments to the child process of a -// threadsafe-style death test process. -struct ExecDeathTestArgs { - char* const* argv; // Command-line arguments for the child's call to exec - int close_fd; // File descriptor to close; the read end of a pipe -}; - -# if GTEST_OS_MAC -inline char** GetEnviron() { - // When Google Test is built as a framework on MacOS X, the environ variable - // is unavailable. Apple's documentation (man environ) recommends using - // _NSGetEnviron() instead. - return *_NSGetEnviron(); -} -# else -// Some POSIX platforms expect you to declare environ. extern "C" makes -// it reside in the global namespace. -extern "C" char** environ; -inline char** GetEnviron() { return environ; } -# endif // GTEST_OS_MAC - -// The main function for a threadsafe-style death test child process. -// This function is called in a clone()-ed process and thus must avoid -// any potentially unsafe operations like malloc or libc functions. -static int ExecDeathTestChildMain(void* child_arg) { - ExecDeathTestArgs* const args = static_cast(child_arg); - GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd)); - - // We need to execute the test program in the same environment where - // it was originally invoked. Therefore we change to the original - // working directory first. - const char* const original_dir = - UnitTest::GetInstance()->original_working_dir(); - // We can safely call chdir() as it's a direct system call. - if (chdir(original_dir) != 0) { - DeathTestAbort(String::Format("chdir(\"%s\") failed: %s", - original_dir, - GetLastErrnoDescription().c_str())); - return EXIT_FAILURE; - } - - // We can safely call execve() as it's a direct system call. We - // cannot use execvp() as it's a libc function and thus potentially - // unsafe. Since execve() doesn't search the PATH, the user must - // invoke the test program via a valid path that contains at least - // one path separator. 
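
// [Illustrative sketch -- not part of the original source] What the Arguments
// helper above ultimately produces: heap-allocated C strings arranged as a
// NULL-terminated char* array, which is the shape execv()/execve() require.
// A minimal standalone equivalent; the class name is illustrative, and
// strdup() is POSIX:
#include <cstdlib>
#include <cstring>
#include <vector>

class ArgvBuilder {
 public:
  ArgvBuilder() { args_.push_back(NULL); }           // Keep the trailing NULL.
  ~ArgvBuilder() {
    for (size_t i = 0; i + 1 < args_.size(); ++i) std::free(args_[i]);
  }
  void Add(const char* arg) {
    args_.insert(args_.end() - 1, strdup(arg));      // Insert before the NULL.
  }
  char* const* Argv() { return &args_[0]; }  // e.g. execv(Argv()[0], Argv());
 private:
  std::vector<char*> args_;
};
// [end of sketch]
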
- execve(args->argv[0], args->argv, GetEnviron()); - DeathTestAbort(String::Format("execve(%s, ...) in %s failed: %s", - args->argv[0], - original_dir, - GetLastErrnoDescription().c_str())); - return EXIT_FAILURE; -} - -// Two utility routines that together determine the direction the stack -// grows. -// This could be accomplished more elegantly by a single recursive -// function, but we want to guard against the unlikely possibility of -// a smart compiler optimizing the recursion away. -// -// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining -// StackLowerThanAddress into StackGrowsDown, which then doesn't give -// correct answer. -bool StackLowerThanAddress(const void* ptr) GTEST_NO_INLINE_; -bool StackLowerThanAddress(const void* ptr) { - int dummy; - return &dummy < ptr; -} - -bool StackGrowsDown() { - int dummy; - return StackLowerThanAddress(&dummy); -} - -// A threadsafe implementation of fork(2) for threadsafe-style death tests -// that uses clone(2). It dies with an error message if anything goes -// wrong. -static pid_t ExecDeathTestFork(char* const* argv, int close_fd) { - ExecDeathTestArgs args = { argv, close_fd }; - pid_t child_pid = -1; - -# if GTEST_HAS_CLONE - const bool use_fork = GTEST_FLAG(death_test_use_fork); - - if (!use_fork) { - static const bool stack_grows_down = StackGrowsDown(); - const size_t stack_size = getpagesize(); - // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead. - void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); - GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED); - void* const stack_top = - static_cast(stack) + (stack_grows_down ? stack_size : 0); - - child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args); - - GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1); - } -# else - const bool use_fork = true; -# endif // GTEST_HAS_CLONE - - if (use_fork && (child_pid = fork()) == 0) { - ExecDeathTestChildMain(&args); - _exit(0); - } - - GTEST_DEATH_TEST_CHECK_(child_pid != -1); - return child_pid; -} - -// The AssumeRole process for a fork-and-exec death test. It re-executes the -// main program from the beginning, setting the --gtest_filter -// and --gtest_internal_run_death_test flags to cause only the current -// death test to be re-run. 
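
// [Illustrative sketch -- not part of the original source] The two-function
// trick above, as a standalone program. The GCC/Clang noinline attribute
// plays the role of GTEST_NO_INLINE_ here; this is an illustration, not the
// portable macro:
#include <cstdio>

__attribute__((noinline)) static bool IsLowerThan(const void* ptr) {
  int local;                  // Lives in the callee's (deeper) stack frame.
  return &local < ptr;
}

int main() {
  int local;                  // Lives in the caller's stack frame.
  std::printf("stack grows %s\n", IsLowerThan(&local) ? "down" : "up");
  return 0;
}
// [end of sketch]
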
-DeathTest::TestRole ExecDeathTest::AssumeRole() { - const UnitTestImpl* const impl = GetUnitTestImpl(); - const InternalRunDeathTestFlag* const flag = - impl->internal_run_death_test_flag(); - const TestInfo* const info = impl->current_test_info(); - const int death_test_index = info->result()->death_test_count(); - - if (flag != NULL) { - set_write_fd(flag->write_fd()); - return EXECUTE_TEST; - } - - int pipe_fd[2]; - GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); - // Clear the close-on-exec flag on the write end of the pipe, lest - // it be closed when the child process does an exec: - GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1); - - const String filter_flag = - String::Format("--%s%s=%s.%s", - GTEST_FLAG_PREFIX_, kFilterFlag, - info->test_case_name(), info->name()); - const String internal_flag = - String::Format("--%s%s=%s|%d|%d|%d", - GTEST_FLAG_PREFIX_, kInternalRunDeathTestFlag, - file_, line_, death_test_index, pipe_fd[1]); - Arguments args; - args.AddArguments(GetArgvs()); - args.AddArgument(filter_flag.c_str()); - args.AddArgument(internal_flag.c_str()); - - DeathTest::set_last_death_test_message(""); - - CaptureStderr(); - // See the comment in NoExecDeathTest::AssumeRole for why the next line - // is necessary. - FlushInfoLog(); - - const pid_t child_pid = ExecDeathTestFork(args.Argv(), pipe_fd[0]); - GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); - set_child_pid(child_pid); - set_read_fd(pipe_fd[0]); - set_spawned(true); - return OVERSEE_TEST; -} - -# endif // !GTEST_OS_WINDOWS - -// Creates a concrete DeathTest-derived class that depends on the -// --gtest_death_test_style flag, and sets the pointer pointed to -// by the "test" argument to its address. If the test should be -// skipped, sets that pointer to NULL. Returns true, unless the -// flag is set to an invalid value. -bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, - const char* file, int line, - DeathTest** test) { - UnitTestImpl* const impl = GetUnitTestImpl(); - const InternalRunDeathTestFlag* const flag = - impl->internal_run_death_test_flag(); - const int death_test_index = impl->current_test_info() - ->increment_death_test_count(); - - if (flag != NULL) { - if (death_test_index > flag->index()) { - DeathTest::set_last_death_test_message(String::Format( - "Death test count (%d) somehow exceeded expected maximum (%d)", - death_test_index, flag->index())); - return false; - } - - if (!(flag->file() == file && flag->line() == line && - flag->index() == death_test_index)) { - *test = NULL; - return true; - } - } - -# if GTEST_OS_WINDOWS - - if (GTEST_FLAG(death_test_style) == "threadsafe" || - GTEST_FLAG(death_test_style) == "fast") { - *test = new WindowsDeathTest(statement, regex, file, line); - } - -# else - - if (GTEST_FLAG(death_test_style) == "threadsafe") { - *test = new ExecDeathTest(statement, regex, file, line); - } else if (GTEST_FLAG(death_test_style) == "fast") { - *test = new NoExecDeathTest(statement, regex); - } - -# endif // GTEST_OS_WINDOWS - - else { // NOLINT - this is more readable than unbalanced brackets inside #if. - DeathTest::set_last_death_test_message(String::Format( - "Unknown death test style \"%s\" encountered", - GTEST_FLAG(death_test_style).c_str())); - return false; - } - - return true; -} - -// Splits a given string on a given delimiter, populating a given -// vector with the fields. GTEST_HAS_DEATH_TEST implies that we have -// ::std::string, so we can use it here. 
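-// For instance, SplitString("foo.cc|42|0|5", '|', &fields) leaves
-// {"foo.cc", "42", "0", "5"} in *fields, and an empty input yields {""}.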
-static void SplitString(const ::std::string& str, char delimiter, - ::std::vector< ::std::string>* dest) { - ::std::vector< ::std::string> parsed; - ::std::string::size_type pos = 0; - while (::testing::internal::AlwaysTrue()) { - const ::std::string::size_type colon = str.find(delimiter, pos); - if (colon == ::std::string::npos) { - parsed.push_back(str.substr(pos)); - break; - } else { - parsed.push_back(str.substr(pos, colon - pos)); - pos = colon + 1; - } - } - dest->swap(parsed); -} - -# if GTEST_OS_WINDOWS -// Recreates the pipe and event handles from the provided parameters, -// signals the event, and returns a file descriptor wrapped around the pipe -// handle. This function is called in the child process only. -int GetStatusFileDescriptor(unsigned int parent_process_id, - size_t write_handle_as_size_t, - size_t event_handle_as_size_t) { - AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE, - FALSE, // Non-inheritable. - parent_process_id)); - if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) { - DeathTestAbort(String::Format("Unable to open parent process %u", - parent_process_id)); - } - - // TODO(vladl@google.com): Replace the following check with a - // compile-time assertion when available. - GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t)); - - const HANDLE write_handle = - reinterpret_cast(write_handle_as_size_t); - HANDLE dup_write_handle; - - // The newly initialized handle is accessible only in in the parent - // process. To obtain one accessible within the child, we need to use - // DuplicateHandle. - if (!::DuplicateHandle(parent_process_handle.Get(), write_handle, - ::GetCurrentProcess(), &dup_write_handle, - 0x0, // Requested privileges ignored since - // DUPLICATE_SAME_ACCESS is used. - FALSE, // Request non-inheritable handler. - DUPLICATE_SAME_ACCESS)) { - DeathTestAbort(String::Format( - "Unable to duplicate the pipe handle %Iu from the parent process %u", - write_handle_as_size_t, parent_process_id)); - } - - const HANDLE event_handle = reinterpret_cast(event_handle_as_size_t); - HANDLE dup_event_handle; - - if (!::DuplicateHandle(parent_process_handle.Get(), event_handle, - ::GetCurrentProcess(), &dup_event_handle, - 0x0, - FALSE, - DUPLICATE_SAME_ACCESS)) { - DeathTestAbort(String::Format( - "Unable to duplicate the event handle %Iu from the parent process %u", - event_handle_as_size_t, parent_process_id)); - } - - const int write_fd = - ::_open_osfhandle(reinterpret_cast(dup_write_handle), O_APPEND); - if (write_fd == -1) { - DeathTestAbort(String::Format( - "Unable to convert pipe handle %Iu to a file descriptor", - write_handle_as_size_t)); - } - - // Signals the parent that the write end of the pipe has been acquired - // so the parent can release its own write end. - ::SetEvent(dup_event_handle); - - return write_fd; -} -# endif // GTEST_OS_WINDOWS - -// Returns a newly created InternalRunDeathTestFlag object with fields -// initialized from the GTEST_FLAG(internal_run_death_test) flag if -// the flag is specified; otherwise returns NULL. -InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { - if (GTEST_FLAG(internal_run_death_test) == "") return NULL; - - // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we - // can use it here. 
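-  // On POSIX systems the flag value carries four '|'-separated fields, e.g.
-  // "foo.cc|42|0|5" (file, line, death-test index, write fd); on Windows it
-  // carries six, the last three identifying the parent process and the
-  // duplicated pipe and event handles.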
- int line = -1; - int index = -1; - ::std::vector< ::std::string> fields; - SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields); - int write_fd = -1; - -# if GTEST_OS_WINDOWS - - unsigned int parent_process_id = 0; - size_t write_handle_as_size_t = 0; - size_t event_handle_as_size_t = 0; - - if (fields.size() != 6 - || !ParseNaturalNumber(fields[1], &line) - || !ParseNaturalNumber(fields[2], &index) - || !ParseNaturalNumber(fields[3], &parent_process_id) - || !ParseNaturalNumber(fields[4], &write_handle_as_size_t) - || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) { - DeathTestAbort(String::Format( - "Bad --gtest_internal_run_death_test flag: %s", - GTEST_FLAG(internal_run_death_test).c_str())); - } - write_fd = GetStatusFileDescriptor(parent_process_id, - write_handle_as_size_t, - event_handle_as_size_t); -# else - - if (fields.size() != 4 - || !ParseNaturalNumber(fields[1], &line) - || !ParseNaturalNumber(fields[2], &index) - || !ParseNaturalNumber(fields[3], &write_fd)) { - DeathTestAbort(String::Format( - "Bad --gtest_internal_run_death_test flag: %s", - GTEST_FLAG(internal_run_death_test).c_str())); - } - -# endif // GTEST_OS_WINDOWS - - return new InternalRunDeathTestFlag(fields[0], line, index, write_fd); -} - -} // namespace internal - -#endif // GTEST_HAS_DEATH_TEST - -} // namespace testing -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: keith.ray@gmail.com (Keith Ray) - - -#include - -#if GTEST_OS_WINDOWS_MOBILE -# include -#elif GTEST_OS_WINDOWS -# include -# include -#elif GTEST_OS_SYMBIAN || GTEST_OS_NACL -// Symbian OpenC and NaCl have PATH_MAX in sys/syslimits.h -# include -#else -# include -# include // Some Linux distributions define PATH_MAX here. 
-#endif // GTEST_OS_WINDOWS_MOBILE - -#if GTEST_OS_WINDOWS -# define GTEST_PATH_MAX_ _MAX_PATH -#elif defined(PATH_MAX) -# define GTEST_PATH_MAX_ PATH_MAX -#elif defined(_XOPEN_PATH_MAX) -# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX -#else -# define GTEST_PATH_MAX_ _POSIX_PATH_MAX -#endif // GTEST_OS_WINDOWS - - -namespace testing { -namespace internal { - -#if GTEST_OS_WINDOWS -// On Windows, '\\' is the standard path separator, but many tools and the -// Windows API also accept '/' as an alternate path separator. Unless otherwise -// noted, a file path can contain either kind of path separators, or a mixture -// of them. -const char kPathSeparator = '\\'; -const char kAlternatePathSeparator = '/'; -const char kPathSeparatorString[] = "\\"; -const char kAlternatePathSeparatorString[] = "/"; -# if GTEST_OS_WINDOWS_MOBILE -// Windows CE doesn't have a current directory. You should not use -// the current directory in tests on Windows CE, but this at least -// provides a reasonable fallback. -const char kCurrentDirectoryString[] = "\\"; -// Windows CE doesn't define INVALID_FILE_ATTRIBUTES -const DWORD kInvalidFileAttributes = 0xffffffff; -# else -const char kCurrentDirectoryString[] = ".\\"; -# endif // GTEST_OS_WINDOWS_MOBILE -#else -const char kPathSeparator = '/'; -const char kPathSeparatorString[] = "/"; -const char kCurrentDirectoryString[] = "./"; -#endif // GTEST_OS_WINDOWS - -// Returns whether the given character is a valid path separator. -static bool IsPathSeparator(char c) { -#if GTEST_HAS_ALT_PATH_SEP_ - return (c == kPathSeparator) || (c == kAlternatePathSeparator); -#else - return c == kPathSeparator; -#endif -} - -// Returns the current working directory, or "" if unsuccessful. -FilePath FilePath::GetCurrentDir() { -#if GTEST_OS_WINDOWS_MOBILE - // Windows CE doesn't have a current directory, so we just return - // something reasonable. - return FilePath(kCurrentDirectoryString); -#elif GTEST_OS_WINDOWS - char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; - return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd); -#else - char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; - return FilePath(getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd); -#endif // GTEST_OS_WINDOWS_MOBILE -} - -// Returns a copy of the FilePath with the case-insensitive extension removed. -// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns -// FilePath("dir/file"). If a case-insensitive extension is not -// found, returns a copy of the original FilePath. -FilePath FilePath::RemoveExtension(const char* extension) const { - String dot_extension(String::Format(".%s", extension)); - if (pathname_.EndsWithCaseInsensitive(dot_extension.c_str())) { - return FilePath(String(pathname_.c_str(), pathname_.length() - 4)); - } - return *this; -} - -// Returns a pointer to the last occurence of a valid path separator in -// the FilePath. On Windows, for example, both '/' and '\' are valid path -// separators. Returns NULL if no path separator was found. -const char* FilePath::FindLastPathSeparator() const { - const char* const last_sep = strrchr(c_str(), kPathSeparator); -#if GTEST_HAS_ALT_PATH_SEP_ - const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator); - // Comparing two pointers of which only one is NULL is undefined. - if (last_alt_sep != NULL && - (last_sep == NULL || last_alt_sep > last_sep)) { - return last_alt_sep; - } -#endif - return last_sep; -} - -// Returns a copy of the FilePath with the directory part removed. 
-// Example: FilePath("path/to/file").RemoveDirectoryName() returns -// FilePath("file"). If there is no directory part ("just_a_file"), it returns -// the FilePath unmodified. If there is no file part ("just_a_dir/") it -// returns an empty FilePath (""). -// On Windows platform, '\' is the path separator, otherwise it is '/'. -FilePath FilePath::RemoveDirectoryName() const { - const char* const last_sep = FindLastPathSeparator(); - return last_sep ? FilePath(String(last_sep + 1)) : *this; -} - -// RemoveFileName returns the directory path with the filename removed. -// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". -// If the FilePath is "a_file" or "/a_file", RemoveFileName returns -// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does -// not have a file, like "just/a/dir/", it returns the FilePath unmodified. -// On Windows platform, '\' is the path separator, otherwise it is '/'. -FilePath FilePath::RemoveFileName() const { - const char* const last_sep = FindLastPathSeparator(); - String dir; - if (last_sep) { - dir = String(c_str(), last_sep + 1 - c_str()); - } else { - dir = kCurrentDirectoryString; - } - return FilePath(dir); -} - -// Helper functions for naming files in a directory for xml output. - -// Given directory = "dir", base_name = "test", number = 0, -// extension = "xml", returns "dir/test.xml". If number is greater -// than zero (e.g., 12), returns "dir/test_12.xml". -// On Windows platform, uses \ as the separator rather than /. -FilePath FilePath::MakeFileName(const FilePath& directory, - const FilePath& base_name, - int number, - const char* extension) { - String file; - if (number == 0) { - file = String::Format("%s.%s", base_name.c_str(), extension); - } else { - file = String::Format("%s_%d.%s", base_name.c_str(), number, extension); - } - return ConcatPaths(directory, FilePath(file)); -} - -// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml". -// On Windows, uses \ as the separator rather than /. -FilePath FilePath::ConcatPaths(const FilePath& directory, - const FilePath& relative_path) { - if (directory.IsEmpty()) - return relative_path; - const FilePath dir(directory.RemoveTrailingPathSeparator()); - return FilePath(String::Format("%s%c%s", dir.c_str(), kPathSeparator, - relative_path.c_str())); -} - -// Returns true if pathname describes something findable in the file-system, -// either a file, directory, or whatever. -bool FilePath::FileOrDirectoryExists() const { -#if GTEST_OS_WINDOWS_MOBILE - LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str()); - const DWORD attributes = GetFileAttributes(unicode); - delete [] unicode; - return attributes != kInvalidFileAttributes; -#else - posix::StatStruct file_stat; - return posix::Stat(pathname_.c_str(), &file_stat) == 0; -#endif // GTEST_OS_WINDOWS_MOBILE -} - -// Returns true if pathname describes a directory in the file-system -// that exists. -bool FilePath::DirectoryExists() const { - bool result = false; -#if GTEST_OS_WINDOWS - // Don't strip off trailing separator if path is a root directory on - // Windows (like "C:\\"). - const FilePath& path(IsRootDirectory() ? 
*this : - RemoveTrailingPathSeparator()); -#else - const FilePath& path(*this); -#endif - -#if GTEST_OS_WINDOWS_MOBILE - LPCWSTR unicode = String::AnsiToUtf16(path.c_str()); - const DWORD attributes = GetFileAttributes(unicode); - delete [] unicode; - if ((attributes != kInvalidFileAttributes) && - (attributes & FILE_ATTRIBUTE_DIRECTORY)) { - result = true; - } -#else - posix::StatStruct file_stat; - result = posix::Stat(path.c_str(), &file_stat) == 0 && - posix::IsDir(file_stat); -#endif // GTEST_OS_WINDOWS_MOBILE - - return result; -} - -// Returns true if pathname describes a root directory. (Windows has one -// root directory per disk drive.) -bool FilePath::IsRootDirectory() const { -#if GTEST_OS_WINDOWS - // TODO(wan@google.com): on Windows a network share like - // \\server\share can be a root directory, although it cannot be the - // current directory. Handle this properly. - return pathname_.length() == 3 && IsAbsolutePath(); -#else - return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]); -#endif -} - -// Returns true if pathname describes an absolute path. -bool FilePath::IsAbsolutePath() const { - const char* const name = pathname_.c_str(); -#if GTEST_OS_WINDOWS - return pathname_.length() >= 3 && - ((name[0] >= 'a' && name[0] <= 'z') || - (name[0] >= 'A' && name[0] <= 'Z')) && - name[1] == ':' && - IsPathSeparator(name[2]); -#else - return IsPathSeparator(name[0]); -#endif -} - -// Returns a pathname for a file that does not currently exist. The pathname -// will be directory/base_name.extension or -// directory/base_name_.extension if directory/base_name.extension -// already exists. The number will be incremented until a pathname is found -// that does not already exist. -// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. -// There could be a race condition if two or more processes are calling this -// function at the same time -- they could both pick the same filename. -FilePath FilePath::GenerateUniqueFileName(const FilePath& directory, - const FilePath& base_name, - const char* extension) { - FilePath full_pathname; - int number = 0; - do { - full_pathname.Set(MakeFileName(directory, base_name, number++, extension)); - } while (full_pathname.FileOrDirectoryExists()); - return full_pathname; -} - -// Returns true if FilePath ends with a path separator, which indicates that -// it is intended to represent a directory. Returns false otherwise. -// This does NOT check that a directory (or file) actually exists. -bool FilePath::IsDirectory() const { - return !pathname_.empty() && - IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]); -} - -// Create directories so that path exists. Returns true if successful or if -// the directories already exist; returns false if unable to create directories -// for any reason. -bool FilePath::CreateDirectoriesRecursively() const { - if (!this->IsDirectory()) { - return false; - } - - if (pathname_.length() == 0 || this->DirectoryExists()) { - return true; - } - - const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName()); - return parent.CreateDirectoriesRecursively() && this->CreateFolder(); -} - -// Create the directory so that path exists. Returns true if successful or -// if the directory already exists; returns false if unable to create the -// directory for any reason, including if the parent directory does not -// exist. Not named "CreateDirectory" because that's a macro on Windows. 
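-// For example, FilePath("out/logs").CreateFolder() fails when "out" does not
-// already exist, whereas FilePath("out/logs/").CreateDirectoriesRecursively()
-// (above) creates the missing parents first.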
-bool FilePath::CreateFolder() const { -#if GTEST_OS_WINDOWS_MOBILE - FilePath removed_sep(this->RemoveTrailingPathSeparator()); - LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str()); - int result = CreateDirectory(unicode, NULL) ? 0 : -1; - delete [] unicode; -#elif GTEST_OS_WINDOWS - int result = _mkdir(pathname_.c_str()); -#else - int result = mkdir(pathname_.c_str(), 0777); -#endif // GTEST_OS_WINDOWS_MOBILE - - if (result == -1) { - return this->DirectoryExists(); // An error is OK if the directory exists. - } - return true; // No error. -} - -// If input name has a trailing separator character, remove it and return the -// name, otherwise return the name string unmodified. -// On Windows platform, uses \ as the separator, other platforms use /. -FilePath FilePath::RemoveTrailingPathSeparator() const { - return IsDirectory() - ? FilePath(String(pathname_.c_str(), pathname_.length() - 1)) - : *this; -} - -// Removes any redundant separators that might be in the pathname. -// For example, "bar///foo" becomes "bar/foo". Does not eliminate other -// redundancies that might be in a pathname involving "." or "..". -// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share). -void FilePath::Normalize() { - if (pathname_.c_str() == NULL) { - pathname_ = ""; - return; - } - const char* src = pathname_.c_str(); - char* const dest = new char[pathname_.length() + 1]; - char* dest_ptr = dest; - memset(dest_ptr, 0, pathname_.length() + 1); - - while (*src != '\0') { - *dest_ptr = *src; - if (!IsPathSeparator(*src)) { - src++; - } else { -#if GTEST_HAS_ALT_PATH_SEP_ - if (*dest_ptr == kAlternatePathSeparator) { - *dest_ptr = kPathSeparator; - } -#endif - while (IsPathSeparator(*src)) - src++; - } - dest_ptr++; - } - *dest_ptr = '\0'; - pathname_ = dest; - delete[] dest; -} - -} // namespace internal -} // namespace testing -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -// Author: wan@google.com (Zhanyong Wan) - - -#include -#include -#include -#include - -#if GTEST_OS_WINDOWS_MOBILE -# include // For TerminateProcess() -#elif GTEST_OS_WINDOWS -# include -# include -#else -# include -#endif // GTEST_OS_WINDOWS_MOBILE - -#if GTEST_OS_MAC -# include -# include -# include -#endif // GTEST_OS_MAC - - -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick is to -// prevent a user from accidentally including gtest-internal-inl.h in -// his code. -#define GTEST_IMPLEMENTATION_ 1 -#undef GTEST_IMPLEMENTATION_ - -namespace testing { -namespace internal { - -#if defined(_MSC_VER) || defined(__BORLANDC__) -// MSVC and C++Builder do not provide a definition of STDERR_FILENO. -const int kStdOutFileno = 1; -const int kStdErrFileno = 2; -#else -const int kStdOutFileno = STDOUT_FILENO; -const int kStdErrFileno = STDERR_FILENO; -#endif // _MSC_VER - -#if GTEST_OS_MAC - -// Returns the number of threads running in the process, or 0 to indicate that -// we cannot detect it. -size_t GetThreadCount() { - const task_t task = mach_task_self(); - mach_msg_type_number_t thread_count; - thread_act_array_t thread_list; - const kern_return_t status = task_threads(task, &thread_list, &thread_count); - if (status == KERN_SUCCESS) { - // task_threads allocates resources in thread_list and we need to free them - // to avoid leaks. - vm_deallocate(task, - reinterpret_cast(thread_list), - sizeof(thread_t) * thread_count); - return static_cast(thread_count); - } else { - return 0; - } -} - -#else - -size_t GetThreadCount() { - // There's no portable way to detect the number of threads, so we just - // return 0 to indicate that we cannot detect it. - return 0; -} - -#endif // GTEST_OS_MAC - -#if GTEST_USES_POSIX_RE - -// Implements RE. Currently only needed for death tests. - -RE::~RE() { - if (is_valid_) { - // regfree'ing an invalid regex might crash because the content - // of the regex is undefined. Since the regex's are essentially - // the same, one cannot be valid (or invalid) without the other - // being so too. - regfree(&partial_regex_); - regfree(&full_regex_); - } - free(const_cast(pattern_)); -} - -// Returns true iff regular expression re matches the entire str. -bool RE::FullMatch(const char* str, const RE& re) { - if (!re.is_valid_) return false; - - regmatch_t match; - return regexec(&re.full_regex_, str, 1, &match, 0) == 0; -} - -// Returns true iff regular expression re matches a substring of str -// (including str itself). -bool RE::PartialMatch(const char* str, const RE& re) { - if (!re.is_valid_) return false; - - regmatch_t match; - return regexec(&re.partial_regex_, str, 1, &match, 0) == 0; -} - -// Initializes an RE from its string representation. -void RE::Init(const char* regex) { - pattern_ = posix::StrDup(regex); - - // Reserves enough bytes to hold the regular expression used for a - // full match. - const size_t full_regex_len = strlen(regex) + 10; - char* const full_pattern = new char[full_regex_len]; - - snprintf(full_pattern, full_regex_len, "^(%s)$", regex); - is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0; - // We want to call regcomp(&partial_regex_, ...) even if the - // previous expression returns false. Otherwise partial_regex_ may - // not be properly initialized can may cause trouble when it's - // freed. - // - // Some implementation of POSIX regex (e.g. 
on at least some - // versions of Cygwin) doesn't accept the empty string as a valid - // regex. We change it to an equivalent form "()" to be safe. - if (is_valid_) { - const char* const partial_regex = (*regex == '\0') ? "()" : regex; - is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0; - } - EXPECT_TRUE(is_valid_) - << "Regular expression \"" << regex - << "\" is not a valid POSIX Extended regular expression."; - - delete[] full_pattern; -} - -#elif GTEST_USES_SIMPLE_RE - -// Returns true iff ch appears anywhere in str (excluding the -// terminating '\0' character). -bool IsInSet(char ch, const char* str) { - return ch != '\0' && strchr(str, ch) != NULL; -} - -// Returns true iff ch belongs to the given classification. Unlike -// similar functions in , these aren't affected by the -// current locale. -bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; } -bool IsAsciiPunct(char ch) { - return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"); -} -bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); } -bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); } -bool IsAsciiWordChar(char ch) { - return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || - ('0' <= ch && ch <= '9') || ch == '_'; -} - -// Returns true iff "\\c" is a supported escape sequence. -bool IsValidEscape(char c) { - return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW")); -} - -// Returns true iff the given atom (specified by escaped and pattern) -// matches ch. The result is undefined if the atom is invalid. -bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { - if (escaped) { // "\\p" where p is pattern_char. - switch (pattern_char) { - case 'd': return IsAsciiDigit(ch); - case 'D': return !IsAsciiDigit(ch); - case 'f': return ch == '\f'; - case 'n': return ch == '\n'; - case 'r': return ch == '\r'; - case 's': return IsAsciiWhiteSpace(ch); - case 'S': return !IsAsciiWhiteSpace(ch); - case 't': return ch == '\t'; - case 'v': return ch == '\v'; - case 'w': return IsAsciiWordChar(ch); - case 'W': return !IsAsciiWordChar(ch); - } - return IsAsciiPunct(pattern_char) && pattern_char == ch; - } - - return (pattern_char == '.' && ch != '\n') || pattern_char == ch; -} - -// Helper function used by ValidateRegex() to format error messages. -String FormatRegexSyntaxError(const char* regex, int index) { - return (Message() << "Syntax error at index " << index - << " in simple regular expression \"" << regex << "\": ").GetString(); -} - -// Generates non-fatal failures and returns false if regex is invalid; -// otherwise returns true. -bool ValidateRegex(const char* regex) { - if (regex == NULL) { - // TODO(wan@google.com): fix the source file location in the - // assertion failures to match where the regex is used in user - // code. - ADD_FAILURE() << "NULL is not a valid simple regular expression."; - return false; - } - - bool is_valid = true; - - // True iff ?, *, or + can follow the previous atom. - bool prev_repeatable = false; - for (int i = 0; regex[i]; i++) { - if (regex[i] == '\\') { // An escape sequence - i++; - if (regex[i] == '\0') { - ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) - << "'\\' cannot appear at the end."; - return false; - } - - if (!IsValidEscape(regex[i])) { - ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) - << "invalid escape sequence \"\\" << regex[i] << "\"."; - is_valid = false; - } - prev_repeatable = true; - } else { // Not an escape sequence. 
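-      // A plain character: check that '^', '$', repeats and unsupported
-      // metacharacters appear only where the simple-regex grammar allows.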
- const char ch = regex[i]; - - if (ch == '^' && i > 0) { - ADD_FAILURE() << FormatRegexSyntaxError(regex, i) - << "'^' can only appear at the beginning."; - is_valid = false; - } else if (ch == '$' && regex[i + 1] != '\0') { - ADD_FAILURE() << FormatRegexSyntaxError(regex, i) - << "'$' can only appear at the end."; - is_valid = false; - } else if (IsInSet(ch, "()[]{}|")) { - ADD_FAILURE() << FormatRegexSyntaxError(regex, i) - << "'" << ch << "' is unsupported."; - is_valid = false; - } else if (IsRepeat(ch) && !prev_repeatable) { - ADD_FAILURE() << FormatRegexSyntaxError(regex, i) - << "'" << ch << "' can only follow a repeatable token."; - is_valid = false; - } - - prev_repeatable = !IsInSet(ch, "^$?*+"); - } - } - - return is_valid; -} - -// Matches a repeated regex atom followed by a valid simple regular -// expression. The regex atom is defined as c if escaped is false, -// or \c otherwise. repeat is the repetition meta character (?, *, -// or +). The behavior is undefined if str contains too many -// characters to be indexable by size_t, in which case the test will -// probably time out anyway. We are fine with this limitation as -// std::string has it too. -bool MatchRepetitionAndRegexAtHead( - bool escaped, char c, char repeat, const char* regex, - const char* str) { - const size_t min_count = (repeat == '+') ? 1 : 0; - const size_t max_count = (repeat == '?') ? 1 : - static_cast(-1) - 1; - // We cannot call numeric_limits::max() as it conflicts with the - // max() macro on Windows. - - for (size_t i = 0; i <= max_count; ++i) { - // We know that the atom matches each of the first i characters in str. - if (i >= min_count && MatchRegexAtHead(regex, str + i)) { - // We have enough matches at the head, and the tail matches too. - // Since we only care about *whether* the pattern matches str - // (as opposed to *how* it matches), there is no need to find a - // greedy match. - return true; - } - if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) - return false; - } - return false; -} - -// Returns true iff regex matches a prefix of str. regex must be a -// valid simple regular expression and not start with "^", or the -// result is undefined. -bool MatchRegexAtHead(const char* regex, const char* str) { - if (*regex == '\0') // An empty regex matches a prefix of anything. - return true; - - // "$" only matches the end of a string. Note that regex being - // valid guarantees that there's nothing after "$" in it. - if (*regex == '$') - return *str == '\0'; - - // Is the first thing in regex an escape sequence? - const bool escaped = *regex == '\\'; - if (escaped) - ++regex; - if (IsRepeat(regex[1])) { - // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so - // here's an indirect recursion. It terminates as the regex gets - // shorter in each recursion. - return MatchRepetitionAndRegexAtHead( - escaped, regex[0], regex[1], regex + 2, str); - } else { - // regex isn't empty, isn't "$", and doesn't start with a - // repetition. We match the first atom of regex with the first - // character of str and recurse. - return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) && - MatchRegexAtHead(regex + 1, str + 1); - } -} - -// Returns true iff regex matches any substring of str. regex must be -// a valid simple regular expression, or the result is undefined. -// -// The algorithm is recursive, but the recursion depth doesn't exceed -// the regex length, so we won't need to worry about running out of -// stack space normally. 
In rare cases the time complexity can be -// exponential with respect to the regex length + the string length, -// but usually it's must faster (often close to linear). -bool MatchRegexAnywhere(const char* regex, const char* str) { - if (regex == NULL || str == NULL) - return false; - - if (*regex == '^') - return MatchRegexAtHead(regex + 1, str); - - // A successful match can be anywhere in str. - do { - if (MatchRegexAtHead(regex, str)) - return true; - } while (*str++ != '\0'); - return false; -} - -// Implements the RE class. - -RE::~RE() { - free(const_cast(pattern_)); - free(const_cast(full_pattern_)); -} - -// Returns true iff regular expression re matches the entire str. -bool RE::FullMatch(const char* str, const RE& re) { - return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str); -} - -// Returns true iff regular expression re matches a substring of str -// (including str itself). -bool RE::PartialMatch(const char* str, const RE& re) { - return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str); -} - -// Initializes an RE from its string representation. -void RE::Init(const char* regex) { - pattern_ = full_pattern_ = NULL; - if (regex != NULL) { - pattern_ = posix::StrDup(regex); - } - - is_valid_ = ValidateRegex(regex); - if (!is_valid_) { - // No need to calculate the full pattern when the regex is invalid. - return; - } - - const size_t len = strlen(regex); - // Reserves enough bytes to hold the regular expression used for a - // full match: we need space to prepend a '^', append a '$', and - // terminate the string with '\0'. - char* buffer = static_cast(malloc(len + 3)); - full_pattern_ = buffer; - - if (*regex != '^') - *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'. - - // We don't use snprintf or strncpy, as they trigger a warning when - // compiled with VC++ 8.0. - memcpy(buffer, regex, len); - buffer += len; - - if (len == 0 || regex[len - 1] != '$') - *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'. - - *buffer = '\0'; -} - -#endif // GTEST_USES_POSIX_RE - -const char kUnknownFile[] = "unknown file"; - -// Formats a source file path and a line number as they would appear -// in an error message from the compiler used to compile this code. -GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) { - const char* const file_name = file == NULL ? kUnknownFile : file; - - if (line < 0) { - return String::Format("%s:", file_name).c_str(); - } -#ifdef _MSC_VER - return String::Format("%s(%d):", file_name, line).c_str(); -#else - return String::Format("%s:%d:", file_name, line).c_str(); -#endif // _MSC_VER -} - -// Formats a file location for compiler-independent XML output. -// Although this function is not platform dependent, we put it next to -// FormatFileLocation in order to contrast the two functions. -// Note that FormatCompilerIndependentFileLocation() does NOT append colon -// to the file location it produces, unlike FormatFileLocation(). -GTEST_API_ ::std::string FormatCompilerIndependentFileLocation( - const char* file, int line) { - const char* const file_name = file == NULL ? kUnknownFile : file; - - if (line < 0) - return file_name; - else - return String::Format("%s:%d", file_name, line).c_str(); -} - - -GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line) - : severity_(severity) { - const char* const marker = - severity == GTEST_INFO ? "[ INFO ]" : - severity == GTEST_WARNING ? "[WARNING]" : - severity == GTEST_ERROR ? 
"[ ERROR ]" : "[ FATAL ]"; - GetStream() << ::std::endl << marker << " " - << FormatFileLocation(file, line).c_str() << ": "; -} - -// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. -GTestLog::~GTestLog() { - GetStream() << ::std::endl; - if (severity_ == GTEST_FATAL) { - fflush(stderr); - posix::Abort(); - } -} -// Disable Microsoft deprecation warnings for POSIX functions called from -// this class (creat, dup, dup2, and close) -#ifdef _MSC_VER -# pragma warning(push) -# pragma warning(disable: 4996) -#endif // _MSC_VER - -#if GTEST_HAS_STREAM_REDIRECTION - -// Object that captures an output stream (stdout/stderr). -class CapturedStream { - public: - // The ctor redirects the stream to a temporary file. - CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) { - -# if GTEST_OS_WINDOWS - char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT - char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT - - ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path); - const UINT success = ::GetTempFileNameA(temp_dir_path, - "gtest_redir", - 0, // Generate unique file name. - temp_file_path); - GTEST_CHECK_(success != 0) - << "Unable to create a temporary file in " << temp_dir_path; - const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE); - GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file " - << temp_file_path; - filename_ = temp_file_path; -# else - // There's no guarantee that a test has write access to the - // current directory, so we create the temporary file in the /tmp - // directory instead. - char name_template[] = "/tmp/captured_stream.XXXXXX"; - const int captured_fd = mkstemp(name_template); - filename_ = name_template; -# endif // GTEST_OS_WINDOWS - fflush(NULL); - dup2(captured_fd, fd_); - close(captured_fd); - } - - ~CapturedStream() { - remove(filename_.c_str()); - } - - String GetCapturedString() { - if (uncaptured_fd_ != -1) { - // Restores the original stream. - fflush(NULL); - dup2(uncaptured_fd_, fd_); - close(uncaptured_fd_); - uncaptured_fd_ = -1; - } - - FILE* const file = posix::FOpen(filename_.c_str(), "r"); - const String content = ReadEntireFile(file); - posix::FClose(file); - return content; - } - - private: - // Reads the entire content of a file as a String. - static String ReadEntireFile(FILE* file); - - // Returns the size (in bytes) of a file. - static size_t GetFileSize(FILE* file); - - const int fd_; // A stream to capture. - int uncaptured_fd_; - // Name of the temporary file holding the stderr output. - ::std::string filename_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream); -}; - -// Returns the size (in bytes) of a file. -size_t CapturedStream::GetFileSize(FILE* file) { - fseek(file, 0, SEEK_END); - return static_cast(ftell(file)); -} - -// Reads the entire content of a file as a string. -String CapturedStream::ReadEntireFile(FILE* file) { - const size_t file_size = GetFileSize(file); - char* const buffer = new char[file_size]; - - size_t bytes_last_read = 0; // # of bytes read in the last fread() - size_t bytes_read = 0; // # of bytes read so far - - fseek(file, 0, SEEK_SET); - - // Keeps reading the file until we cannot read further or the - // pre-determined file size is reached. 
- do { - bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file); - bytes_read += bytes_last_read; - } while (bytes_last_read > 0 && bytes_read < file_size); - - const String content(buffer, bytes_read); - delete[] buffer; - - return content; -} - -# ifdef _MSC_VER -# pragma warning(pop) -# endif // _MSC_VER - -static CapturedStream* g_captured_stderr = NULL; -static CapturedStream* g_captured_stdout = NULL; - -// Starts capturing an output stream (stdout/stderr). -void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) { - if (*stream != NULL) { - GTEST_LOG_(FATAL) << "Only one " << stream_name - << " capturer can exist at a time."; - } - *stream = new CapturedStream(fd); -} - -// Stops capturing the output stream and returns the captured string. -String GetCapturedStream(CapturedStream** captured_stream) { - const String content = (*captured_stream)->GetCapturedString(); - - delete *captured_stream; - *captured_stream = NULL; - - return content; -} - -// Starts capturing stdout. -void CaptureStdout() { - CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout); -} - -// Starts capturing stderr. -void CaptureStderr() { - CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr); -} - -// Stops capturing stdout and returns the captured string. -String GetCapturedStdout() { return GetCapturedStream(&g_captured_stdout); } - -// Stops capturing stderr and returns the captured string. -String GetCapturedStderr() { return GetCapturedStream(&g_captured_stderr); } - -#endif // GTEST_HAS_STREAM_REDIRECTION - -#if GTEST_HAS_DEATH_TEST - -// A copy of all command line arguments. Set by InitGoogleTest(). -::std::vector g_argvs; - -// Returns the command line as a vector of strings. -const ::std::vector& GetArgvs() { return g_argvs; } - -#endif // GTEST_HAS_DEATH_TEST - -#if GTEST_OS_WINDOWS_MOBILE -namespace posix { -void Abort() { - DebugBreak(); - TerminateProcess(GetCurrentProcess(), 1); -} -} // namespace posix -#endif // GTEST_OS_WINDOWS_MOBILE - -// Returns the name of the environment variable corresponding to the -// given flag. For example, FlagToEnvVar("foo") will return -// "GTEST_FOO" in the open-source version. -static String FlagToEnvVar(const char* flag) { - const String full_flag = - (Message() << GTEST_FLAG_PREFIX_ << flag).GetString(); - - Message env_var; - for (size_t i = 0; i != full_flag.length(); i++) { - env_var << ToUpper(full_flag.c_str()[i]); - } - - return env_var.GetString(); -} - -// Parses 'str' for a 32-bit signed integer. If successful, writes -// the result to *value and returns true; otherwise leaves *value -// unchanged and returns false. -bool ParseInt32(const Message& src_text, const char* str, Int32* value) { - // Parses the environment variable as a decimal integer. - char* end = NULL; - const long long_value = strtol(str, &end, 10); // NOLINT - - // Has strtol() consumed all characters in the string? - if (*end != '\0') { - // No - an invalid character was encountered. - Message msg; - msg << "WARNING: " << src_text - << " is expected to be a 32-bit integer, but actually" - << " has value \"" << str << "\".\n"; - printf("%s", msg.GetString().c_str()); - fflush(stdout); - return false; - } - - // Is the parsed value in the range of an Int32? - const Int32 result = static_cast(long_value); - if (long_value == LONG_MAX || long_value == LONG_MIN || - // The parsed value overflows as a long. (strtol() returns - // LONG_MAX or LONG_MIN when the input overflows.) 
- result != long_value - // The parsed value overflows as an Int32. - ) { - Message msg; - msg << "WARNING: " << src_text - << " is expected to be a 32-bit integer, but actually" - << " has value " << str << ", which overflows.\n"; - printf("%s", msg.GetString().c_str()); - fflush(stdout); - return false; - } - - *value = result; - return true; -} - -// Reads and returns the Boolean environment variable corresponding to -// the given flag; if it's not set, returns default_value. -// -// The value is considered true iff it's not "0". -bool BoolFromGTestEnv(const char* flag, bool default_value) { - const String env_var = FlagToEnvVar(flag); - const char* const string_value = posix::GetEnv(env_var.c_str()); - return string_value == NULL ? - default_value : strcmp(string_value, "0") != 0; -} - -// Reads and returns a 32-bit integer stored in the environment -// variable corresponding to the given flag; if it isn't set or -// doesn't represent a valid 32-bit integer, returns default_value. -Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) { - const String env_var = FlagToEnvVar(flag); - const char* const string_value = posix::GetEnv(env_var.c_str()); - if (string_value == NULL) { - // The environment variable is not set. - return default_value; - } - - Int32 result = default_value; - if (!ParseInt32(Message() << "Environment variable " << env_var, - string_value, &result)) { - printf("The default value %s is used.\n", - (Message() << default_value).GetString().c_str()); - fflush(stdout); - return default_value; - } - - return result; -} - -// Reads and returns the string environment variable corresponding to -// the given flag; if it's not set, returns default_value. -const char* StringFromGTestEnv(const char* flag, const char* default_value) { - const String env_var = FlagToEnvVar(flag); - const char* const value = posix::GetEnv(env_var.c_str()); - return value == NULL ? default_value : value; -} - -} // namespace internal -} // namespace testing -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) - -// Google Test - The Google C++ Testing Framework -// -// This file implements a universal value printer that can print a -// value of any type T: -// -// void ::testing::internal::UniversalPrinter::Print(value, ostream_ptr); -// -// It uses the << operator when possible, and prints the bytes in the -// object otherwise. A user can override its behavior for a class -// type Foo by defining either operator<<(::std::ostream&, const Foo&) -// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that -// defines Foo. - -#include -#include -#include // NOLINT -#include - -namespace testing { - -namespace { - -using ::std::ostream; - -#if GTEST_OS_WINDOWS_MOBILE // Windows CE does not define _snprintf_s. -# define snprintf _snprintf -#elif _MSC_VER >= 1400 // VC 8.0 and later deprecate snprintf and _snprintf. -# define snprintf _snprintf_s -#elif _MSC_VER -# define snprintf _snprintf -#endif // GTEST_OS_WINDOWS_MOBILE - -// Prints a segment of bytes in the given object. -void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start, - size_t count, ostream* os) { - char text[5] = ""; - for (size_t i = 0; i != count; i++) { - const size_t j = start + i; - if (i != 0) { - // Organizes the bytes into groups of 2 for easy parsing by - // human. - if ((j % 2) == 0) - *os << ' '; - else - *os << '-'; - } - snprintf(text, sizeof(text), "%02X", obj_bytes[j]); - *os << text; - } -} - -// Prints the bytes in the given value to the given ostream. -void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count, - ostream* os) { - // Tells the user how big the object is. - *os << count << "-byte object <"; - - const size_t kThreshold = 132; - const size_t kChunkSize = 64; - // If the object size is bigger than kThreshold, we'll have to omit - // some details by printing only the first and the last kChunkSize - // bytes. - // TODO(wan): let the user control the threshold using a flag. - if (count < kThreshold) { - PrintByteSegmentInObjectTo(obj_bytes, 0, count, os); - } else { - PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os); - *os << " ... "; - // Rounds up to 2-byte boundary. - const size_t resume_pos = (count - kChunkSize + 1)/2*2; - PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os); - } - *os << ">"; -} - -} // namespace - -namespace internal2 { - -// Delegates to PrintBytesInObjectToImpl() to print the bytes in the -// given object. The delegation simplifies the implementation, which -// uses the << operator and thus is easier done outside of the -// ::testing::internal namespace, which contains a << operator that -// sometimes conflicts with the one in STL. 
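-// For a 4-byte object with bytes 0xDE 0xAD 0xBE 0xEF the output reads
-// "4-byte object <DE-AD BE-EF>"; objects of 132 bytes or more are elided in
-// the middle, keeping roughly the first and last 64 bytes.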
-void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count, - ostream* os) { - PrintBytesInObjectToImpl(obj_bytes, count, os); -} - -} // namespace internal2 - -namespace internal { - -// Depending on the value of a char (or wchar_t), we print it in one -// of three formats: -// - as is if it's a printable ASCII (e.g. 'a', '2', ' '), -// - as a hexidecimal escape sequence (e.g. '\x7F'), or -// - as a special escape sequence (e.g. '\r', '\n'). -enum CharFormat { - kAsIs, - kHexEscape, - kSpecialEscape -}; - -// Returns true if c is a printable ASCII character. We test the -// value of c directly instead of calling isprint(), which is buggy on -// Windows Mobile. -inline bool IsPrintableAscii(wchar_t c) { - return 0x20 <= c && c <= 0x7E; -} - -// Prints a wide or narrow char c as a character literal without the -// quotes, escaping it when necessary; returns how c was formatted. -// The template argument UnsignedChar is the unsigned version of Char, -// which is the type of c. -template -static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) { - switch (static_cast(c)) { - case L'\0': - *os << "\\0"; - break; - case L'\'': - *os << "\\'"; - break; - case L'\\': - *os << "\\\\"; - break; - case L'\a': - *os << "\\a"; - break; - case L'\b': - *os << "\\b"; - break; - case L'\f': - *os << "\\f"; - break; - case L'\n': - *os << "\\n"; - break; - case L'\r': - *os << "\\r"; - break; - case L'\t': - *os << "\\t"; - break; - case L'\v': - *os << "\\v"; - break; - default: - if (IsPrintableAscii(c)) { - *os << static_cast(c); - return kAsIs; - } else { - *os << String::Format("\\x%X", static_cast(c)); - return kHexEscape; - } - } - return kSpecialEscape; -} - -// Prints a char c as if it's part of a string literal, escaping it when -// necessary; returns how c was formatted. -static CharFormat PrintAsWideStringLiteralTo(wchar_t c, ostream* os) { - switch (c) { - case L'\'': - *os << "'"; - return kAsIs; - case L'"': - *os << "\\\""; - return kSpecialEscape; - default: - return PrintAsCharLiteralTo(c, os); - } -} - -// Prints a char c as if it's part of a string literal, escaping it when -// necessary; returns how c was formatted. -static CharFormat PrintAsNarrowStringLiteralTo(char c, ostream* os) { - return PrintAsWideStringLiteralTo(static_cast(c), os); -} - -// Prints a wide or narrow character c and its code. '\0' is printed -// as "'\\0'", other unprintable characters are also properly escaped -// using the standard C++ escape sequence. The template argument -// UnsignedChar is the unsigned version of Char, which is the type of c. -template -void PrintCharAndCodeTo(Char c, ostream* os) { - // First, print c as a literal in the most readable form we can find. - *os << ((sizeof(c) > 1) ? "L'" : "'"); - const CharFormat format = PrintAsCharLiteralTo(c, os); - *os << "'"; - - // To aid user debugging, we also print c's code in decimal, unless - // it's 0 (in which case c was printed as '\\0', making the code - // obvious). - if (c == 0) - return; - *os << " (" << String::Format("%d", c).c_str(); - - // For more convenience, we print c's code again in hexidecimal, - // unless c was already printed in the form '\x##' or the code is in - // [1, 9]. - if (format == kHexEscape || (1 <= c && c <= 9)) { - // Do nothing. 
- } else { - *os << String::Format(", 0x%X", - static_cast(c)).c_str(); - } - *os << ")"; -} - -void PrintTo(unsigned char c, ::std::ostream* os) { - PrintCharAndCodeTo(c, os); -} -void PrintTo(signed char c, ::std::ostream* os) { - PrintCharAndCodeTo(c, os); -} - -// Prints a wchar_t as a symbol if it is printable or as its internal -// code otherwise and also as its code. L'\0' is printed as "L'\\0'". -void PrintTo(wchar_t wc, ostream* os) { - PrintCharAndCodeTo(wc, os); -} - -// Prints the given array of characters to the ostream. -// The array starts at *begin, the length is len, it may include '\0' characters -// and may not be null-terminated. -static void PrintCharsAsStringTo(const char* begin, size_t len, ostream* os) { - *os << "\""; - bool is_previous_hex = false; - for (size_t index = 0; index < len; ++index) { - const char cur = begin[index]; - if (is_previous_hex && IsXDigit(cur)) { - // Previous character is of '\x..' form and this character can be - // interpreted as another hexadecimal digit in its number. Break string to - // disambiguate. - *os << "\" \""; - } - is_previous_hex = PrintAsNarrowStringLiteralTo(cur, os) == kHexEscape; - } - *os << "\""; -} - -// Prints a (const) char array of 'len' elements, starting at address 'begin'. -void UniversalPrintArray(const char* begin, size_t len, ostream* os) { - PrintCharsAsStringTo(begin, len, os); -} - -// Prints the given array of wide characters to the ostream. -// The array starts at *begin, the length is len, it may include L'\0' -// characters and may not be null-terminated. -static void PrintWideCharsAsStringTo(const wchar_t* begin, size_t len, - ostream* os) { - *os << "L\""; - bool is_previous_hex = false; - for (size_t index = 0; index < len; ++index) { - const wchar_t cur = begin[index]; - if (is_previous_hex && isascii(cur) && IsXDigit(static_cast(cur))) { - // Previous character is of '\x..' form and this character can be - // interpreted as another hexadecimal digit in its number. Break string to - // disambiguate. - *os << "\" L\""; - } - is_previous_hex = PrintAsWideStringLiteralTo(cur, os) == kHexEscape; - } - *os << "\""; -} - -// Prints the given C string to the ostream. -void PrintTo(const char* s, ostream* os) { - if (s == NULL) { - *os << "NULL"; - } else { - *os << ImplicitCast_(s) << " pointing to "; - PrintCharsAsStringTo(s, strlen(s), os); - } -} - -// MSVC compiler can be configured to define whar_t as a typedef -// of unsigned short. Defining an overload for const wchar_t* in that case -// would cause pointers to unsigned shorts be printed as wide strings, -// possibly accessing more memory than intended and causing invalid -// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when -// wchar_t is implemented as a native type. -#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) -// Prints the given wide C string to the ostream. -void PrintTo(const wchar_t* s, ostream* os) { - if (s == NULL) { - *os << "NULL"; - } else { - *os << ImplicitCast_(s) << " pointing to "; - PrintWideCharsAsStringTo(s, wcslen(s), os); - } -} -#endif // wchar_t is native - -// Prints a ::string object. -#if GTEST_HAS_GLOBAL_STRING -void PrintStringTo(const ::string& s, ostream* os) { - PrintCharsAsStringTo(s.data(), s.size(), os); -} -#endif // GTEST_HAS_GLOBAL_STRING - -void PrintStringTo(const ::std::string& s, ostream* os) { - PrintCharsAsStringTo(s.data(), s.size(), os); -} - -// Prints a ::wstring object. 
-#if GTEST_HAS_GLOBAL_WSTRING -void PrintWideStringTo(const ::wstring& s, ostream* os) { - PrintWideCharsAsStringTo(s.data(), s.size(), os); -} -#endif // GTEST_HAS_GLOBAL_WSTRING - -#if GTEST_HAS_STD_WSTRING -void PrintWideStringTo(const ::std::wstring& s, ostream* os) { - PrintWideCharsAsStringTo(s.data(), s.size(), os); -} -#endif // GTEST_HAS_STD_WSTRING - -} // namespace internal - -} // namespace testing -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: mheule@google.com (Markus Heule) -// -// The Google C++ Testing Framework (Google Test) - - -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick is to -// prevent a user from accidentally including gtest-internal-inl.h in -// his code. -#define GTEST_IMPLEMENTATION_ 1 -#undef GTEST_IMPLEMENTATION_ - -namespace testing { - -using internal::GetUnitTestImpl; - -// Gets the summary of the failure message by omitting the stack trace -// in it. -internal::String TestPartResult::ExtractSummary(const char* message) { - const char* const stack_trace = strstr(message, internal::kStackTraceMarker); - return stack_trace == NULL ? internal::String(message) : - internal::String(message, stack_trace - message); -} - -// Prints a TestPartResult object. -std::ostream& operator<<(std::ostream& os, const TestPartResult& result) { - return os - << result.file_name() << ":" << result.line_number() << ": " - << (result.type() == TestPartResult::kSuccess ? "Success" : - result.type() == TestPartResult::kFatalFailure ? "Fatal failure" : - "Non-fatal failure") << ":\n" - << result.message() << std::endl; -} - -// Appends a TestPartResult to the array. -void TestPartResultArray::Append(const TestPartResult& result) { - array_.push_back(result); -} - -// Returns the TestPartResult at the given index (0-based). 
-const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const { - if (index < 0 || index >= size()) { - printf("\nInvalid index (%d) into TestPartResultArray.\n", index); - internal::posix::Abort(); - } - - return array_[index]; -} - -// Returns the number of TestPartResult objects in the array. -int TestPartResultArray::size() const { - return static_cast(array_.size()); -} - -namespace internal { - -HasNewFatalFailureHelper::HasNewFatalFailureHelper() - : has_new_fatal_failure_(false), - original_reporter_(GetUnitTestImpl()-> - GetTestPartResultReporterForCurrentThread()) { - GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this); -} - -HasNewFatalFailureHelper::~HasNewFatalFailureHelper() { - GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( - original_reporter_); -} - -void HasNewFatalFailureHelper::ReportTestPartResult( - const TestPartResult& result) { - if (result.fatally_failed()) - has_new_fatal_failure_ = true; - original_reporter_->ReportTestPartResult(result); -} - -} // namespace internal - -} // namespace testing -// Copyright 2008 Google Inc. -// All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) - - -namespace testing { -namespace internal { - -#if GTEST_HAS_TYPED_TEST_P - -// Skips to the first non-space char in str. Returns an empty string if str -// contains only whitespace characters. -static const char* SkipSpaces(const char* str) { - while (IsSpace(*str)) - str++; - return str; -} - -// Verifies that registered_tests match the test names in -// defined_test_names_; returns registered_tests if successful, or -// aborts the program otherwise. -const char* TypedTestCasePState::VerifyRegisteredTestNames( - const char* file, int line, const char* registered_tests) { - typedef ::std::set::const_iterator DefinedTestIter; - registered_ = true; - - // Skip initial whitespace in registered_tests since some - // preprocessors prefix stringizied literals with whitespace. 
- registered_tests = SkipSpaces(registered_tests); - - Message errors; - ::std::set tests; - for (const char* names = registered_tests; names != NULL; - names = SkipComma(names)) { - const String name = GetPrefixUntilComma(names); - if (tests.count(name) != 0) { - errors << "Test " << name << " is listed more than once.\n"; - continue; - } - - bool found = false; - for (DefinedTestIter it = defined_test_names_.begin(); - it != defined_test_names_.end(); - ++it) { - if (name == *it) { - found = true; - break; - } - } - - if (found) { - tests.insert(name); - } else { - errors << "No test named " << name - << " can be found in this test case.\n"; - } - } - - for (DefinedTestIter it = defined_test_names_.begin(); - it != defined_test_names_.end(); - ++it) { - if (tests.count(*it) == 0) { - errors << "You forgot to list test " << *it << ".\n"; - } - } - - const String& errors_str = errors.GetString(); - if (errors_str != "") { - fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), - errors_str.c_str()); - fflush(stderr); - posix::Abort(); - } - - return registered_tests; -} - -#endif // GTEST_HAS_TYPED_TEST_P - -} // namespace internal -} // namespace testing diff --git a/kokkos/kokkos/TPL/gtest/gtest.h b/kokkos/kokkos/TPL/gtest/gtest.h deleted file mode 100644 index 5fc6f9e..0000000 --- a/kokkos/kokkos/TPL/gtest/gtest.h +++ /dev/null @@ -1,19537 +0,0 @@ -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) -// -// The Google C++ Testing Framework (Google Test) -// -// This header file defines the public API for Google Test. It should be -// included by any test program that uses Google Test. -// -// IMPORTANT NOTE: Due to limitation of the C++ language, we have to -// leave some internal implementation details in this header file. -// They are clearly marked by comments like this: -// -// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
-// -// Such code is NOT meant to be used by a user directly, and is subject -// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user -// program! -// -// Acknowledgment: Google Test borrowed the idea of automatic test -// registration from Barthelemy Dagenais' (barthelemy@prologique.com) -// easyUnit framework. - -#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ -#define GTEST_INCLUDE_GTEST_GTEST_H_ - -#include -#include - -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) -// -// The Google C++ Testing Framework (Google Test) -// -// This header file declares functions and macros used internally by -// Google Test. They are subject to change without notice. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ - -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: wan@google.com (Zhanyong Wan) -// -// Low-level types and utilities for porting Google Test to various -// platforms. They are subject to change without notice. DO NOT USE -// THEM IN USER CODE. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ - -// The user can define the following macros in the build script to -// control Google Test's behavior. If the user doesn't define a macro -// in this list, Google Test will define it. -// -// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2) -// is/isn't available. -// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions -// are enabled. -// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string -// is/isn't available (some systems define -// ::string, which is different to std::string). -// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::string -// is/isn't available (some systems define -// ::wstring, which is different to std::wstring). -// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular -// expressions are/aren't available. -// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that -// is/isn't available. -// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't -// enabled. -// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that -// std::wstring does/doesn't work (Google Test can -// be used where std::wstring is unavailable). -// GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple -// is/isn't available. -// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the -// compiler supports Microsoft's "Structured -// Exception Handling". -// GTEST_HAS_STREAM_REDIRECTION -// - Define it to 1/0 to indicate whether the -// platform supports I/O stream redirection using -// dup() and dup2(). -// GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google -// Test's own tr1 tuple implementation should be -// used. Unused when the user sets -// GTEST_HAS_TR1_TUPLE to 0. -// GTEST_LINKED_AS_SHARED_LIBRARY -// - Define to 1 when compiling tests that use -// Google Test as a shared library (known as -// DLL on Windows). -// GTEST_CREATE_SHARED_LIBRARY -// - Define to 1 when compiling Google Test itself -// as a shared library. 
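As an editorial illustration of the configuration knobs listed above (this sketch is not part of the deleted sources; the chosen values, the test name, and the include path are assumptions), a translation unit can pin the documented macros before gtest.h is pulled in, which is equivalent to passing -D flags on the compile line:

#define GTEST_HAS_PTHREAD 0      // assumption: build without pthreads support
#define GTEST_HAS_TR1_TUPLE 0    // assumption: disable tuple-dependent features

#include "gtest/gtest.h"         // include path assumed for the sketch

// gtest-port.h honors the overrides instead of auto-detecting the features.
TEST(PortConfigSketch, CompilesWithOverrides) {
  SUCCEED();
}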
- -// This header defines the following utilities: -// -// Macros indicating the current platform (defined to 1 if compiled on -// the given platform; otherwise undefined): -// GTEST_OS_AIX - IBM AIX -// GTEST_OS_CYGWIN - Cygwin -// GTEST_OS_HPUX - HP-UX -// GTEST_OS_LINUX - Linux -// GTEST_OS_LINUX_ANDROID - Google Android -// GTEST_OS_MAC - Mac OS X -// GTEST_OS_NACL - Google Native Client (NaCl) -// GTEST_OS_SOLARIS - Sun Solaris -// GTEST_OS_SYMBIAN - Symbian -// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile) -// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop -// GTEST_OS_WINDOWS_MINGW - MinGW -// GTEST_OS_WINDOWS_MOBILE - Windows Mobile -// GTEST_OS_ZOS - z/OS -// -// Among the platforms, Cygwin, Linux, Max OS X, and Windows have the -// most stable support. Since core members of the Google Test project -// don't have access to other platforms, support for them may be less -// stable. If you notice any problems on your platform, please notify -// googletestframework@googlegroups.com (patches for fixing them are -// even more welcome!). -// -// Note that it is possible that none of the GTEST_OS_* macros are defined. -// -// Macros indicating available Google Test features (defined to 1 if -// the corresponding feature is supported; otherwise undefined): -// GTEST_HAS_COMBINE - the Combine() function (for value-parameterized -// tests) -// GTEST_HAS_DEATH_TEST - death tests -// GTEST_HAS_PARAM_TEST - value-parameterized tests -// GTEST_HAS_TYPED_TEST - typed tests -// GTEST_HAS_TYPED_TEST_P - type-parameterized tests -// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with -// GTEST_HAS_POSIX_RE (see above) which users can -// define themselves. -// GTEST_USES_SIMPLE_RE - our own simple regex is used; -// the above two are mutually exclusive. -// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ(). -// -// Macros for basic C++ coding: -// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning. -// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a -// variable don't have to be used. -// GTEST_DISALLOW_ASSIGN_ - disables operator=. -// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=. -// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used. -// -// Synchronization: -// Mutex, MutexLock, ThreadLocal, GetThreadCount() -// - synchronization primitives. -// GTEST_IS_THREADSAFE - defined to 1 to indicate that the above -// synchronization primitives have real implementations -// and Google Test is thread-safe; or 0 otherwise. -// -// Template meta programming: -// is_pointer - as in TR1; needed on Symbian and IBM XL C/C++ only. -// IteratorTraits - partial implementation of std::iterator_traits, which -// is not available in libCstd when compiled with Sun C++. -// -// Smart pointers: -// scoped_ptr - as in TR2. -// -// Regular expressions: -// RE - a simple regular expression class using the POSIX -// Extended Regular Expression syntax on UNIX-like -// platforms, or a reduced regular exception syntax on -// other platforms, including Windows. -// -// Logging: -// GTEST_LOG_() - logs messages at the specified severity level. -// LogToStderr() - directs all log messages to stderr. -// FlushInfoLog() - flushes informational log messages. -// -// Stdout and stderr capturing: -// CaptureStdout() - starts capturing stdout. -// GetCapturedStdout() - stops capturing stdout and returns the captured -// string. -// CaptureStderr() - starts capturing stderr. 
-// GetCapturedStderr() - stops capturing stderr and returns the captured -// string. -// -// Integer types: -// TypeWithSize - maps an integer to a int type. -// Int32, UInt32, Int64, UInt64, TimeInMillis -// - integers of known sizes. -// BiggestInt - the biggest signed integer type. -// -// Command-line utilities: -// GTEST_FLAG() - references a flag. -// GTEST_DECLARE_*() - declares a flag. -// GTEST_DEFINE_*() - defines a flag. -// GetArgvs() - returns the command line as a vector of strings. -// -// Environment variable utilities: -// GetEnv() - gets the value of an environment variable. -// BoolFromGTestEnv() - parses a bool environment variable. -// Int32FromGTestEnv() - parses an Int32 environment variable. -// StringFromGTestEnv() - parses a string environment variable. - -#include // for isspace, etc -#include // for ptrdiff_t -#include -#include -#include -#ifndef _WIN32_WCE -# include -# include -#endif // !_WIN32_WCE - -#include // NOLINT -#include // NOLINT -#include // NOLINT - -#define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com" -#define GTEST_FLAG_PREFIX_ "gtest_" -#define GTEST_FLAG_PREFIX_DASH_ "gtest-" -#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_" -#define GTEST_NAME_ "Google Test" -#define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/" - -// Determines the version of gcc that is used to compile this. -#ifdef __GNUC__ -// 40302 means version 4.3.2. -# define GTEST_GCC_VER_ \ - (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) -#endif // __GNUC__ - -// Determines the platform on which Google Test is compiled. -#ifdef __CYGWIN__ -# define GTEST_OS_CYGWIN 1 -#elif defined __SYMBIAN32__ -# define GTEST_OS_SYMBIAN 1 -#elif defined _WIN32 -# define GTEST_OS_WINDOWS 1 -# ifdef _WIN32_WCE -# define GTEST_OS_WINDOWS_MOBILE 1 -# elif defined(__MINGW__) || defined(__MINGW32__) -# define GTEST_OS_WINDOWS_MINGW 1 -# else -# define GTEST_OS_WINDOWS_DESKTOP 1 -# endif // _WIN32_WCE -#elif defined __APPLE__ -# define GTEST_OS_MAC 1 -#elif defined __linux__ -# define GTEST_OS_LINUX 1 -# ifdef ANDROID -# define GTEST_OS_LINUX_ANDROID 1 -# endif // ANDROID -#elif defined __MVS__ -# define GTEST_OS_ZOS 1 -#elif defined(__sun) && defined(__SVR4) -# define GTEST_OS_SOLARIS 1 -#elif defined(_AIX) -# define GTEST_OS_AIX 1 -#elif defined(__hpux) -# define GTEST_OS_HPUX 1 -#elif defined __native_client__ -# define GTEST_OS_NACL 1 -#endif // __CYGWIN__ - -// Brings in definitions for functions used in the testing::internal::posix -// namespace (read, write, close, chdir, isatty, stat). We do not currently -// use them on Windows Mobile. -#if !GTEST_OS_WINDOWS -// This assumes that non-Windows OSes provide unistd.h. For OSes where this -// is not the case, we need to include headers that provide the functions -// mentioned above. -# include -# if !GTEST_OS_NACL -// TODO(vladl@google.com): Remove this condition when Native Client SDK adds -// strings.h (tracked in -// http://code.google.com/p/nativeclient/issues/detail?id=1175). -# include // Native Client doesn't provide strings.h. -# endif -#elif !GTEST_OS_WINDOWS_MOBILE -# include -# include -#endif - -// Defines this to true iff Google Test can use POSIX regular expressions. -#ifndef GTEST_HAS_POSIX_RE -# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS) -#endif - -#if GTEST_HAS_POSIX_RE - -// On some platforms, needs someone to define size_t, and -// won't compile otherwise. We can #include it here as we already -// included , which is guaranteed to define size_t through -// . 
-# include // NOLINT - -# define GTEST_USES_POSIX_RE 1 - -#elif GTEST_OS_WINDOWS - -// is not available on Windows. Use our own simple regex -// implementation instead. -# define GTEST_USES_SIMPLE_RE 1 - -#else - -// may not be available on this platform. Use our own -// simple regex implementation instead. -# define GTEST_USES_SIMPLE_RE 1 - -#endif // GTEST_HAS_POSIX_RE - -#ifndef GTEST_HAS_EXCEPTIONS -// The user didn't tell us whether exceptions are enabled, so we need -// to figure it out. -# if defined(_MSC_VER) || defined(__BORLANDC__) -// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS -// macro to enable exceptions, so we'll do the same. -// Assumes that exceptions are enabled by default. -# ifndef _HAS_EXCEPTIONS -# define _HAS_EXCEPTIONS 1 -# endif // _HAS_EXCEPTIONS -# define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS -# elif defined(__GNUC__) && __EXCEPTIONS -// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled. -# define GTEST_HAS_EXCEPTIONS 1 -# elif defined(__SUNPRO_CC) -// Sun Pro CC supports exceptions. However, there is no compile-time way of -// detecting whether they are enabled or not. Therefore, we assume that -// they are enabled unless the user tells us otherwise. -# define GTEST_HAS_EXCEPTIONS 1 -# elif defined(__IBMCPP__) && __EXCEPTIONS -// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled. -# define GTEST_HAS_EXCEPTIONS 1 -# elif defined(__HP_aCC) -// Exception handling is in effect by default in HP aCC compiler. It has to -// be turned of by +noeh compiler option if desired. -# define GTEST_HAS_EXCEPTIONS 1 -# else -// For other compilers, we assume exceptions are disabled to be -// conservative. -# define GTEST_HAS_EXCEPTIONS 0 -# endif // defined(_MSC_VER) || defined(__BORLANDC__) -#endif // GTEST_HAS_EXCEPTIONS - -#if !defined(GTEST_HAS_STD_STRING) -// Even though we don't use this macro any longer, we keep it in case -// some clients still depend on it. -# define GTEST_HAS_STD_STRING 1 -#elif !GTEST_HAS_STD_STRING -// The user told us that ::std::string isn't available. -# error "Google Test cannot be used where ::std::string isn't available." -#endif // !defined(GTEST_HAS_STD_STRING) - -#ifndef GTEST_HAS_GLOBAL_STRING -// The user didn't tell us whether ::string is available, so we need -// to figure it out. - -# define GTEST_HAS_GLOBAL_STRING 0 - -#endif // GTEST_HAS_GLOBAL_STRING - -#ifndef GTEST_HAS_STD_WSTRING -// The user didn't tell us whether ::std::wstring is available, so we need -// to figure it out. -// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring -// is available. - -// Cygwin 1.7 and below doesn't support ::std::wstring. -// Solaris' libc++ doesn't support it either. Android has -// no support for it at least as recent as Froyo (2.2). -# define GTEST_HAS_STD_WSTRING \ - (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS)) - -#endif // GTEST_HAS_STD_WSTRING - -#ifndef GTEST_HAS_GLOBAL_WSTRING -// The user didn't tell us whether ::wstring is available, so we need -// to figure it out. -# define GTEST_HAS_GLOBAL_WSTRING \ - (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING) -#endif // GTEST_HAS_GLOBAL_WSTRING - -// Determines whether RTTI is available. -#ifndef GTEST_HAS_RTTI -// The user didn't tell us whether RTTI is enabled, so we need to -// figure it out. - -# ifdef _MSC_VER - -# ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled. 
-# define GTEST_HAS_RTTI 1 -# else -# define GTEST_HAS_RTTI 0 -# endif - -// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled. -# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302) - -# ifdef __GXX_RTTI -# define GTEST_HAS_RTTI 1 -# else -# define GTEST_HAS_RTTI 0 -# endif // __GXX_RTTI - -// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if -// both the typeid and dynamic_cast features are present. -# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900) - -# ifdef __RTTI_ALL__ -# define GTEST_HAS_RTTI 1 -# else -# define GTEST_HAS_RTTI 0 -# endif - -# else - -// For all other compilers, we assume RTTI is enabled. -# define GTEST_HAS_RTTI 1 - -# endif // _MSC_VER - -#endif // GTEST_HAS_RTTI - -// It's this header's responsibility to #include when RTTI -// is enabled. -#if GTEST_HAS_RTTI -# include -#endif - -// Determines whether Google Test can use the pthreads library. -#ifndef GTEST_HAS_PTHREAD -// The user didn't tell us explicitly, so we assume pthreads support is -// available on Linux and Mac. -// -// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0 -// to your compiler flags. -# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX) -#endif // GTEST_HAS_PTHREAD - -#if GTEST_HAS_PTHREAD -// gtest-port.h guarantees to #include when GTEST_HAS_PTHREAD is -// true. -# include // NOLINT - -// For timespec and nanosleep, used below. -# include // NOLINT -#endif - -// Determines whether Google Test can use tr1/tuple. You can define -// this macro to 0 to prevent Google Test from using tuple (any -// feature depending on tuple with be disabled in this mode). -#ifndef GTEST_HAS_TR1_TUPLE -// The user didn't tell us not to do it, so we assume it's OK. -# define GTEST_HAS_TR1_TUPLE 1 -#endif // GTEST_HAS_TR1_TUPLE - -// Determines whether Google Test's own tr1 tuple implementation -// should be used. -#ifndef GTEST_USE_OWN_TR1_TUPLE -// The user didn't tell us, so we need to figure it out. - -// We use our own TR1 tuple if we aren't sure the user has an -// implementation of it already. At this time, GCC 4.0.0+ and MSVC -// 2010 are the only mainstream compilers that come with a TR1 tuple -// implementation. NVIDIA's CUDA NVCC compiler pretends to be GCC by -// defining __GNUC__ and friends, but cannot compile GCC's tuple -// implementation. MSVC 2008 (9.0) provides TR1 tuple in a 323 MB -// Feature Pack download, which we cannot assume the user has. -# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000)) \ - || _MSC_VER >= 1600 -# define GTEST_USE_OWN_TR1_TUPLE 0 -# else -# define GTEST_USE_OWN_TR1_TUPLE 1 -# endif - -#endif // GTEST_USE_OWN_TR1_TUPLE - -// To avoid conditional compilation everywhere, we make it -// gtest-port.h's responsibility to #include the header implementing -// tr1/tuple. -#if GTEST_HAS_TR1_TUPLE - -# if GTEST_USE_OWN_TR1_TUPLE -// This file was GENERATED by a script. DO NOT EDIT BY HAND!!! - -// Copyright 2009 Google Inc. -// All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. 
-// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) - -// Implements a subset of TR1 tuple needed by Google Test and Google Mock. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ - -#include // For ::std::pair. - -// The compiler used in Symbian has a bug that prevents us from declaring the -// tuple template as a friend (it complains that tuple is redefined). This -// hack bypasses the bug by declaring the members that should otherwise be -// private as public. -// Sun Studio versions < 12 also have the above bug. -#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590) -# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public: -#else -# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \ - template friend class tuple; \ - private: -#endif - -// GTEST_n_TUPLE_(T) is the type of an n-tuple. -#define GTEST_0_TUPLE_(T) tuple<> -#define GTEST_1_TUPLE_(T) tuple -#define GTEST_2_TUPLE_(T) tuple -#define GTEST_3_TUPLE_(T) tuple -#define GTEST_4_TUPLE_(T) tuple -#define GTEST_5_TUPLE_(T) tuple -#define GTEST_6_TUPLE_(T) tuple -#define GTEST_7_TUPLE_(T) tuple -#define GTEST_8_TUPLE_(T) tuple -#define GTEST_9_TUPLE_(T) tuple -#define GTEST_10_TUPLE_(T) tuple - -// GTEST_n_TYPENAMES_(T) declares a list of n typenames. -#define GTEST_0_TYPENAMES_(T) -#define GTEST_1_TYPENAMES_(T) typename T##0 -#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1 -#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2 -#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ - typename T##3 -#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ - typename T##3, typename T##4 -#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ - typename T##3, typename T##4, typename T##5 -#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ - typename T##3, typename T##4, typename T##5, typename T##6 -#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ - typename T##3, typename T##4, typename T##5, typename T##6, typename T##7 -#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ - typename T##3, typename T##4, typename T##5, typename T##6, \ - typename T##7, typename T##8 -#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ - typename T##3, typename T##4, typename T##5, typename T##6, \ - typename T##7, typename T##8, typename T##9 - -// In theory, defining stuff in the ::std namespace is undefined -// behavior. 
We can do this as we are playing the role of a standard -// library vendor. -namespace std { -namespace tr1 { - -template -class tuple; - -// Anything in namespace gtest_internal is Google Test's INTERNAL -// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code. -namespace gtest_internal { - -// ByRef::type is T if T is a reference; otherwise it's const T&. -template -struct ByRef { typedef const T& type; }; // NOLINT -template -struct ByRef { typedef T& type; }; // NOLINT - -// A handy wrapper for ByRef. -#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef::type - -// AddRef::type is T if T is a reference; otherwise it's T&. This -// is the same as tr1::add_reference::type. -template -struct AddRef { typedef T& type; }; // NOLINT -template -struct AddRef { typedef T& type; }; // NOLINT - -// A handy wrapper for AddRef. -#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef::type - -// A helper for implementing get(). -template class Get; - -// A helper for implementing tuple_element. kIndexValid is true -// iff k < the number of fields in tuple type T. -template -struct TupleElement; - -template -struct TupleElement { typedef T0 type; }; - -template -struct TupleElement { typedef T1 type; }; - -template -struct TupleElement { typedef T2 type; }; - -template -struct TupleElement { typedef T3 type; }; - -template -struct TupleElement { typedef T4 type; }; - -template -struct TupleElement { typedef T5 type; }; - -template -struct TupleElement { typedef T6 type; }; - -template -struct TupleElement { typedef T7 type; }; - -template -struct TupleElement { typedef T8 type; }; - -template -struct TupleElement { typedef T9 type; }; - -} // namespace gtest_internal - -template <> -class tuple<> { - public: - tuple() {} - tuple(const tuple& /* t */) {} - tuple& operator=(const tuple& /* t */) { return *this; } -}; - -template -class GTEST_1_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {} - - tuple(const tuple& t) : f0_(t.f0_) {} - - template - tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_1_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) { - f0_ = t.f0_; - return *this; - } - - T0 f0_; -}; - -template -class GTEST_2_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0), - f1_(f1) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {} - - template - tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {} - template - tuple(const ::std::pair& p) : f0_(p.first), f1_(p.second) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_2_TUPLE_(U)& t) { - return CopyFrom(t); - } - template - tuple& operator=(const ::std::pair& p) { - f0_ = p.first; - f1_ = p.second; - return *this; - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - return *this; - } - - T0 f0_; - T1 f1_; -}; - -template -class GTEST_3_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {} - - 
tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} - - template - tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_3_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; -}; - -template -class GTEST_4_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_(), f3_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2), - f3_(f3) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {} - - template - tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), - f3_(t.f3_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_4_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - f3_ = t.f3_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; - T3 f3_; -}; - -template -class GTEST_5_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, - GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), - f4_(t.f4_) {} - - template - tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), - f3_(t.f3_), f4_(t.f4_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_5_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - f3_ = t.f3_; - f4_ = t.f4_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; - T3 f3_; - T4 f4_; -}; - -template -class GTEST_6_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, - GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), - f5_(f5) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), - f4_(t.f4_), f5_(t.f5_) {} - - template - tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), - f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_6_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - f3_ = t.f3_; - f4_ = t.f4_; - f5_ = t.f5_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; - T3 f3_; - T4 f4_; - T5 f5_; -}; - -template -class GTEST_7_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, 
GTEST_BY_REF_(T4) f4, - GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2), - f3_(f3), f4_(f4), f5_(f5), f6_(f6) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), - f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} - - template - tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), - f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_7_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - f3_ = t.f3_; - f4_ = t.f4_; - f5_ = t.f5_; - f6_ = t.f6_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; - T3 f3_; - T4 f4_; - T5 f5_; - T6 f6_; -}; - -template -class GTEST_8_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, - GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, - GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), - f5_(f5), f6_(f6), f7_(f7) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), - f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} - - template - tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), - f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_8_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - f3_ = t.f3_; - f4_ = t.f4_; - f5_ = t.f5_; - f6_ = t.f6_; - f7_ = t.f7_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; - T3 f3_; - T4 f4_; - T5 f5_; - T6 f6_; - T7 f7_; -}; - -template -class GTEST_9_TUPLE_(T) { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, - GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, - GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), - f5_(f5), f6_(f6), f7_(f7), f8_(f8) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), - f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} - - template - tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), - f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_9_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - f3_ = t.f3_; - f4_ = t.f4_; - f5_ = t.f5_; - f6_ = t.f6_; - f7_ = t.f7_; - f8_ = t.f8_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; - T3 f3_; - T4 f4_; - T5 f5_; - T6 f6_; - T7 f7_; - T8 f8_; -}; - -template -class tuple { - public: - template friend class gtest_internal::Get; - - tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(), - f9_() {} - - explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, - 
GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, - GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, - GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2), - f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {} - - tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), - f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {} - - template - tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), - f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), - f9_(t.f9_) {} - - tuple& operator=(const tuple& t) { return CopyFrom(t); } - - template - tuple& operator=(const GTEST_10_TUPLE_(U)& t) { - return CopyFrom(t); - } - - GTEST_DECLARE_TUPLE_AS_FRIEND_ - - template - tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) { - f0_ = t.f0_; - f1_ = t.f1_; - f2_ = t.f2_; - f3_ = t.f3_; - f4_ = t.f4_; - f5_ = t.f5_; - f6_ = t.f6_; - f7_ = t.f7_; - f8_ = t.f8_; - f9_ = t.f9_; - return *this; - } - - T0 f0_; - T1 f1_; - T2 f2_; - T3 f3_; - T4 f4_; - T5 f5_; - T6 f6_; - T7 f7_; - T8 f8_; - T9 f9_; -}; - -// 6.1.3.2 Tuple creation functions. - -// Known limitations: we don't support passing an -// std::tr1::reference_wrapper to make_tuple(). And we don't -// implement tie(). - -inline tuple<> make_tuple() { return tuple<>(); } - -template -inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) { - return GTEST_1_TUPLE_(T)(f0); -} - -template -inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) { - return GTEST_2_TUPLE_(T)(f0, f1); -} - -template -inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) { - return GTEST_3_TUPLE_(T)(f0, f1, f2); -} - -template -inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, - const T3& f3) { - return GTEST_4_TUPLE_(T)(f0, f1, f2, f3); -} - -template -inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, - const T3& f3, const T4& f4) { - return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4); -} - -template -inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, - const T3& f3, const T4& f4, const T5& f5) { - return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5); -} - -template -inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, - const T3& f3, const T4& f4, const T5& f5, const T6& f6) { - return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6); -} - -template -inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, - const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) { - return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7); -} - -template -inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, - const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, - const T8& f8) { - return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8); -} - -template -inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, - const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, - const T8& f8, const T9& f9) { - return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9); -} - -// 6.1.3.3 Tuple helper classes. 
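Before the helper classes below, a brief usage sketch of the tuple subset implemented above (not taken from the deleted sources; the function name and the values are invented, and get<>()/tuple_size are the helpers defined next):

#include <string>

void TupleUsageSketch() {
  // make_tuple() (defined above) deduces tuple<int, double, ::std::string>.
  ::std::tr1::tuple<int, double, ::std::string> t =
      ::std::tr1::make_tuple(42, 2.5, ::std::string("label"));

  const int i = ::std::tr1::get<0>(t);            // 42
  const double d = ::std::tr1::get<1>(t);         // 2.5
  const ::std::string s = ::std::tr1::get<2>(t);  // "label"

  // tuple_size<>::value is a compile-time constant: 3 for this type.
  const int n = ::std::tr1::tuple_size<
      ::std::tr1::tuple<int, double, ::std::string> >::value;

  (void)i; (void)d; (void)s; (void)n;  // silence unused-variable warnings
}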
- -template struct tuple_size; - -template -struct tuple_size { static const int value = 0; }; - -template -struct tuple_size { static const int value = 1; }; - -template -struct tuple_size { static const int value = 2; }; - -template -struct tuple_size { static const int value = 3; }; - -template -struct tuple_size { static const int value = 4; }; - -template -struct tuple_size { static const int value = 5; }; - -template -struct tuple_size { static const int value = 6; }; - -template -struct tuple_size { static const int value = 7; }; - -template -struct tuple_size { static const int value = 8; }; - -template -struct tuple_size { static const int value = 9; }; - -template -struct tuple_size { static const int value = 10; }; - -template -struct tuple_element { - typedef typename gtest_internal::TupleElement< - k < (tuple_size::value), k, Tuple>::type type; -}; - -#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element::type - -// 6.1.3.4 Element access. - -namespace gtest_internal { - -template <> -class Get<0> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) - Field(Tuple& t) { return t.f0_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) - ConstField(const Tuple& t) { return t.f0_; } -}; - -template <> -class Get<1> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) - Field(Tuple& t) { return t.f1_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) - ConstField(const Tuple& t) { return t.f1_; } -}; - -template <> -class Get<2> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) - Field(Tuple& t) { return t.f2_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) - ConstField(const Tuple& t) { return t.f2_; } -}; - -template <> -class Get<3> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) - Field(Tuple& t) { return t.f3_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) - ConstField(const Tuple& t) { return t.f3_; } -}; - -template <> -class Get<4> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) - Field(Tuple& t) { return t.f4_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) - ConstField(const Tuple& t) { return t.f4_; } -}; - -template <> -class Get<5> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) - Field(Tuple& t) { return t.f5_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) - ConstField(const Tuple& t) { return t.f5_; } -}; - -template <> -class Get<6> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) - Field(Tuple& t) { return t.f6_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) - ConstField(const Tuple& t) { return t.f6_; } -}; - -template <> -class Get<7> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) - Field(Tuple& t) { return t.f7_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) - ConstField(const Tuple& t) { return t.f7_; } -}; - -template <> -class Get<8> { - public: - template - static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) - Field(Tuple& t) { return t.f8_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) - ConstField(const Tuple& t) { return t.f8_; } -}; - -template <> -class Get<9> { - public: - template - static 
GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) - Field(Tuple& t) { return t.f9_; } // NOLINT - - template - static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) - ConstField(const Tuple& t) { return t.f9_; } -}; - -} // namespace gtest_internal - -template -GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) -get(GTEST_10_TUPLE_(T)& t) { - return gtest_internal::Get::Field(t); -} - -template -GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) -get(const GTEST_10_TUPLE_(T)& t) { - return gtest_internal::Get::ConstField(t); -} - -// 6.1.3.5 Relational operators - -// We only implement == and !=, as we don't have a need for the rest yet. - -namespace gtest_internal { - -// SameSizeTuplePrefixComparator::Eq(t1, t2) returns true if the -// first k fields of t1 equals the first k fields of t2. -// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if -// k1 != k2. -template -struct SameSizeTuplePrefixComparator; - -template <> -struct SameSizeTuplePrefixComparator<0, 0> { - template - static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) { - return true; - } -}; - -template -struct SameSizeTuplePrefixComparator { - template - static bool Eq(const Tuple1& t1, const Tuple2& t2) { - return SameSizeTuplePrefixComparator::Eq(t1, t2) && - ::std::tr1::get(t1) == ::std::tr1::get(t2); - } -}; - -} // namespace gtest_internal - -template -inline bool operator==(const GTEST_10_TUPLE_(T)& t, - const GTEST_10_TUPLE_(U)& u) { - return gtest_internal::SameSizeTuplePrefixComparator< - tuple_size::value, - tuple_size::value>::Eq(t, u); -} - -template -inline bool operator!=(const GTEST_10_TUPLE_(T)& t, - const GTEST_10_TUPLE_(U)& u) { return !(t == u); } - -// 6.1.4 Pairs. -// Unimplemented. - -} // namespace tr1 -} // namespace std - -#undef GTEST_0_TUPLE_ -#undef GTEST_1_TUPLE_ -#undef GTEST_2_TUPLE_ -#undef GTEST_3_TUPLE_ -#undef GTEST_4_TUPLE_ -#undef GTEST_5_TUPLE_ -#undef GTEST_6_TUPLE_ -#undef GTEST_7_TUPLE_ -#undef GTEST_8_TUPLE_ -#undef GTEST_9_TUPLE_ -#undef GTEST_10_TUPLE_ - -#undef GTEST_0_TYPENAMES_ -#undef GTEST_1_TYPENAMES_ -#undef GTEST_2_TYPENAMES_ -#undef GTEST_3_TYPENAMES_ -#undef GTEST_4_TYPENAMES_ -#undef GTEST_5_TYPENAMES_ -#undef GTEST_6_TYPENAMES_ -#undef GTEST_7_TYPENAMES_ -#undef GTEST_8_TYPENAMES_ -#undef GTEST_9_TYPENAMES_ -#undef GTEST_10_TYPENAMES_ - -#undef GTEST_DECLARE_TUPLE_AS_FRIEND_ -#undef GTEST_BY_REF_ -#undef GTEST_ADD_REF_ -#undef GTEST_TUPLE_ELEMENT_ - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ -# elif GTEST_OS_SYMBIAN - -// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to -// use STLport's tuple implementation, which unfortunately doesn't -// work as the copy of STLport distributed with Symbian is incomplete. -// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to -// use its own tuple implementation. -# ifdef BOOST_HAS_TR1_TUPLE -# undef BOOST_HAS_TR1_TUPLE -# endif // BOOST_HAS_TR1_TUPLE - -// This prevents , which defines -// BOOST_HAS_TR1_TUPLE, from being #included by Boost's . -# define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED -# include - -# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000) -// GCC 4.0+ implements tr1/tuple in the header. This does -// not conform to the TR1 spec, which requires the header to be . - -# if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302 -// Until version 4.3.2, gcc has a bug that causes , -// which is #included by , to not compile when RTTI is -// disabled. _TR1_FUNCTIONAL is the header guard for -// . 
Hence the following #define is a hack to prevent -// from being included. -# define _TR1_FUNCTIONAL 1 -# include -# undef _TR1_FUNCTIONAL // Allows the user to #include - // if he chooses to. -# else -# include // NOLINT -# endif // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302 - -# else -// If the compiler is not GCC 4.0+, we assume the user is using a -// spec-conforming TR1 implementation. -# include // NOLINT -# endif // GTEST_USE_OWN_TR1_TUPLE - -#endif // GTEST_HAS_TR1_TUPLE - -// Determines whether clone(2) is supported. -// Usually it will only be available on Linux, excluding -// Linux on the Itanium architecture. -// Also see http://linux.die.net/man/2/clone. -#ifndef GTEST_HAS_CLONE -// The user didn't tell us, so we need to figure it out. - -# if GTEST_OS_LINUX && !defined(__ia64__) -# define GTEST_HAS_CLONE 1 -# else -# define GTEST_HAS_CLONE 0 -# endif // GTEST_OS_LINUX && !defined(__ia64__) - -#endif // GTEST_HAS_CLONE - -// Determines whether to support stream redirection. This is used to test -// output correctness and to implement death tests. -#ifndef GTEST_HAS_STREAM_REDIRECTION -// By default, we assume that stream redirection is supported on all -// platforms except known mobile ones. -# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN -# define GTEST_HAS_STREAM_REDIRECTION 0 -# else -# define GTEST_HAS_STREAM_REDIRECTION 1 -# endif // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN -#endif // GTEST_HAS_STREAM_REDIRECTION - -// Determines whether to support death tests. -// Google Test does not support death tests for VC 7.1 and earlier as -// abort() in a VC 7.1 application compiled as GUI in debug config -// pops up a dialog window that cannot be suppressed programmatically. -#if (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \ - (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \ - GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX) -# define GTEST_HAS_DEATH_TEST 1 -# include // NOLINT -#endif - -// We don't support MSVC 7.1 with exceptions disabled now. Therefore -// all the compilers we care about are adequate for supporting -// value-parameterized tests. -#define GTEST_HAS_PARAM_TEST 1 - -// Determines whether to support type-driven tests. - -// Typed tests need and variadic macros, which GCC, VC++ 8.0, -// Sun Pro CC, IBM Visual Age, and HP aCC support. -#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \ - defined(__IBMCPP__) || defined(__HP_aCC) -# define GTEST_HAS_TYPED_TEST 1 -# define GTEST_HAS_TYPED_TEST_P 1 -#endif - -// Determines whether to support Combine(). This only makes sense when -// value-parameterized tests are enabled. The implementation doesn't -// work on Sun Studio since it doesn't understand templated conversion -// operators. -#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC) -# define GTEST_HAS_COMBINE 1 -#endif - -// Determines whether the system compiler uses UTF-16 for encoding wide strings. -#define GTEST_WIDE_STRING_USES_UTF16_ \ - (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX) - -// Determines whether test results can be streamed to a socket. -#if GTEST_OS_LINUX -# define GTEST_CAN_STREAM_RESULTS_ 1 -#endif - -// Defines some utility macros. - -// The GNU compiler emits a warning if nested "if" statements are followed by -// an "else" statement and braces are not used to explicitly disambiguate the -// "else" binding. 
This leads to problems with code like: -// -// if (gate) -// ASSERT_*(condition) << "Some message"; -// -// The "switch (0) case 0:" idiom is used to suppress this. -#ifdef __INTEL_COMPILER -# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ -#else -# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default: // NOLINT -#endif - -// Use this annotation at the end of a struct/class definition to -// prevent the compiler from optimizing away instances that are never -// used. This is useful when all interesting logic happens inside the -// c'tor and / or d'tor. Example: -// -// struct Foo { -// Foo() { ... } -// } GTEST_ATTRIBUTE_UNUSED_; -// -// Also use it after a variable or parameter declaration to tell the -// compiler the variable/parameter does not have to be used. -#if defined(__GNUC__) && !defined(COMPILER_ICC) -# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused)) -#else -# define GTEST_ATTRIBUTE_UNUSED_ -#endif - -// A macro to disallow operator= -// This should be used in the private: declarations for a class. -#define GTEST_DISALLOW_ASSIGN_(type)\ - void operator=(type const &) - -// A macro to disallow copy constructor and operator= -// This should be used in the private: declarations for a class. -#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\ - type(type const &);\ - GTEST_DISALLOW_ASSIGN_(type) - -// Tell the compiler to warn about unused return values for functions declared -// with this macro. The macro should be used on function declarations -// following the argument list: -// -// Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_; -#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC) -# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result)) -#else -# define GTEST_MUST_USE_RESULT_ -#endif // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC - -// Determine whether the compiler supports Microsoft's Structured Exception -// Handling. This is supported by several Windows compilers but generally -// does not exist on any other system. -#ifndef GTEST_HAS_SEH -// The user didn't tell us, so we need to figure it out. - -# if defined(_MSC_VER) || defined(__BORLANDC__) -// These two compilers are known to support SEH. -# define GTEST_HAS_SEH 1 -# else -// Assume no SEH. -# define GTEST_HAS_SEH 0 -# endif - -#endif // GTEST_HAS_SEH - -#ifdef _MSC_VER - -# if GTEST_LINKED_AS_SHARED_LIBRARY -# define GTEST_API_ __declspec(dllimport) -# elif GTEST_CREATE_SHARED_LIBRARY -# define GTEST_API_ __declspec(dllexport) -# endif - -#endif // _MSC_VER - -#ifndef GTEST_API_ -# define GTEST_API_ -#endif - -#ifdef __GNUC__ -// Ask the compiler to never inline a given function. -# define GTEST_NO_INLINE_ __attribute__((noinline)) -#else -# define GTEST_NO_INLINE_ -#endif - -namespace testing { - -class Message; - -namespace internal { - -class String; - -// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time -// expression is true. For example, you could use it to verify the -// size of a static array: -// -// GTEST_COMPILE_ASSERT_(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES, -// content_type_names_incorrect_size); -// -// or to make sure a struct is smaller than a certain size: -// -// GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large); -// -// The second argument to the macro is the name of the variable. If -// the expression is false, most compilers will issue a warning/error -// containing the name of the variable. 
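// A short illustration (hypothetical example, not part of the original
// header) of what a use of the macro expands to, given the definition
// just below:
//
//   GTEST_COMPILE_ASSERT_(sizeof(int) >= 2, int_is_at_least_two_bytes);
//
// becomes, roughly,
//
//   typedef ::testing::internal::CompileAssert<true>
//       int_is_at_least_two_bytes[1];
//
// whereas a false condition produces an array type of size -1, which the
// compiler rejects with a diagnostic that names the typedef.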
- -template -struct CompileAssert { -}; - -#define GTEST_COMPILE_ASSERT_(expr, msg) \ - typedef ::testing::internal::CompileAssert<(bool(expr))> \ - msg[bool(expr) ? 1 : -1] - -// Implementation details of GTEST_COMPILE_ASSERT_: -// -// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1 -// elements (and thus is invalid) when the expression is false. -// -// - The simpler definition -// -// #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1] -// -// does not work, as gcc supports variable-length arrays whose sizes -// are determined at run-time (this is gcc's extension and not part -// of the C++ standard). As a result, gcc fails to reject the -// following code with the simple definition: -// -// int foo; -// GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is -// // not a compile-time constant. -// -// - By using the type CompileAssert<(bool(expr))>, we ensures that -// expr is a compile-time constant. (Template arguments must be -// determined at compile-time.) -// -// - The outter parentheses in CompileAssert<(bool(expr))> are necessary -// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written -// -// CompileAssert -// -// instead, these compilers will refuse to compile -// -// GTEST_COMPILE_ASSERT_(5 > 0, some_message); -// -// (They seem to think the ">" in "5 > 0" marks the end of the -// template argument list.) -// -// - The array size is (bool(expr) ? 1 : -1), instead of simply -// -// ((expr) ? 1 : -1). -// -// This is to avoid running into a bug in MS VC 7.1, which -// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. - -// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h. -// -// This template is declared, but intentionally undefined. -template -struct StaticAssertTypeEqHelper; - -template -struct StaticAssertTypeEqHelper {}; - -#if GTEST_HAS_GLOBAL_STRING -typedef ::string string; -#else -typedef ::std::string string; -#endif // GTEST_HAS_GLOBAL_STRING - -#if GTEST_HAS_GLOBAL_WSTRING -typedef ::wstring wstring; -#elif GTEST_HAS_STD_WSTRING -typedef ::std::wstring wstring; -#endif // GTEST_HAS_GLOBAL_WSTRING - -// A helper for suppressing warnings on constant condition. It just -// returns 'condition'. -GTEST_API_ bool IsTrue(bool condition); - -// Defines scoped_ptr. - -// This implementation of scoped_ptr is PARTIAL - it only contains -// enough stuff to satisfy Google Test's need. -template -class scoped_ptr { - public: - typedef T element_type; - - explicit scoped_ptr(T* p = NULL) : ptr_(p) {} - ~scoped_ptr() { reset(); } - - T& operator*() const { return *ptr_; } - T* operator->() const { return ptr_; } - T* get() const { return ptr_; } - - T* release() { - T* const ptr = ptr_; - ptr_ = NULL; - return ptr; - } - - void reset(T* p = NULL) { - if (p != ptr_) { - if (IsTrue(sizeof(T) > 0)) { // Makes sure T is a complete type. - delete ptr_; - } - ptr_ = p; - } - } - private: - T* ptr_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr); -}; - -// Defines RE. - -// A simple C++ wrapper for . It uses the POSIX Extended -// Regular Expression syntax. -class GTEST_API_ RE { - public: - // A copy constructor is required by the Standard to initialize object - // references from r-values. - RE(const RE& other) { Init(other.pattern()); } - - // Constructs an RE from a string. 
- RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT - -#if GTEST_HAS_GLOBAL_STRING - - RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT - -#endif // GTEST_HAS_GLOBAL_STRING - - RE(const char* regex) { Init(regex); } // NOLINT - ~RE(); - - // Returns the string representation of the regex. - const char* pattern() const { return pattern_; } - - // FullMatch(str, re) returns true iff regular expression re matches - // the entire str. - // PartialMatch(str, re) returns true iff regular expression re - // matches a substring of str (including str itself). - // - // TODO(wan@google.com): make FullMatch() and PartialMatch() work - // when str contains NUL characters. - static bool FullMatch(const ::std::string& str, const RE& re) { - return FullMatch(str.c_str(), re); - } - static bool PartialMatch(const ::std::string& str, const RE& re) { - return PartialMatch(str.c_str(), re); - } - -#if GTEST_HAS_GLOBAL_STRING - - static bool FullMatch(const ::string& str, const RE& re) { - return FullMatch(str.c_str(), re); - } - static bool PartialMatch(const ::string& str, const RE& re) { - return PartialMatch(str.c_str(), re); - } - -#endif // GTEST_HAS_GLOBAL_STRING - - static bool FullMatch(const char* str, const RE& re); - static bool PartialMatch(const char* str, const RE& re); - - private: - void Init(const char* regex); - - // We use a const char* instead of a string, as Google Test may be used - // where string is not available. We also do not use Google Test's own - // String type here, in order to simplify dependencies between the - // files. - const char* pattern_; - bool is_valid_; - -#if GTEST_USES_POSIX_RE - - regex_t full_regex_; // For FullMatch(). - regex_t partial_regex_; // For PartialMatch(). - -#else // GTEST_USES_SIMPLE_RE - - const char* full_pattern_; // For FullMatch(); - -#endif - - GTEST_DISALLOW_ASSIGN_(RE); -}; - -// Formats a source file path and a line number as they would appear -// in an error message from the compiler used to compile this code. -GTEST_API_ ::std::string FormatFileLocation(const char* file, int line); - -// Formats a file location for compiler-independent XML output. -// Although this function is not platform dependent, we put it next to -// FormatFileLocation in order to contrast the two functions. -GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file, - int line); - -// Defines logging utilities: -// GTEST_LOG_(severity) - logs messages at the specified severity level. The -// message itself is streamed into the macro. -// LogToStderr() - directs all log messages to stderr. -// FlushInfoLog() - flushes informational log messages. - -enum GTestLogSeverity { - GTEST_INFO, - GTEST_WARNING, - GTEST_ERROR, - GTEST_FATAL -}; - -// Formats log entry severity, provides a stream object for streaming the -// log message, and terminates the message with a newline when going out of -// scope. -class GTEST_API_ GTestLog { - public: - GTestLog(GTestLogSeverity severity, const char* file, int line); - - // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. - ~GTestLog(); - - ::std::ostream& GetStream() { return ::std::cerr; } - - private: - const GTestLogSeverity severity_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog); -}; - -#define GTEST_LOG_(severity) \ - ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \ - __FILE__, __LINE__).GetStream() - -inline void LogToStderr() {} -inline void FlushInfoLog() { fflush(NULL); } - -// INTERNAL IMPLEMENTATION - DO NOT USE. 
-// -// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition -// is not satisfied. -// Synopsys: -// GTEST_CHECK_(boolean_condition); -// or -// GTEST_CHECK_(boolean_condition) << "Additional message"; -// -// This checks the condition and if the condition is not satisfied -// it prints message about the condition violation, including the -// condition itself, plus additional message streamed into it, if any, -// and then it aborts the program. It aborts the program irrespective of -// whether it is built in the debug mode or not. -#define GTEST_CHECK_(condition) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::IsTrue(condition)) \ - ; \ - else \ - GTEST_LOG_(FATAL) << "Condition " #condition " failed. " - -// An all-mode assert to verify that the given POSIX-style function -// call returns 0 (indicating success). Known limitation: this -// doesn't expand to a balanced 'if' statement, so enclose the macro -// in {} if you need to use it as the only statement in an 'if' -// branch. -#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \ - if (const int gtest_error = (posix_call)) \ - GTEST_LOG_(FATAL) << #posix_call << "failed with error " \ - << gtest_error - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Use ImplicitCast_ as a safe version of static_cast for upcasting in -// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a -// const Foo*). When you use ImplicitCast_, the compiler checks that -// the cast is safe. Such explicit ImplicitCast_s are necessary in -// surprisingly many situations where C++ demands an exact type match -// instead of an argument type convertable to a target type. -// -// The syntax for using ImplicitCast_ is the same as for static_cast: -// -// ImplicitCast_(expr) -// -// ImplicitCast_ would have been part of the C++ standard library, -// but the proposal was submitted too late. It will probably make -// its way into the language in the future. -// -// This relatively ugly name is intentional. It prevents clashes with -// similar functions users may have (e.g., implicit_cast). The internal -// namespace alone is not enough because the function can be found by ADL. -template -inline To ImplicitCast_(To x) { return x; } - -// When you upcast (that is, cast a pointer from type Foo to type -// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts -// always succeed. When you downcast (that is, cast a pointer from -// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because -// how do you know the pointer is really of type SubclassOfFoo? It -// could be a bare Foo, or of type DifferentSubclassOfFoo. Thus, -// when you downcast, you should use this macro. In debug mode, we -// use dynamic_cast<> to double-check the downcast is legal (we die -// if it's not). In normal mode, we do the efficient static_cast<> -// instead. Thus, it's important to test in debug mode to make sure -// the cast is legal! -// This is the only place in the code we should use dynamic_cast<>. -// In particular, you SHOULDN'T be using dynamic_cast<> in order to -// do RTTI (eg code like this: -// if (dynamic_cast(foo)) HandleASubclass1Object(foo); -// if (dynamic_cast(foo)) HandleASubclass2Object(foo); -// You should design the code some other way not to need this. -// -// This relatively ugly name is intentional. It prevents clashes with -// similar functions users may have (e.g., down_cast). The internal -// namespace alone is not enough because the function can be found by ADL. 
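// A brief usage sketch (hypothetical class names, not part of Google Test):
//
//   class Animal { public: virtual ~Animal() {} };
//   class Dog : public Animal { public: void Bark() {} };
//
//   Dog dog;
//   // Upcasting is always safe, so ImplicitCast_ suffices and the compiler
//   // verifies the conversion:
//   Animal* animal = ::testing::internal::ImplicitCast_<Animal*>(&dog);
//   // Downcasting requires DownCast_: when RTTI is available it
//   // double-checks the cast with dynamic_cast and dies on a mismatch;
//   // otherwise it falls back to a plain static_cast:
//   ::testing::internal::DownCast_<Dog*>(animal)->Bark();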
-template // use like this: DownCast_(foo); -inline To DownCast_(From* f) { // so we only accept pointers - // Ensures that To is a sub-type of From *. This test is here only - // for compile-time type checking, and has no overhead in an - // optimized build at run-time, as it will be optimized away - // completely. - if (false) { - const To to = NULL; - ::testing::internal::ImplicitCast_(to); - } - -#if GTEST_HAS_RTTI - // RTTI: debug mode only! - GTEST_CHECK_(f == NULL || dynamic_cast(f) != NULL); -#endif - return static_cast(f); -} - -// Downcasts the pointer of type Base to Derived. -// Derived must be a subclass of Base. The parameter MUST -// point to a class of type Derived, not any subclass of it. -// When RTTI is available, the function performs a runtime -// check to enforce this. -template -Derived* CheckedDowncastToActualType(Base* base) { -#if GTEST_HAS_RTTI - GTEST_CHECK_(typeid(*base) == typeid(Derived)); - return dynamic_cast(base); // NOLINT -#else - return static_cast(base); // Poor man's downcast. -#endif -} - -#if GTEST_HAS_STREAM_REDIRECTION - -// Defines the stderr capturer: -// CaptureStdout - starts capturing stdout. -// GetCapturedStdout - stops capturing stdout and returns the captured string. -// CaptureStderr - starts capturing stderr. -// GetCapturedStderr - stops capturing stderr and returns the captured string. -// -GTEST_API_ void CaptureStdout(); -GTEST_API_ String GetCapturedStdout(); -GTEST_API_ void CaptureStderr(); -GTEST_API_ String GetCapturedStderr(); - -#endif // GTEST_HAS_STREAM_REDIRECTION - - -#if GTEST_HAS_DEATH_TEST - -// A copy of all command line arguments. Set by InitGoogleTest(). -extern ::std::vector g_argvs; - -// GTEST_HAS_DEATH_TEST implies we have ::std::string. -const ::std::vector& GetArgvs(); - -#endif // GTEST_HAS_DEATH_TEST - -// Defines synchronization primitives. - -#if GTEST_HAS_PTHREAD - -// Sleeps for (roughly) n milli-seconds. This function is only for -// testing Google Test's own constructs. Don't use it in user tests, -// either directly or indirectly. -inline void SleepMilliseconds(int n) { - const timespec time = { - 0, // 0 seconds. - n * 1000L * 1000L, // And n ms. - }; - nanosleep(&time, NULL); -} - -// Allows a controller thread to pause execution of newly created -// threads until notified. Instances of this class must be created -// and destroyed in the controller thread. -// -// This class is only for testing Google Test's own constructs. Do not -// use it in user tests, either directly or indirectly. -class Notification { - public: - Notification() : notified_(false) {} - - // Notifies all threads created with this notification to start. Must - // be called from the controller thread. - void Notify() { notified_ = true; } - - // Blocks until the controller thread notifies. Must be called from a test - // thread. - void WaitForNotification() { - while(!notified_) { - SleepMilliseconds(10); - } - } - - private: - volatile bool notified_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification); -}; - -// As a C-function, ThreadFuncWithCLinkage cannot be templated itself. -// Consequently, it cannot select a correct instantiation of ThreadWithParam -// in order to call its Run(). Introducing ThreadWithParamBase as a -// non-templated base class for ThreadWithParam allows us to bypass this -// problem. -class ThreadWithParamBase { - public: - virtual ~ThreadWithParamBase() {} - virtual void Run() = 0; -}; - -// pthread_create() accepts a pointer to a function type with the C linkage. 
-// According to the Standard (7.5/1), function types with different linkages -// are different even if they are otherwise identical. Some compilers (for -// example, SunStudio) treat them as different types. Since class methods -// cannot be defined with C-linkage we need to define a free C-function to -// pass into pthread_create(). -extern "C" inline void* ThreadFuncWithCLinkage(void* thread) { - static_cast(thread)->Run(); - return NULL; -} - -// Helper class for testing Google Test's multi-threading constructs. -// To use it, write: -// -// void ThreadFunc(int param) { /* Do things with param */ } -// Notification thread_can_start; -// ... -// // The thread_can_start parameter is optional; you can supply NULL. -// ThreadWithParam thread(&ThreadFunc, 5, &thread_can_start); -// thread_can_start.Notify(); -// -// These classes are only for testing Google Test's own constructs. Do -// not use them in user tests, either directly or indirectly. -template -class ThreadWithParam : public ThreadWithParamBase { - public: - typedef void (*UserThreadFunc)(T); - - ThreadWithParam( - UserThreadFunc func, T param, Notification* thread_can_start) - : func_(func), - param_(param), - thread_can_start_(thread_can_start), - finished_(false) { - ThreadWithParamBase* const base = this; - // The thread can be created only after all fields except thread_ - // have been initialized. - GTEST_CHECK_POSIX_SUCCESS_( - pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base)); - } - ~ThreadWithParam() { Join(); } - - void Join() { - if (!finished_) { - GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0)); - finished_ = true; - } - } - - virtual void Run() { - if (thread_can_start_ != NULL) - thread_can_start_->WaitForNotification(); - func_(param_); - } - - private: - const UserThreadFunc func_; // User-supplied thread function. - const T param_; // User-supplied parameter to the thread function. - // When non-NULL, used to block execution until the controller thread - // notifies. - Notification* const thread_can_start_; - bool finished_; // true iff we know that the thread function has finished. - pthread_t thread_; // The native thread object. - - GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam); -}; - -// MutexBase and Mutex implement mutex on pthreads-based platforms. They -// are used in conjunction with class MutexLock: -// -// Mutex mutex; -// ... -// MutexLock lock(&mutex); // Acquires the mutex and releases it at the end -// // of the current scope. -// -// MutexBase implements behavior for both statically and dynamically -// allocated mutexes. Do not use MutexBase directly. Instead, write -// the following to define a static mutex: -// -// GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex); -// -// You can forward declare a static mutex like this: -// -// GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex); -// -// To create a dynamic mutex, just define an object of type Mutex. -class MutexBase { - public: - // Acquires this mutex. - void Lock() { - GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_)); - owner_ = pthread_self(); - } - - // Releases this mutex. - void Unlock() { - // We don't protect writing to owner_ here, as it's the caller's - // responsibility to ensure that the current thread holds the - // mutex when this is called. - owner_ = 0; - GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_)); - } - - // Does nothing if the current thread holds the mutex. Otherwise, crashes - // with high probability. 
- void AssertHeld() const { - GTEST_CHECK_(owner_ == pthread_self()) - << "The current thread is not holding the mutex @" << this; - } - - // A static mutex may be used before main() is entered. It may even - // be used before the dynamic initialization stage. Therefore we - // must be able to initialize a static mutex object at link time. - // This means MutexBase has to be a POD and its member variables - // have to be public. - public: - pthread_mutex_t mutex_; // The underlying pthread mutex. - pthread_t owner_; // The thread holding the mutex; 0 means no one holds it. -}; - -// Forward-declares a static mutex. -# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ - extern ::testing::internal::MutexBase mutex - -// Defines and statically (i.e. at link time) initializes a static mutex. -# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ - ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, 0 } - -// The Mutex class can only be used for mutexes created at runtime. It -// shares its API with MutexBase otherwise. -class Mutex : public MutexBase { - public: - Mutex() { - GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL)); - owner_ = 0; - } - ~Mutex() { - GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_)); - } - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex); -}; - -// We cannot name this class MutexLock as the ctor declaration would -// conflict with a macro named MutexLock, which is defined on some -// platforms. Hence the typedef trick below. -class GTestMutexLock { - public: - explicit GTestMutexLock(MutexBase* mutex) - : mutex_(mutex) { mutex_->Lock(); } - - ~GTestMutexLock() { mutex_->Unlock(); } - - private: - MutexBase* const mutex_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock); -}; - -typedef GTestMutexLock MutexLock; - -// Helpers for ThreadLocal. - -// pthread_key_create() requires DeleteThreadLocalValue() to have -// C-linkage. Therefore it cannot be templatized to access -// ThreadLocal. Hence the need for class -// ThreadLocalValueHolderBase. -class ThreadLocalValueHolderBase { - public: - virtual ~ThreadLocalValueHolderBase() {} -}; - -// Called by pthread to delete thread-local data stored by -// pthread_setspecific(). -extern "C" inline void DeleteThreadLocalValue(void* value_holder) { - delete static_cast(value_holder); -} - -// Implements thread-local storage on pthreads-based systems. -// -// // Thread 1 -// ThreadLocal tl(100); // 100 is the default value for each thread. -// -// // Thread 2 -// tl.set(150); // Changes the value for thread 2 only. -// EXPECT_EQ(150, tl.get()); -// -// // Thread 1 -// EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value. -// tl.set(200); -// EXPECT_EQ(200, tl.get()); -// -// The template type argument T must have a public copy constructor. -// In addition, the default ThreadLocal constructor requires T to have -// a public default constructor. -// -// An object managed for a thread by a ThreadLocal instance is deleted -// when the thread exits. Or, if the ThreadLocal instance dies in -// that thread, when the ThreadLocal dies. It's the user's -// responsibility to ensure that all other threads using a ThreadLocal -// have exited when it dies, or the per-thread objects for those -// threads will not be deleted. -// -// Google Test only uses global ThreadLocal objects. That means they -// will die after main() has returned. Therefore, no per-thread -// object managed by Google Test will be leaked as long as all threads -// using Google Test have exited when main() returns. 
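// The ThreadLocal class below is built on POSIX thread-specific storage.
// A rough standalone sketch of the underlying pattern (illustrative only,
// using a plain int payload instead of the type-safe ValueHolder below):
//
//   pthread_key_t key;
//   pthread_key_create(&key, free);  // 'free' runs at thread exit on each
//                                    // thread's non-NULL value.
//   if (pthread_getspecific(key) == NULL) {
//     // Lazily create this thread's slot on first access.
//     pthread_setspecific(key, calloc(1, sizeof(int)));
//   }
//   int* my_value = static_cast<int*>(pthread_getspecific(key));
//   ...
//   pthread_key_delete(key);  // Releases the key only; it does not free
//                             // the values still held by other threads.
//
// ThreadLocal wraps this pattern behind a type-safe interface and
// registers DeleteThreadLocalValue() as the destructor callback.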
-template -class ThreadLocal { - public: - ThreadLocal() : key_(CreateKey()), - default_() {} - explicit ThreadLocal(const T& value) : key_(CreateKey()), - default_(value) {} - - ~ThreadLocal() { - // Destroys the managed object for the current thread, if any. - DeleteThreadLocalValue(pthread_getspecific(key_)); - - // Releases resources associated with the key. This will *not* - // delete managed objects for other threads. - GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_)); - } - - T* pointer() { return GetOrCreateValue(); } - const T* pointer() const { return GetOrCreateValue(); } - const T& get() const { return *pointer(); } - void set(const T& value) { *pointer() = value; } - - private: - // Holds a value of type T. - class ValueHolder : public ThreadLocalValueHolderBase { - public: - explicit ValueHolder(const T& value) : value_(value) {} - - T* pointer() { return &value_; } - - private: - T value_; - GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder); - }; - - static pthread_key_t CreateKey() { - pthread_key_t key; - // When a thread exits, DeleteThreadLocalValue() will be called on - // the object managed for that thread. - GTEST_CHECK_POSIX_SUCCESS_( - pthread_key_create(&key, &DeleteThreadLocalValue)); - return key; - } - - T* GetOrCreateValue() const { - ThreadLocalValueHolderBase* const holder = - static_cast(pthread_getspecific(key_)); - if (holder != NULL) { - return CheckedDowncastToActualType(holder)->pointer(); - } - - ValueHolder* const new_holder = new ValueHolder(default_); - ThreadLocalValueHolderBase* const holder_base = new_holder; - GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base)); - return new_holder->pointer(); - } - - // A key pthreads uses for looking up per-thread values. - const pthread_key_t key_; - const T default_; // The default value for each thread. - - GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal); -}; - -# define GTEST_IS_THREADSAFE 1 - -#else // GTEST_HAS_PTHREAD - -// A dummy implementation of synchronization primitives (mutex, lock, -// and thread-local variable). Necessary for compiling Google Test where -// mutex is not supported - using Google Test in multiple threads is not -// supported on such platforms. - -class Mutex { - public: - Mutex() {} - void AssertHeld() const {} -}; - -# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ - extern ::testing::internal::Mutex mutex - -# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex - -class GTestMutexLock { - public: - explicit GTestMutexLock(Mutex*) {} // NOLINT -}; - -typedef GTestMutexLock MutexLock; - -template -class ThreadLocal { - public: - ThreadLocal() : value_() {} - explicit ThreadLocal(const T& value) : value_(value) {} - T* pointer() { return &value_; } - const T* pointer() const { return &value_; } - const T& get() const { return value_; } - void set(const T& value) { value_ = value; } - private: - T value_; -}; - -// The above synchronization primitives have dummy implementations. -// Therefore Google Test is not thread-safe. -# define GTEST_IS_THREADSAFE 0 - -#endif // GTEST_HAS_PTHREAD - -// Returns the number of threads running in the process, or 0 to indicate that -// we cannot detect it. -GTEST_API_ size_t GetThreadCount(); - -// Passing non-POD classes through ellipsis (...) crashes the ARM -// compiler and generates a warning in Sun Studio. The Nokia Symbian -// and the IBM XL C/C++ compiler try to instantiate a copy constructor -// for objects passed through ellipsis (...), failing for uncopyable -// objects. 
We define this to ensure that only POD is passed through -// ellipsis on these systems. -#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC) -// We lose support for NULL detection where the compiler doesn't like -// passing non-POD classes through ellipsis (...). -# define GTEST_ELLIPSIS_NEEDS_POD_ 1 -#else -# define GTEST_CAN_COMPARE_NULL 1 -#endif - -// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between -// const T& and const T* in a function template. These compilers -// _can_ decide between class template specializations for T and T*, -// so a tr1::type_traits-like is_pointer works. -#if defined(__SYMBIAN32__) || defined(__IBMCPP__) -# define GTEST_NEEDS_IS_POINTER_ 1 -#endif - -template -struct bool_constant { - typedef bool_constant type; - static const bool value = bool_value; -}; -template const bool bool_constant::value; - -typedef bool_constant false_type; -typedef bool_constant true_type; - -template -struct is_pointer : public false_type {}; - -template -struct is_pointer : public true_type {}; - -template -struct IteratorTraits { - typedef typename Iterator::value_type value_type; -}; - -template -struct IteratorTraits { - typedef T value_type; -}; - -template -struct IteratorTraits { - typedef T value_type; -}; - -#if GTEST_OS_WINDOWS -# define GTEST_PATH_SEP_ "\\" -# define GTEST_HAS_ALT_PATH_SEP_ 1 -// The biggest signed integer type the compiler supports. -typedef __int64 BiggestInt; -#else -# define GTEST_PATH_SEP_ "/" -# define GTEST_HAS_ALT_PATH_SEP_ 0 -typedef long long BiggestInt; // NOLINT -#endif // GTEST_OS_WINDOWS - -// Utilities for char. - -// isspace(int ch) and friends accept an unsigned char or EOF. char -// may be signed, depending on the compiler (or compiler flags). -// Therefore we need to cast a char to unsigned char before calling -// isspace(), etc. - -inline bool IsAlpha(char ch) { - return isalpha(static_cast(ch)) != 0; -} -inline bool IsAlNum(char ch) { - return isalnum(static_cast(ch)) != 0; -} -inline bool IsDigit(char ch) { - return isdigit(static_cast(ch)) != 0; -} -inline bool IsLower(char ch) { - return islower(static_cast(ch)) != 0; -} -inline bool IsSpace(char ch) { - return isspace(static_cast(ch)) != 0; -} -inline bool IsUpper(char ch) { - return isupper(static_cast(ch)) != 0; -} -inline bool IsXDigit(char ch) { - return isxdigit(static_cast(ch)) != 0; -} - -inline char ToLower(char ch) { - return static_cast(tolower(static_cast(ch))); -} -inline char ToUpper(char ch) { - return static_cast(toupper(static_cast(ch))); -} - -// The testing::internal::posix namespace holds wrappers for common -// POSIX functions. These wrappers hide the differences between -// Windows/MSVC and POSIX systems. Since some compilers define these -// standard functions as macros, the wrapper cannot have the same name -// as the wrapped function. - -namespace posix { - -// Functions with a different name on Windows. 
- -#if GTEST_OS_WINDOWS - -typedef struct _stat StatStruct; - -# ifdef __BORLANDC__ -inline int IsATTY(int fd) { return isatty(fd); } -inline int StrCaseCmp(const char* s1, const char* s2) { - return stricmp(s1, s2); -} -inline char* StrDup(const char* src) { return strdup(src); } -# else // !__BORLANDC__ -# if GTEST_OS_WINDOWS_MOBILE -inline int IsATTY(int /* fd */) { return 0; } -# else -inline int IsATTY(int fd) { return _isatty(fd); } -# endif // GTEST_OS_WINDOWS_MOBILE -inline int StrCaseCmp(const char* s1, const char* s2) { - return _stricmp(s1, s2); -} -inline char* StrDup(const char* src) { return _strdup(src); } -# endif // __BORLANDC__ - -# if GTEST_OS_WINDOWS_MOBILE -inline int FileNo(FILE* file) { return reinterpret_cast(_fileno(file)); } -// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this -// time and thus not defined there. -# else -inline int FileNo(FILE* file) { return _fileno(file); } -inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); } -inline int RmDir(const char* dir) { return _rmdir(dir); } -inline bool IsDir(const StatStruct& st) { - return (_S_IFDIR & st.st_mode) != 0; -} -# endif // GTEST_OS_WINDOWS_MOBILE - -#else - -typedef struct stat StatStruct; - -inline int FileNo(FILE* file) { return fileno(file); } -inline int IsATTY(int fd) { return isatty(fd); } -inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); } -inline int StrCaseCmp(const char* s1, const char* s2) { - return strcasecmp(s1, s2); -} -inline char* StrDup(const char* src) { return strdup(src); } -inline int RmDir(const char* dir) { return rmdir(dir); } -inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); } - -#endif // GTEST_OS_WINDOWS - -// Functions deprecated by MSVC 8.0. - -#ifdef _MSC_VER -// Temporarily disable warning 4996 (deprecated function). -# pragma warning(push) -# pragma warning(disable:4996) -#endif - -inline const char* StrNCpy(char* dest, const char* src, size_t n) { - return strncpy(dest, src, n); -} - -// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and -// StrError() aren't needed on Windows CE at this time and thus not -// defined there. - -#if !GTEST_OS_WINDOWS_MOBILE -inline int ChDir(const char* dir) { return chdir(dir); } -#endif -inline FILE* FOpen(const char* path, const char* mode) { - return fopen(path, mode); -} -#if !GTEST_OS_WINDOWS_MOBILE -inline FILE *FReopen(const char* path, const char* mode, FILE* stream) { - return freopen(path, mode, stream); -} -inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); } -#endif -inline int FClose(FILE* fp) { return fclose(fp); } -#if !GTEST_OS_WINDOWS_MOBILE -inline int Read(int fd, void* buf, unsigned int count) { - return static_cast(read(fd, buf, count)); -} -inline int Write(int fd, const void* buf, unsigned int count) { - return static_cast(write(fd, buf, count)); -} -inline int Close(int fd) { return close(fd); } -inline const char* StrError(int errnum) { return strerror(errnum); } -#endif -inline const char* GetEnv(const char* name) { -#if GTEST_OS_WINDOWS_MOBILE - // We are on Windows CE, which has no environment variables. - return NULL; -#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) - // Environment variables which we programmatically clear will be set to the - // empty string rather than unset (NULL). Handle that case. - const char* const env = getenv(name); - return (env != NULL && env[0] != '\0') ? 
env : NULL; -#else - return getenv(name); -#endif -} - -#ifdef _MSC_VER -# pragma warning(pop) // Restores the warning state. -#endif - -#if GTEST_OS_WINDOWS_MOBILE -// Windows CE has no C library. The abort() function is used in -// several places in Google Test. This implementation provides a reasonable -// imitation of standard behaviour. -void Abort(); -#else -inline void Abort() { abort(); } -#endif // GTEST_OS_WINDOWS_MOBILE - -} // namespace posix - -// The maximum number a BiggestInt can represent. This definition -// works no matter BiggestInt is represented in one's complement or -// two's complement. -// -// We cannot rely on numeric_limits in STL, as __int64 and long long -// are not part of standard C++ and numeric_limits doesn't need to be -// defined for them. -const BiggestInt kMaxBiggestInt = - ~(static_cast(1) << (8*sizeof(BiggestInt) - 1)); - -// This template class serves as a compile-time function from size to -// type. It maps a size in bytes to a primitive type with that -// size. e.g. -// -// TypeWithSize<4>::UInt -// -// is typedef-ed to be unsigned int (unsigned integer made up of 4 -// bytes). -// -// Such functionality should belong to STL, but I cannot find it -// there. -// -// Google Test uses this class in the implementation of floating-point -// comparison. -// -// For now it only handles UInt (unsigned int) as that's all Google Test -// needs. Other types can be easily added in the future if need -// arises. -template -class TypeWithSize { - public: - // This prevents the user from using TypeWithSize with incorrect - // values of N. - typedef void UInt; -}; - -// The specialization for size 4. -template <> -class TypeWithSize<4> { - public: - // unsigned int has size 4 in both gcc and MSVC. - // - // As base/basictypes.h doesn't compile on Windows, we cannot use - // uint32, uint64, and etc here. - typedef int Int; - typedef unsigned int UInt; -}; - -// The specialization for size 8. -template <> -class TypeWithSize<8> { - public: - -#if GTEST_OS_WINDOWS - typedef __int64 Int; - typedef unsigned __int64 UInt; -#else - typedef long long Int; // NOLINT - typedef unsigned long long UInt; // NOLINT -#endif // GTEST_OS_WINDOWS -}; - -// Integer types of known sizes. -typedef TypeWithSize<4>::Int Int32; -typedef TypeWithSize<4>::UInt UInt32; -typedef TypeWithSize<8>::Int Int64; -typedef TypeWithSize<8>::UInt UInt64; -typedef TypeWithSize<8>::Int TimeInMillis; // Represents time in milliseconds. - -// Utilities for command line flags and environment variables. - -// Macro for referencing flags. -#define GTEST_FLAG(name) FLAGS_gtest_##name - -// Macros for declaring flags. -#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name) -#define GTEST_DECLARE_int32_(name) \ - GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name) -#define GTEST_DECLARE_string_(name) \ - GTEST_API_ extern ::testing::internal::String GTEST_FLAG(name) - -// Macros for defining flags. -#define GTEST_DEFINE_bool_(name, default_val, doc) \ - GTEST_API_ bool GTEST_FLAG(name) = (default_val) -#define GTEST_DEFINE_int32_(name, default_val, doc) \ - GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val) -#define GTEST_DEFINE_string_(name, default_val, doc) \ - GTEST_API_ ::testing::internal::String GTEST_FLAG(name) = (default_val) - -// Parses 'str' for a 32-bit signed integer. If successful, writes the result -// to *value and returns true; otherwise leaves *value unchanged and returns -// false. 
-// TODO(chandlerc): Find a better way to refactor flag and environment parsing -// out of both gtest-port.cc and gtest.cc to avoid exporting this utility -// function. -bool ParseInt32(const Message& src_text, const char* str, Int32* value); - -// Parses a bool/Int32/string from the environment variable -// corresponding to the given Google Test flag. -bool BoolFromGTestEnv(const char* flag, bool default_val); -GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val); -const char* StringFromGTestEnv(const char* flag, const char* default_val); - -} // namespace internal -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ - -#if GTEST_OS_LINUX -# include -# include -# include -# include -#endif // GTEST_OS_LINUX - -#include -#include -#include -#include -#include - -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) -// -// The Google C++ Testing Framework (Google Test) -// -// This header file declares the String class and functions used internally by -// Google Test. They are subject to change without notice. They should not used -// by code external to Google Test. -// -// This header file is #included by . -// It should not be #included by other files. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ - -#ifdef __BORLANDC__ -// string.h is not guaranteed to provide strcpy on C++ Builder. -# include -#endif - -#include - -#include - -namespace testing { -namespace internal { - -// String - a UTF-8 string class. -// -// For historic reasons, we don't use std::string. -// -// TODO(wan@google.com): replace this class with std::string or -// implement it in terms of the latter. -// -// Note that String can represent both NULL and the empty string, -// while std::string cannot represent NULL. -// -// NULL and the empty string are considered different. 
NULL is less -// than anything (including the empty string) except itself. -// -// This class only provides minimum functionality necessary for -// implementing Google Test. We do not intend to implement a full-fledged -// string class here. -// -// Since the purpose of this class is to provide a substitute for -// std::string on platforms where it cannot be used, we define a copy -// constructor and assignment operators such that we don't need -// conditional compilation in a lot of places. -// -// In order to make the representation efficient, the d'tor of String -// is not virtual. Therefore DO NOT INHERIT FROM String. -class GTEST_API_ String { - public: - // Static utility methods - - // Returns the input enclosed in double quotes if it's not NULL; - // otherwise returns "(null)". For example, "\"Hello\"" is returned - // for input "Hello". - // - // This is useful for printing a C string in the syntax of a literal. - // - // Known issue: escape sequences are not handled yet. - static String ShowCStringQuoted(const char* c_str); - - // Clones a 0-terminated C string, allocating memory using new. The - // caller is responsible for deleting the return value using - // delete[]. Returns the cloned string, or NULL if the input is - // NULL. - // - // This is different from strdup() in string.h, which allocates - // memory using malloc(). - static const char* CloneCString(const char* c_str); - -#if GTEST_OS_WINDOWS_MOBILE - // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be - // able to pass strings to Win32 APIs on CE we need to convert them - // to 'Unicode', UTF-16. - - // Creates a UTF-16 wide string from the given ANSI string, allocating - // memory using new. The caller is responsible for deleting the return - // value using delete[]. Returns the wide string, or NULL if the - // input is NULL. - // - // The wide string is created using the ANSI codepage (CP_ACP) to - // match the behaviour of the ANSI versions of Win32 calls and the - // C runtime. - static LPCWSTR AnsiToUtf16(const char* c_str); - - // Creates an ANSI string from the given wide string, allocating - // memory using new. The caller is responsible for deleting the return - // value using delete[]. Returns the ANSI string, or NULL if the - // input is NULL. - // - // The returned string is created using the ANSI codepage (CP_ACP) to - // match the behaviour of the ANSI versions of Win32 calls and the - // C runtime. - static const char* Utf16ToAnsi(LPCWSTR utf16_str); -#endif - - // Compares two C strings. Returns true iff they have the same content. - // - // Unlike strcmp(), this function can handle NULL argument(s). A - // NULL C string is considered different to any non-NULL C string, - // including the empty string. - static bool CStringEquals(const char* lhs, const char* rhs); - - // Converts a wide C string to a String using the UTF-8 encoding. - // NULL will be converted to "(null)". If an error occurred during - // the conversion, "(failed to convert from wide string)" is - // returned. - static String ShowWideCString(const wchar_t* wide_c_str); - - // Similar to ShowWideCString(), except that this function encloses - // the converted string in double quotes. - static String ShowWideCStringQuoted(const wchar_t* wide_c_str); - - // Compares two wide C strings. Returns true iff they have the same - // content. - // - // Unlike wcscmp(), this function can handle NULL argument(s). A - // NULL C string is considered different to any non-NULL C string, - // including the empty string. 
- static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs); - - // Compares two C strings, ignoring case. Returns true iff they - // have the same content. - // - // Unlike strcasecmp(), this function can handle NULL argument(s). - // A NULL C string is considered different to any non-NULL C string, - // including the empty string. - static bool CaseInsensitiveCStringEquals(const char* lhs, - const char* rhs); - - // Compares two wide C strings, ignoring case. Returns true iff they - // have the same content. - // - // Unlike wcscasecmp(), this function can handle NULL argument(s). - // A NULL C string is considered different to any non-NULL wide C string, - // including the empty string. - // NB: The implementations on different platforms slightly differ. - // On windows, this method uses _wcsicmp which compares according to LC_CTYPE - // environment variable. On GNU platform this method uses wcscasecmp - // which compares according to LC_CTYPE category of the current locale. - // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the - // current locale. - static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs, - const wchar_t* rhs); - - // Formats a list of arguments to a String, using the same format - // spec string as for printf. - // - // We do not use the StringPrintf class as it is not universally - // available. - // - // The result is limited to 4096 characters (including the tailing - // 0). If 4096 characters are not enough to format the input, - // "" is returned. - static String Format(const char* format, ...); - - // C'tors - - // The default c'tor constructs a NULL string. - String() : c_str_(NULL), length_(0) {} - - // Constructs a String by cloning a 0-terminated C string. - String(const char* a_c_str) { // NOLINT - if (a_c_str == NULL) { - c_str_ = NULL; - length_ = 0; - } else { - ConstructNonNull(a_c_str, strlen(a_c_str)); - } - } - - // Constructs a String by copying a given number of chars from a - // buffer. E.g. String("hello", 3) creates the string "hel", - // String("a\0bcd", 4) creates "a\0bc", String(NULL, 0) creates "", - // and String(NULL, 1) results in access violation. - String(const char* buffer, size_t a_length) { - ConstructNonNull(buffer, a_length); - } - - // The copy c'tor creates a new copy of the string. The two - // String objects do not share content. - String(const String& str) : c_str_(NULL), length_(0) { *this = str; } - - // D'tor. String is intended to be a final class, so the d'tor - // doesn't need to be virtual. - ~String() { delete[] c_str_; } - - // Allows a String to be implicitly converted to an ::std::string or - // ::string, and vice versa. Converting a String containing a NULL - // pointer to ::std::string or ::string is undefined behavior. - // Converting a ::std::string or ::string containing an embedded NUL - // character to a String will result in the prefix up to the first - // NUL character. - String(const ::std::string& str) { - ConstructNonNull(str.c_str(), str.length()); - } - - operator ::std::string() const { return ::std::string(c_str(), length()); } - -#if GTEST_HAS_GLOBAL_STRING - String(const ::string& str) { - ConstructNonNull(str.c_str(), str.length()); - } - - operator ::string() const { return ::string(c_str(), length()); } -#endif // GTEST_HAS_GLOBAL_STRING - - // Returns true iff this is an empty string (i.e. ""). - bool empty() const { return (c_str() != NULL) && (length() == 0); } - - // Compares this with another String. 
- // Returns < 0 if this is less than rhs, 0 if this is equal to rhs, or > 0 - // if this is greater than rhs. - int Compare(const String& rhs) const; - - // Returns true iff this String equals the given C string. A NULL - // string and a non-NULL string are considered not equal. - bool operator==(const char* a_c_str) const { return Compare(a_c_str) == 0; } - - // Returns true iff this String is less than the given String. A - // NULL string is considered less than "". - bool operator<(const String& rhs) const { return Compare(rhs) < 0; } - - // Returns true iff this String doesn't equal the given C string. A NULL - // string and a non-NULL string are considered not equal. - bool operator!=(const char* a_c_str) const { return !(*this == a_c_str); } - - // Returns true iff this String ends with the given suffix. *Any* - // String is considered to end with a NULL or empty suffix. - bool EndsWith(const char* suffix) const; - - // Returns true iff this String ends with the given suffix, not considering - // case. Any String is considered to end with a NULL or empty suffix. - bool EndsWithCaseInsensitive(const char* suffix) const; - - // Returns the length of the encapsulated string, or 0 if the - // string is NULL. - size_t length() const { return length_; } - - // Gets the 0-terminated C string this String object represents. - // The String object still owns the string. Therefore the caller - // should NOT delete the return value. - const char* c_str() const { return c_str_; } - - // Assigns a C string to this object. Self-assignment works. - const String& operator=(const char* a_c_str) { - return *this = String(a_c_str); - } - - // Assigns a String object to this object. Self-assignment works. - const String& operator=(const String& rhs) { - if (this != &rhs) { - delete[] c_str_; - if (rhs.c_str() == NULL) { - c_str_ = NULL; - length_ = 0; - } else { - ConstructNonNull(rhs.c_str(), rhs.length()); - } - } - - return *this; - } - - private: - // Constructs a non-NULL String from the given content. This - // function can only be called when c_str_ has not been allocated. - // ConstructNonNull(NULL, 0) results in an empty string (""). - // ConstructNonNull(NULL, non_zero) is undefined behavior. - void ConstructNonNull(const char* buffer, size_t a_length) { - char* const str = new char[a_length + 1]; - memcpy(str, buffer, a_length); - str[a_length] = '\0'; - c_str_ = str; - length_ = a_length; - } - - const char* c_str_; - size_t length_; -}; // class String - -// Streams a String to an ostream. Each '\0' character in the String -// is replaced with "\\0". -inline ::std::ostream& operator<<(::std::ostream& os, const String& str) { - if (str.c_str() == NULL) { - os << "(null)"; - } else { - const char* const c_str = str.c_str(); - for (size_t i = 0; i != str.length(); i++) { - if (c_str[i] == '\0') { - os << "\\0"; - } else { - os << c_str[i]; - } - } - } - return os; -} - -// Gets the content of the stringstream's buffer as a String. Each '\0' -// character in the buffer is replaced with "\\0". -GTEST_API_ String StringStreamToString(::std::stringstream* stream); - -// Converts a streamable value to a String. A NULL pointer is -// converted to "(null)". When the input value is a ::string, -// ::std::string, ::wstring, or ::std::wstring object, each NUL -// character in it is replaced with "\\0". - -// Declared here but defined in gtest.h, so that it has access -// to the definition of the Message class, required by the ARM -// compiler. 
-template -String StreamableToString(const T& streamable); - -} // namespace internal -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: keith.ray@gmail.com (Keith Ray) -// -// Google Test filepath utilities -// -// This header file declares classes and functions used internally by -// Google Test. They are subject to change without notice. -// -// This file is #included in . -// Do not include this header file separately! - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ - - -namespace testing { -namespace internal { - -// FilePath - a class for file and directory pathname manipulation which -// handles platform-specific conventions (like the pathname separator). -// Used for helper functions for naming files in a directory for xml output. -// Except for Set methods, all methods are const or static, which provides an -// "immutable value object" -- useful for peace of mind. -// A FilePath with a value ending in a path separator ("like/this/") represents -// a directory, otherwise it is assumed to represent a file. In either case, -// it may or may not represent an actual file or directory in the file system. -// Names are NOT checked for syntax correctness -- no checking for illegal -// characters, malformed paths, etc. 
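// A brief usage sketch (hypothetical paths, for illustration only):
//
//   FilePath results_dir("test_results/");  // Trailing separator: this value
//   results_dir.IsDirectory();              // represents a directory (true).
//
//   // Yields "test_results/MyTest.xml" ('\' as the separator on Windows):
//   FilePath report = FilePath::MakeFileName(
//       FilePath("test_results"), FilePath("MyTest"), 0, "xml");
//
//   // A number greater than zero is appended to the base name, giving
//   // "test_results/MyTest_3.xml":
//   FilePath report3 = FilePath::MakeFileName(
//       FilePath("test_results"), FilePath("MyTest"), 3, "xml");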
- -class GTEST_API_ FilePath { - public: - FilePath() : pathname_("") { } - FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { } - - explicit FilePath(const char* pathname) : pathname_(pathname) { - Normalize(); - } - - explicit FilePath(const String& pathname) : pathname_(pathname) { - Normalize(); - } - - FilePath& operator=(const FilePath& rhs) { - Set(rhs); - return *this; - } - - void Set(const FilePath& rhs) { - pathname_ = rhs.pathname_; - } - - String ToString() const { return pathname_; } - const char* c_str() const { return pathname_.c_str(); } - - // Returns the current working directory, or "" if unsuccessful. - static FilePath GetCurrentDir(); - - // Given directory = "dir", base_name = "test", number = 0, - // extension = "xml", returns "dir/test.xml". If number is greater - // than zero (e.g., 12), returns "dir/test_12.xml". - // On Windows platform, uses \ as the separator rather than /. - static FilePath MakeFileName(const FilePath& directory, - const FilePath& base_name, - int number, - const char* extension); - - // Given directory = "dir", relative_path = "test.xml", - // returns "dir/test.xml". - // On Windows, uses \ as the separator rather than /. - static FilePath ConcatPaths(const FilePath& directory, - const FilePath& relative_path); - - // Returns a pathname for a file that does not currently exist. The pathname - // will be directory/base_name.extension or - // directory/base_name_.extension if directory/base_name.extension - // already exists. The number will be incremented until a pathname is found - // that does not already exist. - // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. - // There could be a race condition if two or more processes are calling this - // function at the same time -- they could both pick the same filename. - static FilePath GenerateUniqueFileName(const FilePath& directory, - const FilePath& base_name, - const char* extension); - - // Returns true iff the path is NULL or "". - bool IsEmpty() const { return c_str() == NULL || *c_str() == '\0'; } - - // If input name has a trailing separator character, removes it and returns - // the name, otherwise return the name string unmodified. - // On Windows platform, uses \ as the separator, other platforms use /. - FilePath RemoveTrailingPathSeparator() const; - - // Returns a copy of the FilePath with the directory part removed. - // Example: FilePath("path/to/file").RemoveDirectoryName() returns - // FilePath("file"). If there is no directory part ("just_a_file"), it returns - // the FilePath unmodified. If there is no file part ("just_a_dir/") it - // returns an empty FilePath (""). - // On Windows platform, '\' is the path separator, otherwise it is '/'. - FilePath RemoveDirectoryName() const; - - // RemoveFileName returns the directory path with the filename removed. - // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". - // If the FilePath is "a_file" or "/a_file", RemoveFileName returns - // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does - // not have a file, like "just/a/dir/", it returns the FilePath unmodified. - // On Windows platform, '\' is the path separator, otherwise it is '/'. - FilePath RemoveFileName() const; - - // Returns a copy of the FilePath with the case-insensitive extension removed. - // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns - // FilePath("dir/file"). If a case-insensitive extension is not - // found, returns a copy of the original FilePath. 
- FilePath RemoveExtension(const char* extension) const; - - // Creates directories so that path exists. Returns true if successful or if - // the directories already exist; returns false if unable to create - // directories for any reason. Will also return false if the FilePath does - // not represent a directory (that is, it doesn't end with a path separator). - bool CreateDirectoriesRecursively() const; - - // Create the directory so that path exists. Returns true if successful or - // if the directory already exists; returns false if unable to create the - // directory for any reason, including if the parent directory does not - // exist. Not named "CreateDirectory" because that's a macro on Windows. - bool CreateFolder() const; - - // Returns true if FilePath describes something in the file-system, - // either a file, directory, or whatever, and that something exists. - bool FileOrDirectoryExists() const; - - // Returns true if pathname describes a directory in the file-system - // that exists. - bool DirectoryExists() const; - - // Returns true if FilePath ends with a path separator, which indicates that - // it is intended to represent a directory. Returns false otherwise. - // This does NOT check that a directory (or file) actually exists. - bool IsDirectory() const; - - // Returns true if pathname describes a root directory. (Windows has one - // root directory per disk drive.) - bool IsRootDirectory() const; - - // Returns true if pathname describes an absolute path. - bool IsAbsolutePath() const; - - private: - // Replaces multiple consecutive separators with a single separator. - // For example, "bar///foo" becomes "bar/foo". Does not eliminate other - // redundancies that might be in a pathname involving "." or "..". - // - // A pathname with multiple consecutive separators may occur either through - // user error or as a result of some scripts or APIs that generate a pathname - // with a trailing separator. On other platforms the same API or script - // may NOT generate a pathname with a trailing "/". Then elsewhere that - // pathname may have another "/" and pathname components added to it, - // without checking for the separator already being there. - // The script language and operating system may allow paths like "foo//bar" - // but some of the functions in FilePath will not handle that correctly. In - // particular, RemoveTrailingPathSeparator() only removes one separator, and - // it is called in CreateDirectoriesRecursively() assuming that it will change - // a pathname from directory syntax (trailing separator) to filename syntax. - // - // On Windows this method also replaces the alternate path separator '/' with - // the primary path separator '\\', so that for example "bar\\/\\foo" becomes - // "bar\\foo". - - void Normalize(); - - // Returns a pointer to the last occurence of a valid path separator in - // the FilePath. On Windows, for example, both '/' and '\' are valid path - // separators. Returns NULL if no path separator was found. - const char* FindLastPathSeparator() const; - - String pathname_; -}; // class FilePath - -} // namespace internal -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ -// This file was GENERATED by command: -// pump.py gtest-type-util.h.pump -// DO NOT EDIT BY HAND!!! - -// Copyright 2008 Google Inc. -// All Rights Reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) - -// Type utilities needed for implementing typed and type-parameterized -// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! -// -// Currently we support at most 50 types in a list, and at most 50 -// type-parameterized tests in one type-parameterized test case. -// Please contact googletestframework@googlegroups.com if you need -// more. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ - - -// #ifdef __GNUC__ is too general here. It is possible to use gcc without using -// libstdc++ (which is where cxxabi.h comes from). -# ifdef __GLIBCXX__ -# include -# elif defined(__HP_aCC) -# include -# endif // __GLIBCXX__ - -namespace testing { -namespace internal { - -// GetTypeName() returns a human-readable name of type T. -// NB: This function is also used in Google Mock, so don't move it inside of -// the typed-test-only section below. -template -String GetTypeName() { -# if GTEST_HAS_RTTI - - const char* const name = typeid(T).name(); -# if defined(__GLIBCXX__) || defined(__HP_aCC) - int status = 0; - // gcc's implementation of typeid(T).name() mangles the type name, - // so we have to demangle it. -# ifdef __GLIBCXX__ - using abi::__cxa_demangle; -# endif // __GLIBCXX__ - char* const readable_name = __cxa_demangle(name, 0, 0, &status); - const String name_str(status == 0 ? readable_name : name); - free(readable_name); - return name_str; -# else - return name; -# endif // __GLIBCXX__ || __HP_aCC - -# else - - return ""; - -# endif // GTEST_HAS_RTTI -} - -#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P - -// AssertyTypeEq::type is defined iff T1 and T2 are the same -// type. This can be used as a compile-time assertion to ensure that -// two types are equal. - -template -struct AssertTypeEq; - -template -struct AssertTypeEq { - typedef bool type; -}; - -// A unique type used as the default value for the arguments of class -// template Types. 
This allows us to simulate variadic templates -// (e.g. Types, Type, and etc), which C++ doesn't -// support directly. -struct None {}; - -// The following family of struct and struct templates are used to -// represent type lists. In particular, TypesN -// represents a type list with N types (T1, T2, ..., and TN) in it. -// Except for Types0, every struct in the family has two member types: -// Head for the first type in the list, and Tail for the rest of the -// list. - -// The empty type list. -struct Types0 {}; - -// Type lists of length 1, 2, 3, and so on. - -template -struct Types1 { - typedef T1 Head; - typedef Types0 Tail; -}; -template -struct Types2 { - typedef T1 Head; - typedef Types1 Tail; -}; - -template -struct Types3 { - typedef T1 Head; - typedef Types2 Tail; -}; - -template -struct Types4 { - typedef T1 Head; - typedef Types3 Tail; -}; - -template -struct Types5 { - typedef T1 Head; - typedef Types4 Tail; -}; - -template -struct Types6 { - typedef T1 Head; - typedef Types5 Tail; -}; - -template -struct Types7 { - typedef T1 Head; - typedef Types6 Tail; -}; - -template -struct Types8 { - typedef T1 Head; - typedef Types7 Tail; -}; - -template -struct Types9 { - typedef T1 Head; - typedef Types8 Tail; -}; - -template -struct Types10 { - typedef T1 Head; - typedef Types9 Tail; -}; - -template -struct Types11 { - typedef T1 Head; - typedef Types10 Tail; -}; - -template -struct Types12 { - typedef T1 Head; - typedef Types11 Tail; -}; - -template -struct Types13 { - typedef T1 Head; - typedef Types12 Tail; -}; - -template -struct Types14 { - typedef T1 Head; - typedef Types13 Tail; -}; - -template -struct Types15 { - typedef T1 Head; - typedef Types14 Tail; -}; - -template -struct Types16 { - typedef T1 Head; - typedef Types15 Tail; -}; - -template -struct Types17 { - typedef T1 Head; - typedef Types16 Tail; -}; - -template -struct Types18 { - typedef T1 Head; - typedef Types17 Tail; -}; - -template -struct Types19 { - typedef T1 Head; - typedef Types18 Tail; -}; - -template -struct Types20 { - typedef T1 Head; - typedef Types19 Tail; -}; - -template -struct Types21 { - typedef T1 Head; - typedef Types20 Tail; -}; - -template -struct Types22 { - typedef T1 Head; - typedef Types21 Tail; -}; - -template -struct Types23 { - typedef T1 Head; - typedef Types22 Tail; -}; - -template -struct Types24 { - typedef T1 Head; - typedef Types23 Tail; -}; - -template -struct Types25 { - typedef T1 Head; - typedef Types24 Tail; -}; - -template -struct Types26 { - typedef T1 Head; - typedef Types25 Tail; -}; - -template -struct Types27 { - typedef T1 Head; - typedef Types26 Tail; -}; - -template -struct Types28 { - typedef T1 Head; - typedef Types27 Tail; -}; - -template -struct Types29 { - typedef T1 Head; - typedef Types28 Tail; -}; - -template -struct Types30 { - typedef T1 Head; - typedef Types29 Tail; -}; - -template -struct Types31 { - typedef T1 Head; - typedef Types30 Tail; -}; - -template -struct Types32 { - typedef T1 Head; - typedef Types31 Tail; -}; - -template -struct Types33 { - typedef T1 Head; - typedef Types32 Tail; -}; - -template -struct Types34 { - typedef T1 Head; - typedef Types33 Tail; -}; - -template -struct Types35 { - typedef T1 Head; - typedef Types34 Tail; -}; - -template -struct Types36 { - typedef T1 Head; - typedef Types35 Tail; -}; - -template -struct Types37 { - typedef T1 Head; - typedef Types36 Tail; -}; - -template -struct Types38 { - typedef T1 Head; - typedef Types37 Tail; -}; - -template -struct Types39 { - typedef T1 Head; - typedef Types38 Tail; -}; - 
-template -struct Types40 { - typedef T1 Head; - typedef Types39 Tail; -}; - -template -struct Types41 { - typedef T1 Head; - typedef Types40 Tail; -}; - -template -struct Types42 { - typedef T1 Head; - typedef Types41 Tail; -}; - -template -struct Types43 { - typedef T1 Head; - typedef Types42 Tail; -}; - -template -struct Types44 { - typedef T1 Head; - typedef Types43 Tail; -}; - -template -struct Types45 { - typedef T1 Head; - typedef Types44 Tail; -}; - -template -struct Types46 { - typedef T1 Head; - typedef Types45 Tail; -}; - -template -struct Types47 { - typedef T1 Head; - typedef Types46 Tail; -}; - -template -struct Types48 { - typedef T1 Head; - typedef Types47 Tail; -}; - -template -struct Types49 { - typedef T1 Head; - typedef Types48 Tail; -}; - -template -struct Types50 { - typedef T1 Head; - typedef Types49 Tail; -}; - - -} // namespace internal - -// We don't want to require the users to write TypesN<...> directly, -// as that would require them to count the length. Types<...> is much -// easier to write, but generates horrible messages when there is a -// compiler error, as gcc insists on printing out each template -// argument, even if it has the default value (this means Types -// will appear as Types in the compiler -// errors). -// -// Our solution is to combine the best part of the two approaches: a -// user would write Types, and Google Test will translate -// that to TypesN internally to make error messages -// readable. The translation is done by the 'type' member of the -// Types template. -template -struct Types { - typedef internal::Types50 type; -}; - -template <> -struct Types { - typedef internal::Types0 type; -}; -template -struct Types { - typedef internal::Types1 type; -}; -template -struct Types { - typedef internal::Types2 type; -}; -template -struct Types { - typedef internal::Types3 type; -}; -template -struct Types { - typedef internal::Types4 type; -}; -template -struct Types { - typedef internal::Types5 type; -}; -template -struct Types { - typedef internal::Types6 type; -}; -template -struct Types { - typedef internal::Types7 type; -}; -template -struct Types { - typedef internal::Types8 type; -}; -template -struct Types { - typedef internal::Types9 type; -}; -template -struct Types { - typedef internal::Types10 type; -}; -template -struct Types { - typedef internal::Types11 type; -}; -template -struct Types { - typedef internal::Types12 type; -}; -template -struct Types { - typedef internal::Types13 type; -}; -template -struct Types { - typedef internal::Types14 type; -}; -template -struct Types { - typedef internal::Types15 type; -}; -template -struct Types { - typedef internal::Types16 type; -}; -template -struct Types { - typedef internal::Types17 type; -}; -template -struct Types { - typedef internal::Types18 type; -}; -template -struct Types { - typedef internal::Types19 type; -}; -template -struct Types { - typedef internal::Types20 type; -}; -template -struct Types { - typedef internal::Types21 type; -}; -template -struct Types { - typedef internal::Types22 type; -}; -template -struct Types { - typedef internal::Types23 type; -}; -template -struct Types { - typedef internal::Types24 type; -}; -template -struct Types { - typedef internal::Types25 type; -}; -template -struct Types { - typedef internal::Types26 type; -}; -template -struct Types { - typedef internal::Types27 type; -}; -template -struct Types { - typedef internal::Types28 type; -}; -template -struct Types { - typedef internal::Types29 type; -}; -template -struct Types { - 
typedef internal::Types30 type; -}; -template -struct Types { - typedef internal::Types31 type; -}; -template -struct Types { - typedef internal::Types32 type; -}; -template -struct Types { - typedef internal::Types33 type; -}; -template -struct Types { - typedef internal::Types34 type; -}; -template -struct Types { - typedef internal::Types35 type; -}; -template -struct Types { - typedef internal::Types36 type; -}; -template -struct Types { - typedef internal::Types37 type; -}; -template -struct Types { - typedef internal::Types38 type; -}; -template -struct Types { - typedef internal::Types39 type; -}; -template -struct Types { - typedef internal::Types40 type; -}; -template -struct Types { - typedef internal::Types41 type; -}; -template -struct Types { - typedef internal::Types42 type; -}; -template -struct Types { - typedef internal::Types43 type; -}; -template -struct Types { - typedef internal::Types44 type; -}; -template -struct Types { - typedef internal::Types45 type; -}; -template -struct Types { - typedef internal::Types46 type; -}; -template -struct Types { - typedef internal::Types47 type; -}; -template -struct Types { - typedef internal::Types48 type; -}; -template -struct Types { - typedef internal::Types49 type; -}; - -namespace internal { - -# define GTEST_TEMPLATE_ template class - -// The template "selector" struct TemplateSel is used to -// represent Tmpl, which must be a class template with one type -// parameter, as a type. TemplateSel::Bind::type is defined -// as the type Tmpl. This allows us to actually instantiate the -// template "selected" by TemplateSel. -// -// This trick is necessary for simulating typedef for class templates, -// which C++ doesn't support directly. -template -struct TemplateSel { - template - struct Bind { - typedef Tmpl type; - }; -}; - -# define GTEST_BIND_(TmplSel, T) \ - TmplSel::template Bind::type - -// A unique struct template used as the default value for the -// arguments of class template Templates. This allows us to simulate -// variadic templates (e.g. Templates, Templates, -// and etc), which C++ doesn't support directly. -template -struct NoneT {}; - -// The following family of struct and struct templates are used to -// represent template lists. In particular, TemplatesN represents a list of N templates (T1, T2, ..., and TN). Except -// for Templates0, every struct in the family has two member types: -// Head for the selector of the first template in the list, and Tail -// for the rest of the list. - -// The empty template list. -struct Templates0 {}; - -// Template lists of length 1, 2, 3, and so on. 
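// --- Editor's illustration (not part of the deleted header) ---
// A standalone sketch of the TemplateSel/GTEST_BIND_ trick described above,
// using our own names: a class template with one type parameter is wrapped as
// an ordinary type and "re-opened" later through a nested Bind member, which
// is how C++98 code simulates a typedef for a class template.
template <template <typename U> class Tmpl>
struct Selector {
  template <typename T>
  struct Bind { typedef Tmpl<T> type; };
};
template <typename T> struct Wrapper { T value; };
typedef Selector<Wrapper> WrapperSel;      // behaves like a typedef of the template
WrapperSel::Bind<int>::type wrapped_int;   // the same type as Wrapper<int>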
- -template -struct Templates1 { - typedef TemplateSel Head; - typedef Templates0 Tail; -}; -template -struct Templates2 { - typedef TemplateSel Head; - typedef Templates1 Tail; -}; - -template -struct Templates3 { - typedef TemplateSel Head; - typedef Templates2 Tail; -}; - -template -struct Templates4 { - typedef TemplateSel Head; - typedef Templates3 Tail; -}; - -template -struct Templates5 { - typedef TemplateSel Head; - typedef Templates4 Tail; -}; - -template -struct Templates6 { - typedef TemplateSel Head; - typedef Templates5 Tail; -}; - -template -struct Templates7 { - typedef TemplateSel Head; - typedef Templates6 Tail; -}; - -template -struct Templates8 { - typedef TemplateSel Head; - typedef Templates7 Tail; -}; - -template -struct Templates9 { - typedef TemplateSel Head; - typedef Templates8 Tail; -}; - -template -struct Templates10 { - typedef TemplateSel Head; - typedef Templates9 Tail; -}; - -template -struct Templates11 { - typedef TemplateSel Head; - typedef Templates10 Tail; -}; - -template -struct Templates12 { - typedef TemplateSel Head; - typedef Templates11 Tail; -}; - -template -struct Templates13 { - typedef TemplateSel Head; - typedef Templates12 Tail; -}; - -template -struct Templates14 { - typedef TemplateSel Head; - typedef Templates13 Tail; -}; - -template -struct Templates15 { - typedef TemplateSel Head; - typedef Templates14 Tail; -}; - -template -struct Templates16 { - typedef TemplateSel Head; - typedef Templates15 Tail; -}; - -template -struct Templates17 { - typedef TemplateSel Head; - typedef Templates16 Tail; -}; - -template -struct Templates18 { - typedef TemplateSel Head; - typedef Templates17 Tail; -}; - -template -struct Templates19 { - typedef TemplateSel Head; - typedef Templates18 Tail; -}; - -template -struct Templates20 { - typedef TemplateSel Head; - typedef Templates19 Tail; -}; - -template -struct Templates21 { - typedef TemplateSel Head; - typedef Templates20 Tail; -}; - -template -struct Templates22 { - typedef TemplateSel Head; - typedef Templates21 Tail; -}; - -template -struct Templates23 { - typedef TemplateSel Head; - typedef Templates22 Tail; -}; - -template -struct Templates24 { - typedef TemplateSel Head; - typedef Templates23 Tail; -}; - -template -struct Templates25 { - typedef TemplateSel Head; - typedef Templates24 Tail; -}; - -template -struct Templates26 { - typedef TemplateSel Head; - typedef Templates25 Tail; -}; - -template -struct Templates27 { - typedef TemplateSel Head; - typedef Templates26 Tail; -}; - -template -struct Templates28 { - typedef TemplateSel Head; - typedef Templates27 Tail; -}; - -template -struct Templates29 { - typedef TemplateSel Head; - typedef Templates28 Tail; -}; - -template -struct Templates30 { - typedef TemplateSel Head; - typedef Templates29 Tail; -}; - -template -struct Templates31 { - typedef TemplateSel Head; - typedef Templates30 Tail; -}; - -template -struct Templates32 { - typedef TemplateSel Head; - typedef Templates31 Tail; -}; - -template -struct Templates33 { - typedef TemplateSel Head; - typedef Templates32 Tail; -}; - -template -struct Templates34 { - typedef TemplateSel Head; - typedef Templates33 Tail; -}; - -template -struct Templates35 { - typedef TemplateSel Head; - typedef Templates34 Tail; -}; - -template -struct Templates36 { - typedef TemplateSel Head; - typedef Templates35 Tail; -}; - -template -struct Templates37 { - typedef TemplateSel Head; - typedef Templates36 Tail; -}; - -template -struct Templates38 { - typedef TemplateSel Head; - typedef Templates37 Tail; -}; - 
-template -struct Templates39 { - typedef TemplateSel Head; - typedef Templates38 Tail; -}; - -template -struct Templates40 { - typedef TemplateSel Head; - typedef Templates39 Tail; -}; - -template -struct Templates41 { - typedef TemplateSel Head; - typedef Templates40 Tail; -}; - -template -struct Templates42 { - typedef TemplateSel Head; - typedef Templates41 Tail; -}; - -template -struct Templates43 { - typedef TemplateSel Head; - typedef Templates42 Tail; -}; - -template -struct Templates44 { - typedef TemplateSel Head; - typedef Templates43 Tail; -}; - -template -struct Templates45 { - typedef TemplateSel Head; - typedef Templates44 Tail; -}; - -template -struct Templates46 { - typedef TemplateSel Head; - typedef Templates45 Tail; -}; - -template -struct Templates47 { - typedef TemplateSel Head; - typedef Templates46 Tail; -}; - -template -struct Templates48 { - typedef TemplateSel Head; - typedef Templates47 Tail; -}; - -template -struct Templates49 { - typedef TemplateSel Head; - typedef Templates48 Tail; -}; - -template -struct Templates50 { - typedef TemplateSel Head; - typedef Templates49 Tail; -}; - - -// We don't want to require the users to write TemplatesN<...> directly, -// as that would require them to count the length. Templates<...> is much -// easier to write, but generates horrible messages when there is a -// compiler error, as gcc insists on printing out each template -// argument, even if it has the default value (this means Templates -// will appear as Templates in the compiler -// errors). -// -// Our solution is to combine the best part of the two approaches: a -// user would write Templates, and Google Test will translate -// that to TemplatesN internally to make error messages -// readable. The translation is done by the 'type' member of the -// Templates template. 
-template -struct Templates { - typedef Templates50 type; -}; - -template <> -struct Templates { - typedef Templates0 type; -}; -template -struct Templates { - typedef Templates1 type; -}; -template -struct Templates { - typedef Templates2 type; -}; -template -struct Templates { - typedef Templates3 type; -}; -template -struct Templates { - typedef Templates4 type; -}; -template -struct Templates { - typedef Templates5 type; -}; -template -struct Templates { - typedef Templates6 type; -}; -template -struct Templates { - typedef Templates7 type; -}; -template -struct Templates { - typedef Templates8 type; -}; -template -struct Templates { - typedef Templates9 type; -}; -template -struct Templates { - typedef Templates10 type; -}; -template -struct Templates { - typedef Templates11 type; -}; -template -struct Templates { - typedef Templates12 type; -}; -template -struct Templates { - typedef Templates13 type; -}; -template -struct Templates { - typedef Templates14 type; -}; -template -struct Templates { - typedef Templates15 type; -}; -template -struct Templates { - typedef Templates16 type; -}; -template -struct Templates { - typedef Templates17 type; -}; -template -struct Templates { - typedef Templates18 type; -}; -template -struct Templates { - typedef Templates19 type; -}; -template -struct Templates { - typedef Templates20 type; -}; -template -struct Templates { - typedef Templates21 type; -}; -template -struct Templates { - typedef Templates22 type; -}; -template -struct Templates { - typedef Templates23 type; -}; -template -struct Templates { - typedef Templates24 type; -}; -template -struct Templates { - typedef Templates25 type; -}; -template -struct Templates { - typedef Templates26 type; -}; -template -struct Templates { - typedef Templates27 type; -}; -template -struct Templates { - typedef Templates28 type; -}; -template -struct Templates { - typedef Templates29 type; -}; -template -struct Templates { - typedef Templates30 type; -}; -template -struct Templates { - typedef Templates31 type; -}; -template -struct Templates { - typedef Templates32 type; -}; -template -struct Templates { - typedef Templates33 type; -}; -template -struct Templates { - typedef Templates34 type; -}; -template -struct Templates { - typedef Templates35 type; -}; -template -struct Templates { - typedef Templates36 type; -}; -template -struct Templates { - typedef Templates37 type; -}; -template -struct Templates { - typedef Templates38 type; -}; -template -struct Templates { - typedef Templates39 type; -}; -template -struct Templates { - typedef Templates40 type; -}; -template -struct Templates { - typedef Templates41 type; -}; -template -struct Templates { - typedef Templates42 type; -}; -template -struct Templates { - typedef Templates43 type; -}; -template -struct Templates { - typedef Templates44 type; -}; -template -struct Templates { - typedef Templates45 type; -}; -template -struct Templates { - typedef Templates46 type; -}; -template -struct Templates { - typedef Templates47 type; -}; -template -struct Templates { - typedef Templates48 type; -}; -template -struct Templates { - typedef Templates49 type; -}; - -// The TypeList template makes it possible to use either a single type -// or a Types<...> list in TYPED_TEST_CASE() and -// INSTANTIATE_TYPED_TEST_CASE_P(). 
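// --- Editor's illustration (not part of the deleted header) ---
// A reduced sketch, with our own names, of the Head/Tail type-list scheme used
// by the TypesN/TemplatesN families above: every non-empty list exposes Head
// and Tail, so a compile-time operation simply recurses on Tail until it hits
// the empty list (the same shape of recursion the registration helpers use
// later in this header).
struct Nil {};                                          // the empty type list
template <typename H, typename T>
struct Cons { typedef H Head; typedef T Tail; };

template <typename List> struct Length;                 // list length by recursion
template <> struct Length<Nil> { static const int value = 0; };
template <typename H, typename T>
struct Length< Cons<H, T> > { static const int value = 1 + Length<T>::value; };
// Length< Cons<int, Cons<double, Nil> > >::value == 2, computed at compile time.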
- -template -struct TypeList { typedef Types1 type; }; - -template -struct TypeList > { - typedef typename Types::type type; -}; - -#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P - -} // namespace internal -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ - -// Due to C++ preprocessor weirdness, we need double indirection to -// concatenate two tokens when one of them is __LINE__. Writing -// -// foo ## __LINE__ -// -// will result in the token foo__LINE__, instead of foo followed by -// the current line number. For more details, see -// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6 -#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar) -#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar - -// Google Test defines the testing::Message class to allow construction of -// test messages via the << operator. The idea is that anything -// streamable to std::ostream can be streamed to a testing::Message. -// This allows a user to use his own types in Google Test assertions by -// overloading the << operator. -// -// util/gtl/stl_logging-inl.h overloads << for STL containers. These -// overloads cannot be defined in the std namespace, as that will be -// undefined behavior. Therefore, they are defined in the global -// namespace instead. -// -// C++'s symbol lookup rule (i.e. Koenig lookup) says that these -// overloads are visible in either the std namespace or the global -// namespace, but not other namespaces, including the testing -// namespace which Google Test's Message class is in. -// -// To allow STL containers (and other types that has a << operator -// defined in the global namespace) to be used in Google Test assertions, -// testing::Message must access the custom << operator from the global -// namespace. Hence this helper function. -// -// Note: Jeffrey Yasskin suggested an alternative fix by "using -// ::operator<<;" in the definition of Message's operator<<. That fix -// doesn't require a helper function, but unfortunately doesn't -// compile with MSVC. -template -inline void GTestStreamToHelper(std::ostream* os, const T& val) { - *os << val; -} - -class ProtocolMessage; -namespace proto2 { class Message; } - -namespace testing { - -// Forward declarations. - -class AssertionResult; // Result of an assertion. -class Message; // Represents a failure message. -class Test; // Represents a test. -class TestInfo; // Information about a test. -class TestPartResult; // Result of a test part. -class UnitTest; // A collection of test cases. - -template -::std::string PrintToString(const T& value); - -namespace internal { - -struct TraceInfo; // Information about a trace point. -class ScopedTrace; // Implements scoped trace. -class TestInfoImpl; // Opaque implementation of TestInfo -class UnitTestImpl; // Opaque implementation of UnitTest - -// How many times InitGoogleTest() has been called. -extern int g_init_gtest_count; - -// The text used in failure messages to indicate the start of the -// stack trace. -GTEST_API_ extern const char kStackTraceMarker[]; - -// A secret type that Google Test users don't know about. It has no -// definition on purpose. Therefore it's impossible to create a -// Secret object, which is what we want. -class Secret; - -// Two overloaded helpers for checking at compile time whether an -// expression is a null pointer literal (i.e. NULL or any 0-valued -// compile-time integral constant). 
Their return values have -// different sizes, so we can use sizeof() to test which version is -// picked by the compiler. These helpers have no implementations, as -// we only need their signatures. -// -// Given IsNullLiteralHelper(x), the compiler will pick the first -// version if x can be implicitly converted to Secret*, and pick the -// second version otherwise. Since Secret is a secret and incomplete -// type, the only expression a user can write that has type Secret* is -// a null pointer literal. Therefore, we know that x is a null -// pointer literal if and only if the first version is picked by the -// compiler. -char IsNullLiteralHelper(Secret* p); -char (&IsNullLiteralHelper(...))[2]; // NOLINT - -// A compile-time bool constant that is true if and only if x is a -// null pointer literal (i.e. NULL or any 0-valued compile-time -// integral constant). -#ifdef GTEST_ELLIPSIS_NEEDS_POD_ -// We lose support for NULL detection where the compiler doesn't like -// passing non-POD classes through ellipsis (...). -# define GTEST_IS_NULL_LITERAL_(x) false -#else -# define GTEST_IS_NULL_LITERAL_(x) \ - (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1) -#endif // GTEST_ELLIPSIS_NEEDS_POD_ - -// Appends the user-supplied message to the Google-Test-generated message. -GTEST_API_ String AppendUserMessage(const String& gtest_msg, - const Message& user_msg); - -// A helper class for creating scoped traces in user programs. -class GTEST_API_ ScopedTrace { - public: - // The c'tor pushes the given source file location and message onto - // a trace stack maintained by Google Test. - ScopedTrace(const char* file, int line, const Message& message); - - // The d'tor pops the info pushed by the c'tor. - // - // Note that the d'tor is not virtual in order to be efficient. - // Don't inherit from ScopedTrace! - ~ScopedTrace(); - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace); -} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its - // c'tor and d'tor. Therefore it doesn't - // need to be used otherwise. - -// Converts a streamable value to a String. A NULL pointer is -// converted to "(null)". When the input value is a ::string, -// ::std::string, ::wstring, or ::std::wstring object, each NUL -// character in it is replaced with "\\0". -// Declared here but defined in gtest.h, so that it has access -// to the definition of the Message class, required by the ARM -// compiler. -template -String StreamableToString(const T& streamable); - -// The Symbian compiler has a bug that prevents it from selecting the -// correct overload of FormatForComparisonFailureMessage (see below) -// unless we pass the first argument by reference. If we do that, -// however, Visual Age C++ 10.1 generates a compiler error. Therefore -// we only apply the work-around for Symbian. -#if defined(__SYMBIAN32__) -# define GTEST_CREF_WORKAROUND_ const& -#else -# define GTEST_CREF_WORKAROUND_ -#endif - -// When this operand is a const char* or char*, if the other operand -// is a ::std::string or ::string, we print this operand as a C string -// rather than a pointer (we do the same for wide strings); otherwise -// we print it as a pointer to be safe. - -// This internal macro is used to avoid duplicated code. 
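// --- Editor's illustration (not part of the deleted header) ---
// A self-contained sketch of the overload-resolution/sizeof trick explained
// above, with our own names: only a null pointer literal converts to a pointer
// to the never-defined SecretToken type, and the two overloads return types of
// different sizes, so sizeof reveals at compile time which one was selected.
class SecretToken;                                  // deliberately left incomplete
char PicksFirstForNull(SecretToken*);               // chosen for 0 / NULL
char (&PicksFirstForNull(...))[2];                  // chosen for everything else
#define IS_NULL_LITERAL(x) (sizeof(PicksFirstForNull(x)) == 1)
// IS_NULL_LITERAL(0) is true; IS_NULL_LITERAL(1) and IS_NULL_LITERAL("hi") are false.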
-#define GTEST_FORMAT_IMPL_(operand2_type, operand1_printer)\ -inline String FormatForComparisonFailureMessage(\ - operand2_type::value_type* GTEST_CREF_WORKAROUND_ str, \ - const operand2_type& /*operand2*/) {\ - return operand1_printer(str);\ -}\ -inline String FormatForComparisonFailureMessage(\ - const operand2_type::value_type* GTEST_CREF_WORKAROUND_ str, \ - const operand2_type& /*operand2*/) {\ - return operand1_printer(str);\ -} - -GTEST_FORMAT_IMPL_(::std::string, String::ShowCStringQuoted) -#if GTEST_HAS_STD_WSTRING -GTEST_FORMAT_IMPL_(::std::wstring, String::ShowWideCStringQuoted) -#endif // GTEST_HAS_STD_WSTRING - -#if GTEST_HAS_GLOBAL_STRING -GTEST_FORMAT_IMPL_(::string, String::ShowCStringQuoted) -#endif // GTEST_HAS_GLOBAL_STRING -#if GTEST_HAS_GLOBAL_WSTRING -GTEST_FORMAT_IMPL_(::wstring, String::ShowWideCStringQuoted) -#endif // GTEST_HAS_GLOBAL_WSTRING - -#undef GTEST_FORMAT_IMPL_ - -// The next four overloads handle the case where the operand being -// printed is a char/wchar_t pointer and the other operand is not a -// string/wstring object. In such cases, we just print the operand as -// a pointer to be safe. -#define GTEST_FORMAT_CHAR_PTR_IMPL_(CharType) \ - template \ - String FormatForComparisonFailureMessage(CharType* GTEST_CREF_WORKAROUND_ p, \ - const T&) { \ - return PrintToString(static_cast(p)); \ - } - -GTEST_FORMAT_CHAR_PTR_IMPL_(char) -GTEST_FORMAT_CHAR_PTR_IMPL_(const char) -GTEST_FORMAT_CHAR_PTR_IMPL_(wchar_t) -GTEST_FORMAT_CHAR_PTR_IMPL_(const wchar_t) - -#undef GTEST_FORMAT_CHAR_PTR_IMPL_ - -// Constructs and returns the message for an equality assertion -// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. -// -// The first four parameters are the expressions used in the assertion -// and their values, as strings. For example, for ASSERT_EQ(foo, bar) -// where foo is 5 and bar is 6, we have: -// -// expected_expression: "foo" -// actual_expression: "bar" -// expected_value: "5" -// actual_value: "6" -// -// The ignoring_case parameter is true iff the assertion is a -// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will -// be inserted into the message. -GTEST_API_ AssertionResult EqFailure(const char* expected_expression, - const char* actual_expression, - const String& expected_value, - const String& actual_value, - bool ignoring_case); - -// Constructs a failure message for Boolean assertions such as EXPECT_TRUE. -GTEST_API_ String GetBoolAssertionFailureMessage( - const AssertionResult& assertion_result, - const char* expression_text, - const char* actual_predicate_value, - const char* expected_predicate_value); - -// This template class represents an IEEE floating-point number -// (either single-precision or double-precision, depending on the -// template parameters). -// -// The purpose of this class is to do more sophisticated number -// comparison. (Due to round-off error, etc, it's very unlikely that -// two floating-points will be equal exactly. Hence a naive -// comparison by the == operation often doesn't work.) -// -// Format of IEEE floating-point: -// -// The most-significant bit being the leftmost, an IEEE -// floating-point looks like -// -// sign_bit exponent_bits fraction_bits -// -// Here, sign_bit is a single bit that designates the sign of the -// number. -// -// For float, there are 8 exponent bits and 23 fraction bits. -// -// For double, there are 11 exponent bits and 52 fraction bits. -// -// More details can be found at -// http://en.wikipedia.org/wiki/IEEE_floating-point_standard. 
-// -// Template parameter: -// -// RawType: the raw floating-point type (either float or double) -template -class FloatingPoint { - public: - // Defines the unsigned integer type that has the same size as the - // floating point number. - typedef typename TypeWithSize::UInt Bits; - - // Constants. - - // # of bits in a number. - static const size_t kBitCount = 8*sizeof(RawType); - - // # of fraction bits in a number. - static const size_t kFractionBitCount = - std::numeric_limits::digits - 1; - - // # of exponent bits in a number. - static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount; - - // The mask for the sign bit. - static const Bits kSignBitMask = static_cast(1) << (kBitCount - 1); - - // The mask for the fraction bits. - static const Bits kFractionBitMask = - ~static_cast(0) >> (kExponentBitCount + 1); - - // The mask for the exponent bits. - static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask); - - // How many ULP's (Units in the Last Place) we want to tolerate when - // comparing two numbers. The larger the value, the more error we - // allow. A 0 value means that two numbers must be exactly the same - // to be considered equal. - // - // The maximum error of a single floating-point operation is 0.5 - // units in the last place. On Intel CPU's, all floating-point - // calculations are done with 80-bit precision, while double has 64 - // bits. Therefore, 4 should be enough for ordinary use. - // - // See the following article for more details on ULP: - // http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm. - static const size_t kMaxUlps = 4; - - // Constructs a FloatingPoint from a raw floating-point number. - // - // On an Intel CPU, passing a non-normalized NAN (Not a Number) - // around may change its bits, although the new value is guaranteed - // to be also a NAN. Therefore, don't expect this constructor to - // preserve the bits in x when x is a NAN. - explicit FloatingPoint(const RawType& x) { u_.value_ = x; } - - // Static methods - - // Reinterprets a bit pattern as a floating-point number. - // - // This function is needed to test the AlmostEquals() method. - static RawType ReinterpretBits(const Bits bits) { - FloatingPoint fp(0); - fp.u_.bits_ = bits; - return fp.u_.value_; - } - - // Returns the floating-point number that represent positive infinity. - static RawType Infinity() { - return ReinterpretBits(kExponentBitMask); - } - - // Non-static methods - - // Returns the bits that represents this number. - const Bits &bits() const { return u_.bits_; } - - // Returns the exponent bits of this number. - Bits exponent_bits() const { return kExponentBitMask & u_.bits_; } - - // Returns the fraction bits of this number. - Bits fraction_bits() const { return kFractionBitMask & u_.bits_; } - - // Returns the sign bit of this number. - Bits sign_bit() const { return kSignBitMask & u_.bits_; } - - // Returns true iff this is NAN (not a number). - bool is_nan() const { - // It's a NAN if the exponent bits are all ones and the fraction - // bits are not entirely zeros. - return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0); - } - - // Returns true iff this number is at most kMaxUlps ULP's away from - // rhs. In particular, this function: - // - // - returns false if either number is (or both are) NAN. - // - treats really large numbers as almost equal to infinity. - // - thinks +0.0 and -0.0 are 0 DLP's apart. 
- bool AlmostEquals(const FloatingPoint& rhs) const { - // The IEEE standard says that any comparison operation involving - // a NAN must return false. - if (is_nan() || rhs.is_nan()) return false; - - return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_) - <= kMaxUlps; - } - - private: - // The data type used to store the actual floating-point number. - union FloatingPointUnion { - RawType value_; // The raw floating-point number. - Bits bits_; // The bits that represent the number. - }; - - // Converts an integer from the sign-and-magnitude representation to - // the biased representation. More precisely, let N be 2 to the - // power of (kBitCount - 1), an integer x is represented by the - // unsigned number x + N. - // - // For instance, - // - // -N + 1 (the most negative number representable using - // sign-and-magnitude) is represented by 1; - // 0 is represented by N; and - // N - 1 (the biggest number representable using - // sign-and-magnitude) is represented by 2N - 1. - // - // Read http://en.wikipedia.org/wiki/Signed_number_representations - // for more details on signed number representations. - static Bits SignAndMagnitudeToBiased(const Bits &sam) { - if (kSignBitMask & sam) { - // sam represents a negative number. - return ~sam + 1; - } else { - // sam represents a positive number. - return kSignBitMask | sam; - } - } - - // Given two numbers in the sign-and-magnitude representation, - // returns the distance between them as an unsigned number. - static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, - const Bits &sam2) { - const Bits biased1 = SignAndMagnitudeToBiased(sam1); - const Bits biased2 = SignAndMagnitudeToBiased(sam2); - return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1); - } - - FloatingPointUnion u_; -}; - -// Typedefs the instances of the FloatingPoint template class that we -// care to use. -typedef FloatingPoint Float; -typedef FloatingPoint Double; - -// In order to catch the mistake of putting tests that use different -// test fixture classes in the same test case, we need to assign -// unique IDs to fixture classes and compare them. The TypeId type is -// used to hold such IDs. The user should treat TypeId as an opaque -// type: the only operation allowed on TypeId values is to compare -// them for equality using the == operator. -typedef const void* TypeId; - -template -class TypeIdHelper { - public: - // dummy_ must not have a const type. Otherwise an overly eager - // compiler (e.g. MSVC 7.1 & 8.0) may try to merge - // TypeIdHelper::dummy_ for different Ts as an "optimization". - static bool dummy_; -}; - -template -bool TypeIdHelper::dummy_ = false; - -// GetTypeId() returns the ID of type T. Different values will be -// returned for different types. Calling the function twice with the -// same type argument is guaranteed to return the same ID. -template -TypeId GetTypeId() { - // The compiler is required to allocate a different - // TypeIdHelper::dummy_ variable for each T used to instantiate - // the template. Therefore, the address of dummy_ is guaranteed to - // be unique. - return &(TypeIdHelper::dummy_); -} - -// Returns the type ID of ::testing::Test. Always call this instead -// of GetTypeId< ::testing::Test>() to get the type ID of -// ::testing::Test, as the latter may give the wrong result due to a -// suspected linker bug when compiling Google Test as a Mac OS X -// framework. 
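// --- Editor's illustration (not part of the deleted header) ---
// A standalone sketch of the ULP comparison and the sign-and-magnitude ->
// biased mapping explained above, specialized to 32-bit float and using our
// own names (it assumes unsigned int is 32 bits wide).
#include <cstring>
typedef unsigned int Bits32;
inline Bits32 BiasedBits(float f) {
  Bits32 bits;
  std::memcpy(&bits, &f, sizeof bits);               // raw IEEE-754 bit pattern
  const Bits32 kSign = 1u << 31;
  return (bits & kSign) ? ~bits + 1 : kSign | bits;  // negatives below, positives above
}
inline bool AlmostEqualUlps(float a, float b, Bits32 max_ulps) {
  if (a != a || b != b) return false;                // a NaN never compares equal
  const Bits32 x = BiasedBits(a), y = BiasedBits(b);
  return (x >= y ? x - y : y - x) <= max_ulps;       // distance in last-place units
}
// AlmostEqualUlps(1.0f, 1.0f + 1e-7f, 4) is true; +0.0f and -0.0f are 0 ULPs apart.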
-GTEST_API_ TypeId GetTestTypeId(); - -// Defines the abstract factory interface that creates instances -// of a Test object. -class TestFactoryBase { - public: - virtual ~TestFactoryBase() {} - - // Creates a test instance to run. The instance is both created and destroyed - // within TestInfoImpl::Run() - virtual Test* CreateTest() = 0; - - protected: - TestFactoryBase() {} - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase); -}; - -// This class provides implementation of TeastFactoryBase interface. -// It is used in TEST and TEST_F macros. -template -class TestFactoryImpl : public TestFactoryBase { - public: - virtual Test* CreateTest() { return new TestClass; } -}; - -#if GTEST_OS_WINDOWS - -// Predicate-formatters for implementing the HRESULT checking macros -// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED} -// We pass a long instead of HRESULT to avoid causing an -// include dependency for the HRESULT type. -GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr, - long hr); // NOLINT -GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr, - long hr); // NOLINT - -#endif // GTEST_OS_WINDOWS - -// Types of SetUpTestCase() and TearDownTestCase() functions. -typedef void (*SetUpTestCaseFunc)(); -typedef void (*TearDownTestCaseFunc)(); - -// Creates a new TestInfo object and registers it with Google Test; -// returns the created object. -// -// Arguments: -// -// test_case_name: name of the test case -// name: name of the test -// type_param the name of the test's type parameter, or NULL if -// this is not a typed or a type-parameterized test. -// value_param text representation of the test's value parameter, -// or NULL if this is not a type-parameterized test. -// fixture_class_id: ID of the test fixture class -// set_up_tc: pointer to the function that sets up the test case -// tear_down_tc: pointer to the function that tears down the test case -// factory: pointer to the factory that creates a test object. -// The newly created TestInfo instance will assume -// ownership of the factory object. -GTEST_API_ TestInfo* MakeAndRegisterTestInfo( - const char* test_case_name, const char* name, - const char* type_param, - const char* value_param, - TypeId fixture_class_id, - SetUpTestCaseFunc set_up_tc, - TearDownTestCaseFunc tear_down_tc, - TestFactoryBase* factory); - -// If *pstr starts with the given prefix, modifies *pstr to be right -// past the prefix and returns true; otherwise leaves *pstr unchanged -// and returns false. None of pstr, *pstr, and prefix can be NULL. -GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr); - -#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P - -// State of the definition of a type-parameterized test case. -class GTEST_API_ TypedTestCasePState { - public: - TypedTestCasePState() : registered_(false) {} - - // Adds the given test name to defined_test_names_ and return true - // if the test case hasn't been registered; otherwise aborts the - // program. - bool AddTestName(const char* file, int line, const char* case_name, - const char* test_name) { - if (registered_) { - fprintf(stderr, "%s Test %s must be defined before " - "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n", - FormatFileLocation(file, line).c_str(), test_name, case_name); - fflush(stderr); - posix::Abort(); - } - defined_test_names_.insert(test_name); - return true; - } - - // Verifies that registered_tests match the test names in - // defined_test_names_; returns registered_tests if successful, or - // aborts the program otherwise. 
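// --- Editor's illustration (not part of the deleted header) ---
// A minimal sketch, with our own names, of the TypeIdHelper idea above: each
// instantiation of Tag<T> owns a distinct static variable, so that variable's
// address serves as a process-wide unique identifier for the type T.
typedef const void* MyTypeId;
template <typename T> struct Tag { static bool dummy; };
template <typename T> bool Tag<T>::dummy = false;
template <typename T> MyTypeId MyGetTypeId() { return &Tag<T>::dummy; }
// MyGetTypeId<int>() == MyGetTypeId<int>(), but differs from MyGetTypeId<double>().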
- const char* VerifyRegisteredTestNames( - const char* file, int line, const char* registered_tests); - - private: - bool registered_; - ::std::set defined_test_names_; -}; - -// Skips to the first non-space char after the first comma in 'str'; -// returns NULL if no comma is found in 'str'. -inline const char* SkipComma(const char* str) { - const char* comma = strchr(str, ','); - if (comma == NULL) { - return NULL; - } - while (IsSpace(*(++comma))) {} - return comma; -} - -// Returns the prefix of 'str' before the first comma in it; returns -// the entire string if it contains no comma. -inline String GetPrefixUntilComma(const char* str) { - const char* comma = strchr(str, ','); - return comma == NULL ? String(str) : String(str, comma - str); -} - -// TypeParameterizedTest::Register() -// registers a list of type-parameterized tests with Google Test. The -// return value is insignificant - we just need to return something -// such that we can call this function in a namespace scope. -// -// Implementation note: The GTEST_TEMPLATE_ macro declares a template -// template parameter. It's defined in gtest-type-util.h. -template -class TypeParameterizedTest { - public: - // 'index' is the index of the test in the type list 'Types' - // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase, - // Types). Valid values for 'index' are [0, N - 1] where N is the - // length of Types. - static bool Register(const char* prefix, const char* case_name, - const char* test_names, int index) { - typedef typename Types::Head Type; - typedef Fixture FixtureClass; - typedef typename GTEST_BIND_(TestSel, Type) TestClass; - - // First, registers the first type-parameterized test in the type - // list. - MakeAndRegisterTestInfo( - String::Format("%s%s%s/%d", prefix, prefix[0] == '\0' ? "" : "/", - case_name, index).c_str(), - GetPrefixUntilComma(test_names).c_str(), - GetTypeName().c_str(), - NULL, // No value parameter. - GetTypeId(), - TestClass::SetUpTestCase, - TestClass::TearDownTestCase, - new TestFactoryImpl); - - // Next, recurses (at compile time) with the tail of the type list. - return TypeParameterizedTest - ::Register(prefix, case_name, test_names, index + 1); - } -}; - -// The base case for the compile time recursion. -template -class TypeParameterizedTest { - public: - static bool Register(const char* /*prefix*/, const char* /*case_name*/, - const char* /*test_names*/, int /*index*/) { - return true; - } -}; - -// TypeParameterizedTestCase::Register() -// registers *all combinations* of 'Tests' and 'Types' with Google -// Test. The return value is insignificant - we just need to return -// something such that we can call this function in a namespace scope. -template -class TypeParameterizedTestCase { - public: - static bool Register(const char* prefix, const char* case_name, - const char* test_names) { - typedef typename Tests::Head Head; - - // First, register the first test in 'Test' for each type in 'Types'. - TypeParameterizedTest::Register( - prefix, case_name, test_names, 0); - - // Next, recurses (at compile time) with the tail of the test list. - return TypeParameterizedTestCase - ::Register(prefix, case_name, SkipComma(test_names)); - } -}; - -// The base case for the compile time recursion. 
-template -class TypeParameterizedTestCase { - public: - static bool Register(const char* /*prefix*/, const char* /*case_name*/, - const char* /*test_names*/) { - return true; - } -}; - -#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P - -// Returns the current OS stack trace as a String. -// -// The maximum number of stack frames to be included is specified by -// the gtest_stack_trace_depth flag. The skip_count parameter -// specifies the number of top frames to be skipped, which doesn't -// count against the number of frames to be included. -// -// For example, if Foo() calls Bar(), which in turn calls -// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in -// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't. -GTEST_API_ String GetCurrentOsStackTraceExceptTop(UnitTest* unit_test, - int skip_count); - -// Helpers for suppressing warnings on unreachable code or constant -// condition. - -// Always returns true. -GTEST_API_ bool AlwaysTrue(); - -// Always returns false. -inline bool AlwaysFalse() { return !AlwaysTrue(); } - -// Helper for suppressing false warning from Clang on a const char* -// variable declared in a conditional expression always being NULL in -// the else branch. -struct GTEST_API_ ConstCharPtr { - ConstCharPtr(const char* str) : value(str) {} - operator bool() const { return true; } - const char* value; -}; - -// A simple Linear Congruential Generator for generating random -// numbers with a uniform distribution. Unlike rand() and srand(), it -// doesn't use global state (and therefore can't interfere with user -// code). Unlike rand_r(), it's portable. An LCG isn't very random, -// but it's good enough for our purposes. -class GTEST_API_ Random { - public: - static const UInt32 kMaxRange = 1u << 31; - - explicit Random(UInt32 seed) : state_(seed) {} - - void Reseed(UInt32 seed) { state_ = seed; } - - // Generates a random number from [0, range). Crashes if 'range' is - // 0 or greater than kMaxRange. - UInt32 Generate(UInt32 range); - - private: - UInt32 state_; - GTEST_DISALLOW_COPY_AND_ASSIGN_(Random); -}; - -// Defining a variable of type CompileAssertTypesEqual will cause a -// compiler error iff T1 and T2 are different types. -template -struct CompileAssertTypesEqual; - -template -struct CompileAssertTypesEqual { -}; - -// Removes the reference from a type if it is a reference type, -// otherwise leaves it unchanged. This is the same as -// tr1::remove_reference, which is not widely available yet. -template -struct RemoveReference { typedef T type; }; // NOLINT -template -struct RemoveReference { typedef T type; }; // NOLINT - -// A handy wrapper around RemoveReference that works when the argument -// T depends on template parameters. -#define GTEST_REMOVE_REFERENCE_(T) \ - typename ::testing::internal::RemoveReference::type - -// Removes const from a type if it is a const type, otherwise leaves -// it unchanged. This is the same as tr1::remove_const, which is not -// widely available yet. -template -struct RemoveConst { typedef T type; }; // NOLINT -template -struct RemoveConst { typedef T type; }; // NOLINT - -// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above -// definition to fail to remove the const in 'const int[3]' and 'const -// char[3][4]'. The following specialization works around the bug. -// However, it causes trouble with GCC and thus needs to be -// conditionally compiled. 
-#if defined(_MSC_VER) || defined(__SUNPRO_CC) || defined(__IBMCPP__) -template -struct RemoveConst { - typedef typename RemoveConst::type type[N]; -}; -#endif - -// A handy wrapper around RemoveConst that works when the argument -// T depends on template parameters. -#define GTEST_REMOVE_CONST_(T) \ - typename ::testing::internal::RemoveConst::type - -// Turns const U&, U&, const U, and U all into U. -#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \ - GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T)) - -// Adds reference to a type if it is not a reference type, -// otherwise leaves it unchanged. This is the same as -// tr1::add_reference, which is not widely available yet. -template -struct AddReference { typedef T& type; }; // NOLINT -template -struct AddReference { typedef T& type; }; // NOLINT - -// A handy wrapper around AddReference that works when the argument T -// depends on template parameters. -#define GTEST_ADD_REFERENCE_(T) \ - typename ::testing::internal::AddReference::type - -// Adds a reference to const on top of T as necessary. For example, -// it transforms -// -// char ==> const char& -// const char ==> const char& -// char& ==> const char& -// const char& ==> const char& -// -// The argument T must depend on some template parameters. -#define GTEST_REFERENCE_TO_CONST_(T) \ - GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T)) - -// ImplicitlyConvertible::value is a compile-time bool -// constant that's true iff type From can be implicitly converted to -// type To. -template -class ImplicitlyConvertible { - private: - // We need the following helper functions only for their types. - // They have no implementations. - - // MakeFrom() is an expression whose type is From. We cannot simply - // use From(), as the type From may not have a public default - // constructor. - static From MakeFrom(); - - // These two functions are overloaded. Given an expression - // Helper(x), the compiler will pick the first version if x can be - // implicitly converted to type To; otherwise it will pick the - // second version. - // - // The first version returns a value of size 1, and the second - // version returns a value of size 2. Therefore, by checking the - // size of Helper(x), which can be done at compile time, we can tell - // which version of Helper() is used, and hence whether x can be - // implicitly converted to type To. - static char Helper(To); - static char (&Helper(...))[2]; // NOLINT - - // We have to put the 'public' section after the 'private' section, - // or MSVC refuses to compile the code. - public: - // MSVC warns about implicitly converting from double to int for - // possible loss of data, so we need to temporarily disable the - // warning. -#ifdef _MSC_VER -# pragma warning(push) // Saves the current warning state. -# pragma warning(disable:4244) // Temporarily disables warning 4244. - - static const bool value = - sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1; -# pragma warning(pop) // Restores the warning state. -#elif defined(__BORLANDC__) - // C++Builder cannot use member overload resolution during template - // instantiation. The simplest workaround is to use its C++0x type traits - // functions (C++Builder 2009 and above only). 
- static const bool value = __is_convertible(From, To); -#else - static const bool value = - sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1; -#endif // _MSV_VER -}; -template -const bool ImplicitlyConvertible::value; - -// IsAProtocolMessage::value is a compile-time bool constant that's -// true iff T is type ProtocolMessage, proto2::Message, or a subclass -// of those. -template -struct IsAProtocolMessage - : public bool_constant< - ImplicitlyConvertible::value || - ImplicitlyConvertible::value> { -}; - -// When the compiler sees expression IsContainerTest(0), if C is an -// STL-style container class, the first overload of IsContainerTest -// will be viable (since both C::iterator* and C::const_iterator* are -// valid types and NULL can be implicitly converted to them). It will -// be picked over the second overload as 'int' is a perfect match for -// the type of argument 0. If C::iterator or C::const_iterator is not -// a valid type, the first overload is not viable, and the second -// overload will be picked. Therefore, we can determine whether C is -// a container class by checking the type of IsContainerTest(0). -// The value of the expression is insignificant. -// -// Note that we look for both C::iterator and C::const_iterator. The -// reason is that C++ injects the name of a class as a member of the -// class itself (e.g. you can refer to class iterator as either -// 'iterator' or 'iterator::iterator'). If we look for C::iterator -// only, for example, we would mistakenly think that a class named -// iterator is an STL container. -// -// Also note that the simpler approach of overloading -// IsContainerTest(typename C::const_iterator*) and -// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++. -typedef int IsContainer; -template -IsContainer IsContainerTest(int /* dummy */, - typename C::iterator* /* it */ = NULL, - typename C::const_iterator* /* const_it */ = NULL) { - return 0; -} - -typedef char IsNotContainer; -template -IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; } - -// EnableIf::type is void when 'Cond' is true, and -// undefined when 'Cond' is false. To use SFINAE to make a function -// overload only apply when a particular expression is true, add -// "typename EnableIf::type* = 0" as the last parameter. -template struct EnableIf; -template<> struct EnableIf { typedef void type; }; // NOLINT - -// Utilities for native arrays. - -// ArrayEq() compares two k-dimensional native arrays using the -// elements' operator==, where k can be any integer >= 0. When k is -// 0, ArrayEq() degenerates into comparing a single pair of values. - -template -bool ArrayEq(const T* lhs, size_t size, const U* rhs); - -// This generic version is used when k is 0. -template -inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; } - -// This overload is used when k >= 1. -template -inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) { - return internal::ArrayEq(lhs, N, rhs); -} - -// This helper reduces code bloat. If we instead put its logic inside -// the previous ArrayEq() function, arrays with different sizes would -// lead to different copies of the template code. -template -bool ArrayEq(const T* lhs, size_t size, const U* rhs) { - for (size_t i = 0; i != size; i++) { - if (!internal::ArrayEq(lhs[i], rhs[i])) - return false; - } - return true; -} - -// Finds the first element in the iterator range [begin, end) that -// equals elem. Element may be a native array type itself. 
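// --- Editor's illustration (not part of the deleted header) ---
// A standalone sketch, with our own names, of the conversion probe behind
// ImplicitlyConvertible above: Probe(To) wins overload resolution exactly when
// an expression of type From converts implicitly to To, and the two return
// types have different sizes, so sizeof yields the answer at compile time.
template <typename From, typename To>
class ConvertsTo {
  static From Make();                     // never defined; used only for its type
  static char Probe(To);                  // picked when From -> To converts
  static char (&Probe(...))[2];           // fallback otherwise
 public:
  static const bool value = sizeof(Probe(Make())) == 1;
};
// ConvertsTo<float, double>::value is true; ConvertsTo<const char*, int>::value is false.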
-template -Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) { - for (Iter it = begin; it != end; ++it) { - if (internal::ArrayEq(*it, elem)) - return it; - } - return end; -} - -// CopyArray() copies a k-dimensional native array using the elements' -// operator=, where k can be any integer >= 0. When k is 0, -// CopyArray() degenerates into copying a single value. - -template -void CopyArray(const T* from, size_t size, U* to); - -// This generic version is used when k is 0. -template -inline void CopyArray(const T& from, U* to) { *to = from; } - -// This overload is used when k >= 1. -template -inline void CopyArray(const T(&from)[N], U(*to)[N]) { - internal::CopyArray(from, N, *to); -} - -// This helper reduces code bloat. If we instead put its logic inside -// the previous CopyArray() function, arrays with different sizes -// would lead to different copies of the template code. -template -void CopyArray(const T* from, size_t size, U* to) { - for (size_t i = 0; i != size; i++) { - internal::CopyArray(from[i], to + i); - } -} - -// The relation between an NativeArray object (see below) and the -// native array it represents. -enum RelationToSource { - kReference, // The NativeArray references the native array. - kCopy // The NativeArray makes a copy of the native array and - // owns the copy. -}; - -// Adapts a native array to a read-only STL-style container. Instead -// of the complete STL container concept, this adaptor only implements -// members useful for Google Mock's container matchers. New members -// should be added as needed. To simplify the implementation, we only -// support Element being a raw type (i.e. having no top-level const or -// reference modifier). It's the client's responsibility to satisfy -// this requirement. Element can be an array type itself (hence -// multi-dimensional arrays are supported). -template -class NativeArray { - public: - // STL-style container typedefs. - typedef Element value_type; - typedef Element* iterator; - typedef const Element* const_iterator; - - // Constructs from a native array. - NativeArray(const Element* array, size_t count, RelationToSource relation) { - Init(array, count, relation); - } - - // Copy constructor. - NativeArray(const NativeArray& rhs) { - Init(rhs.array_, rhs.size_, rhs.relation_to_source_); - } - - ~NativeArray() { - // Ensures that the user doesn't instantiate NativeArray with a - // const or reference type. - static_cast(StaticAssertTypeEqHelper()); - if (relation_to_source_ == kCopy) - delete[] array_; - } - - // STL-style container methods. - size_t size() const { return size_; } - const_iterator begin() const { return array_; } - const_iterator end() const { return array_ + size_; } - bool operator==(const NativeArray& rhs) const { - return size() == rhs.size() && - ArrayEq(begin(), size(), rhs.begin()); - } - - private: - // Initializes this object; makes a copy of the input array if - // 'relation' is kCopy. 
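// For reference, a small usage sketch of the ArrayEq() helpers defined above.
// Nested native arrays are compared element-wise, one dimension at a time
// (the wrapper function name is illustrative only):
//
//   void ArrayEqSketch() {
//     int a[2][3] = { {1, 2, 3}, {4, 5, 6} };
//     int b[2][3] = { {1, 2, 3}, {4, 5, 6} };
//     int c[2][3] = { {1, 2, 3}, {4, 6, 5} };
//     bool same = ::testing::internal::ArrayEq(a, b);  // true
//     bool diff = ::testing::internal::ArrayEq(a, c);  // false
//     (void)same; (void)diff;
//   }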
- void Init(const Element* array, size_t a_size, RelationToSource relation) { - if (relation == kReference) { - array_ = array; - } else { - Element* const copy = new Element[a_size]; - CopyArray(array, a_size, copy); - array_ = copy; - } - size_ = a_size; - relation_to_source_ = relation; - } - - const Element* array_; - size_t size_; - RelationToSource relation_to_source_; - - GTEST_DISALLOW_ASSIGN_(NativeArray); -}; - -} // namespace internal -} // namespace testing - -#define GTEST_MESSAGE_AT_(file, line, message, result_type) \ - ::testing::internal::AssertHelper(result_type, file, line, message) \ - = ::testing::Message() - -#define GTEST_MESSAGE_(message, result_type) \ - GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type) - -#define GTEST_FATAL_FAILURE_(message) \ - return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure) - -#define GTEST_NONFATAL_FAILURE_(message) \ - GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure) - -#define GTEST_SUCCESS_(message) \ - GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess) - -// Suppresses MSVC warnings 4072 (unreachable code) for the code following -// statement if it returns or throws (or doesn't return or throw in some -// situations). -#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \ - if (::testing::internal::AlwaysTrue()) { statement; } - -#define GTEST_TEST_THROW_(statement, expected_exception, fail) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::ConstCharPtr gtest_msg = "") { \ - bool gtest_caught_expected = false; \ - try { \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - } \ - catch (expected_exception const&) { \ - gtest_caught_expected = true; \ - } \ - catch (...) { \ - gtest_msg.value = \ - "Expected: " #statement " throws an exception of type " \ - #expected_exception ".\n Actual: it throws a different type."; \ - goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \ - } \ - if (!gtest_caught_expected) { \ - gtest_msg.value = \ - "Expected: " #statement " throws an exception of type " \ - #expected_exception ".\n Actual: it throws nothing."; \ - goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \ - } \ - } else \ - GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \ - fail(gtest_msg.value) - -#define GTEST_TEST_NO_THROW_(statement, fail) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::AlwaysTrue()) { \ - try { \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - } \ - catch (...) { \ - goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \ - } \ - } else \ - GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \ - fail("Expected: " #statement " doesn't throw an exception.\n" \ - " Actual: it throws.") - -#define GTEST_TEST_ANY_THROW_(statement, fail) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::AlwaysTrue()) { \ - bool gtest_caught_any = false; \ - try { \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - } \ - catch (...) { \ - gtest_caught_any = true; \ - } \ - if (!gtest_caught_any) { \ - goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \ - } \ - } else \ - GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \ - fail("Expected: " #statement " throws an exception.\n" \ - " Actual: it doesn't.") - - -// Implements Boolean test assertions such as EXPECT_TRUE. expression can be -// either a boolean expression or an AssertionResult. 
text is a textual -// represenation of expression as it was passed into the EXPECT_TRUE. -#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (const ::testing::AssertionResult gtest_ar_ = \ - ::testing::AssertionResult(expression)) \ - ; \ - else \ - fail(::testing::internal::GetBoolAssertionFailureMessage(\ - gtest_ar_, text, #actual, #expected).c_str()) - -#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::AlwaysTrue()) { \ - ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \ - goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \ - } \ - } else \ - GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \ - fail("Expected: " #statement " doesn't generate new fatal " \ - "failures in the current thread.\n" \ - " Actual: it does.") - -// Expands to the name of the class that implements the given test. -#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ - test_case_name##_##test_name##_Test - -// Helper macro for defining tests. -#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\ -class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\ - public:\ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\ - private:\ - virtual void TestBody();\ - static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ - GTEST_DISALLOW_COPY_AND_ASSIGN_(\ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ -};\ -\ -::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ - ::test_info_ =\ - ::testing::internal::MakeAndRegisterTestInfo(\ - #test_case_name, #test_name, NULL, NULL, \ - (parent_id), \ - parent_class::SetUpTestCase, \ - parent_class::TearDownTestCase, \ - new ::testing::internal::TestFactoryImpl<\ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ -void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) -// -// The Google C++ Testing Framework (Google Test) -// -// This header file defines the public API for death tests. It is -// #included by gtest.h so a user doesn't need to include this -// directly. - -#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ -#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ - -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) -// -// The Google C++ Testing Framework (Google Test) -// -// This header file defines internal utilities needed for implementing -// death tests. They are subject to change without notice. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ - - -#include - -namespace testing { -namespace internal { - -GTEST_DECLARE_string_(internal_run_death_test); - -// Names of the flags (needed for parsing Google Test flags). -const char kDeathTestStyleFlag[] = "death_test_style"; -const char kDeathTestUseFork[] = "death_test_use_fork"; -const char kInternalRunDeathTestFlag[] = "internal_run_death_test"; - -#if GTEST_HAS_DEATH_TEST - -// DeathTest is a class that hides much of the complexity of the -// GTEST_DEATH_TEST_ macro. 
It is abstract; its static Create method -// returns a concrete class that depends on the prevailing death test -// style, as defined by the --gtest_death_test_style and/or -// --gtest_internal_run_death_test flags. - -// In describing the results of death tests, these terms are used with -// the corresponding definitions: -// -// exit status: The integer exit information in the format specified -// by wait(2) -// exit code: The integer code passed to exit(3), _exit(2), or -// returned from main() -class GTEST_API_ DeathTest { - public: - // Create returns false if there was an error determining the - // appropriate action to take for the current death test; for example, - // if the gtest_death_test_style flag is set to an invalid value. - // The LastMessage method will return a more detailed message in that - // case. Otherwise, the DeathTest pointer pointed to by the "test" - // argument is set. If the death test should be skipped, the pointer - // is set to NULL; otherwise, it is set to the address of a new concrete - // DeathTest object that controls the execution of the current test. - static bool Create(const char* statement, const RE* regex, - const char* file, int line, DeathTest** test); - DeathTest(); - virtual ~DeathTest() { } - - // A helper class that aborts a death test when it's deleted. - class ReturnSentinel { - public: - explicit ReturnSentinel(DeathTest* test) : test_(test) { } - ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); } - private: - DeathTest* const test_; - GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel); - } GTEST_ATTRIBUTE_UNUSED_; - - // An enumeration of possible roles that may be taken when a death - // test is encountered. EXECUTE means that the death test logic should - // be executed immediately. OVERSEE means that the program should prepare - // the appropriate environment for a child process to execute the death - // test, then wait for it to complete. - enum TestRole { OVERSEE_TEST, EXECUTE_TEST }; - - // An enumeration of the three reasons that a test might be aborted. - enum AbortReason { - TEST_ENCOUNTERED_RETURN_STATEMENT, - TEST_THREW_EXCEPTION, - TEST_DID_NOT_DIE - }; - - // Assumes one of the above roles. - virtual TestRole AssumeRole() = 0; - - // Waits for the death test to finish and returns its status. - virtual int Wait() = 0; - - // Returns true if the death test passed; that is, the test process - // exited during the test, its exit status matches a user-supplied - // predicate, and its stderr output matches a user-supplied regular - // expression. - // The user-supplied predicate may be a macro expression rather - // than a function pointer or functor, or else Wait and Passed could - // be combined. - virtual bool Passed(bool exit_status_ok) = 0; - - // Signals that the death test did not die as expected. - virtual void Abort(AbortReason reason) = 0; - - // Returns a human-readable outcome message regarding the outcome of - // the last death test. - static const char* LastMessage(); - - static void set_last_death_test_message(const String& message); - - private: - // A string containing a description of the outcome of the last death test. - static String last_death_test_message_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest); -}; - -// Factory interface for death tests. May be mocked out for testing. 
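// As a concrete illustration of the exit-status/exit-code distinction above:
// a user-supplied predicate receives the raw wait(2) status, not the exit
// code. This is a POSIX-only sketch with an illustrative name; it assumes
// <sys/wait.h> has been included and is usable as the predicate argument of
// ASSERT_EXIT()/EXPECT_EXIT().
#if !GTEST_OS_WINDOWS
inline bool ExitedCleanlyWith42(int exit_status) {
  return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == 42;
}
#endif  // !GTEST_OS_WINDOWS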
-class DeathTestFactory { - public: - virtual ~DeathTestFactory() { } - virtual bool Create(const char* statement, const RE* regex, - const char* file, int line, DeathTest** test) = 0; -}; - -// A concrete DeathTestFactory implementation for normal use. -class DefaultDeathTestFactory : public DeathTestFactory { - public: - virtual bool Create(const char* statement, const RE* regex, - const char* file, int line, DeathTest** test); -}; - -// Returns true if exit_status describes a process that was terminated -// by a signal, or exited normally with a nonzero exit code. -GTEST_API_ bool ExitedUnsuccessfully(int exit_status); - -// Traps C++ exceptions escaping statement and reports them as test -// failures. Note that trapping SEH exceptions is not implemented here. -# if GTEST_HAS_EXCEPTIONS -# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ - try { \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - } catch (const ::std::exception& gtest_exception) { \ - fprintf(\ - stderr, \ - "\n%s: Caught std::exception-derived exception escaping the " \ - "death test statement. Exception message: %s\n", \ - ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \ - gtest_exception.what()); \ - fflush(stderr); \ - death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ - } catch (...) { \ - death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ - } - -# else -# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) - -# endif - -// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*, -// ASSERT_EXIT*, and EXPECT_EXIT*. -# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::AlwaysTrue()) { \ - const ::testing::internal::RE& gtest_regex = (regex); \ - ::testing::internal::DeathTest* gtest_dt; \ - if (!::testing::internal::DeathTest::Create(#statement, >est_regex, \ - __FILE__, __LINE__, >est_dt)) { \ - goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ - } \ - if (gtest_dt != NULL) { \ - ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \ - gtest_dt_ptr(gtest_dt); \ - switch (gtest_dt->AssumeRole()) { \ - case ::testing::internal::DeathTest::OVERSEE_TEST: \ - if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \ - goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ - } \ - break; \ - case ::testing::internal::DeathTest::EXECUTE_TEST: { \ - ::testing::internal::DeathTest::ReturnSentinel \ - gtest_sentinel(gtest_dt); \ - GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \ - gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \ - break; \ - } \ - default: \ - break; \ - } \ - } \ - } else \ - GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \ - fail(::testing::internal::DeathTest::LastMessage()) -// The symbol "fail" here expands to something into which a message -// can be streamed. - -// A class representing the parsed contents of the -// --gtest_internal_run_death_test flag, as it existed when -// RUN_ALL_TESTS was called. 
-class InternalRunDeathTestFlag { - public: - InternalRunDeathTestFlag(const String& a_file, - int a_line, - int an_index, - int a_write_fd) - : file_(a_file), line_(a_line), index_(an_index), - write_fd_(a_write_fd) {} - - ~InternalRunDeathTestFlag() { - if (write_fd_ >= 0) - posix::Close(write_fd_); - } - - String file() const { return file_; } - int line() const { return line_; } - int index() const { return index_; } - int write_fd() const { return write_fd_; } - - private: - String file_; - int line_; - int index_; - int write_fd_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag); -}; - -// Returns a newly created InternalRunDeathTestFlag object with fields -// initialized from the GTEST_FLAG(internal_run_death_test) flag if -// the flag is specified; otherwise returns NULL. -InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag(); - -#else // GTEST_HAS_DEATH_TEST - -// This macro is used for implementing macros such as -// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where -// death tests are not supported. Those macros must compile on such systems -// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on -// systems that support death tests. This allows one to write such a macro -// on a system that does not support death tests and be sure that it will -// compile on a death-test supporting system. -// -// Parameters: -// statement - A statement that a macro such as EXPECT_DEATH would test -// for program termination. This macro has to make sure this -// statement is compiled but not executed, to ensure that -// EXPECT_DEATH_IF_SUPPORTED compiles with a certain -// parameter iff EXPECT_DEATH compiles with it. -// regex - A regex that a macro such as EXPECT_DEATH would use to test -// the output of statement. This parameter has to be -// compiled but not evaluated by this macro, to ensure that -// this macro only accepts expressions that a macro such as -// EXPECT_DEATH would accept. -// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED -// and a return statement for ASSERT_DEATH_IF_SUPPORTED. -// This ensures that ASSERT_DEATH_IF_SUPPORTED will not -// compile inside functions where ASSERT_DEATH doesn't -// compile. -// -// The branch that has an always false condition is used to ensure that -// statement and regex are compiled (and thus syntactically correct) but -// never executed. The unreachable code macro protects the terminator -// statement from generating an 'unreachable code' warning in case -// statement unconditionally returns or throws. The Message constructor at -// the end allows the syntax of streaming additional messages into the -// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH. -# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::AlwaysTrue()) { \ - GTEST_LOG_(WARNING) \ - << "Death tests are not supported on this platform.\n" \ - << "Statement '" #statement "' cannot be verified."; \ - } else if (::testing::internal::AlwaysFalse()) { \ - ::testing::internal::RE::PartialMatch(".*", (regex)); \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - terminator; \ - } else \ - ::testing::Message() - -#endif // GTEST_HAS_DEATH_TEST - -} // namespace internal -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ - -namespace testing { - -// This flag controls the style of death tests. 
Valid values are "threadsafe", -// meaning that the death test child process will re-execute the test binary -// from the start, running only a single death test, or "fast", -// meaning that the child process will execute the test logic immediately -// after forking. -GTEST_DECLARE_string_(death_test_style); - -#if GTEST_HAS_DEATH_TEST - -// The following macros are useful for writing death tests. - -// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is -// executed: -// -// 1. It generates a warning if there is more than one active -// thread. This is because it's safe to fork() or clone() only -// when there is a single thread. -// -// 2. The parent process clone()s a sub-process and runs the death -// test in it; the sub-process exits with code 0 at the end of the -// death test, if it hasn't exited already. -// -// 3. The parent process waits for the sub-process to terminate. -// -// 4. The parent process checks the exit code and error message of -// the sub-process. -// -// Examples: -// -// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number"); -// for (int i = 0; i < 5; i++) { -// EXPECT_DEATH(server.ProcessRequest(i), -// "Invalid request .* in ProcessRequest()") -// << "Failed to die on request " << i); -// } -// -// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting"); -// -// bool KilledBySIGHUP(int exit_code) { -// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP; -// } -// -// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!"); -// -// On the regular expressions used in death tests: -// -// On POSIX-compliant systems (*nix), we use the library, -// which uses the POSIX extended regex syntax. -// -// On other platforms (e.g. Windows), we only support a simple regex -// syntax implemented as part of Google Test. This limited -// implementation should be enough most of the time when writing -// death tests; though it lacks many features you can find in PCRE -// or POSIX extended regex syntax. For example, we don't support -// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and -// repetition count ("x{5,7}"), among others. -// -// Below is the syntax that we do support. We chose it to be a -// subset of both PCRE and POSIX extended regex, so it's easy to -// learn wherever you come from. In the following: 'A' denotes a -// literal character, period (.), or a single \\ escape sequence; -// 'x' and 'y' denote regular expressions; 'm' and 'n' are for -// natural numbers. -// -// c matches any literal character c -// \\d matches any decimal digit -// \\D matches any character that's not a decimal digit -// \\f matches \f -// \\n matches \n -// \\r matches \r -// \\s matches any ASCII whitespace, including \n -// \\S matches any character that's not a whitespace -// \\t matches \t -// \\v matches \v -// \\w matches any letter, _, or decimal digit -// \\W matches any character that \\w doesn't match -// \\c matches any literal character c, which must be a punctuation -// . matches any single character except \n -// A? matches 0 or 1 occurrences of A -// A* matches 0 or many occurrences of A -// A+ matches 1 or many occurrences of A -// ^ matches the beginning of a string (not that of each line) -// $ matches the end of a string (not that of each line) -// xy matches x followed by y -// -// If you accidentally use PCRE or POSIX extended regex features -// not implemented by us, you will get a run-time failure. In that -// case, please try to rewrite your regular expression within the -// above syntax. 
-// -// This implementation is *not* meant to be as highly tuned or robust -// as a compiled regex library, but should perform well enough for a -// death test, which already incurs significant overhead by launching -// a child process. -// -// Known caveats: -// -// A "threadsafe" style death test obtains the path to the test -// program from argv[0] and re-executes it in the sub-process. For -// simplicity, the current implementation doesn't search the PATH -// when launching the sub-process. This means that the user must -// invoke the test program via a path that contains at least one -// path separator (e.g. path/to/foo_test and -// /absolute/path/to/bar_test are fine, but foo_test is not). This -// is rarely a problem as people usually don't put the test binary -// directory in PATH. -// -// TODO(wan@google.com): make thread-safe death tests search the PATH. - -// Asserts that a given statement causes the program to exit, with an -// integer exit status that satisfies predicate, and emitting error output -// that matches regex. -# define ASSERT_EXIT(statement, predicate, regex) \ - GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_) - -// Like ASSERT_EXIT, but continues on to successive tests in the -// test case, if any: -# define EXPECT_EXIT(statement, predicate, regex) \ - GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_) - -// Asserts that a given statement causes the program to exit, either by -// explicitly exiting with a nonzero exit code or being killed by a -// signal, and emitting error output that matches regex. -# define ASSERT_DEATH(statement, regex) \ - ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) - -// Like ASSERT_DEATH, but continues on to successive tests in the -// test case, if any: -# define EXPECT_DEATH(statement, regex) \ - EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) - -// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*: - -// Tests that an exit code describes a normal exit with a given exit code. -class GTEST_API_ ExitedWithCode { - public: - explicit ExitedWithCode(int exit_code); - bool operator()(int exit_status) const; - private: - // No implementation - assignment is unsupported. - void operator=(const ExitedWithCode& other); - - const int exit_code_; -}; - -# if !GTEST_OS_WINDOWS -// Tests that an exit code describes an exit due to termination by a -// given signal. -class GTEST_API_ KilledBySignal { - public: - explicit KilledBySignal(int signum); - bool operator()(int exit_status) const; - private: - const int signum_; -}; -# endif // !GTEST_OS_WINDOWS - -// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode. -// The death testing framework causes this to have interesting semantics, -// since the sideeffects of the call are only visible in opt mode, and not -// in debug mode. -// -// In practice, this can be used to test functions that utilize the -// LOG(DFATAL) macro using the following style: -// -// int DieInDebugOr12(int* sideeffect) { -// if (sideeffect) { -// *sideeffect = 12; -// } -// LOG(DFATAL) << "death"; -// return 12; -// } -// -// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) { -// int sideeffect = 0; -// // Only asserts in dbg. -// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death"); -// -// #ifdef NDEBUG -// // opt-mode has sideeffect visible. -// EXPECT_EQ(12, sideeffect); -// #else -// // dbg-mode no visible sideeffect. 
-// EXPECT_EQ(0, sideeffect); -// #endif -// } -// -// This will assert that DieInDebugReturn12InOpt() crashes in debug -// mode, usually due to a DCHECK or LOG(DFATAL), but returns the -// appropriate fallback value (12 in this case) in opt mode. If you -// need to test that a function has appropriate side-effects in opt -// mode, include assertions against the side-effects. A general -// pattern for this is: -// -// EXPECT_DEBUG_DEATH({ -// // Side-effects here will have an effect after this statement in -// // opt mode, but none in debug mode. -// EXPECT_EQ(12, DieInDebugOr12(&sideeffect)); -// }, "death"); -// -# ifdef NDEBUG - -# define EXPECT_DEBUG_DEATH(statement, regex) \ - do { statement; } while (::testing::internal::AlwaysFalse()) - -# define ASSERT_DEBUG_DEATH(statement, regex) \ - do { statement; } while (::testing::internal::AlwaysFalse()) - -# else - -# define EXPECT_DEBUG_DEATH(statement, regex) \ - EXPECT_DEATH(statement, regex) - -# define ASSERT_DEBUG_DEATH(statement, regex) \ - ASSERT_DEATH(statement, regex) - -# endif // NDEBUG for EXPECT_DEBUG_DEATH -#endif // GTEST_HAS_DEATH_TEST - -// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and -// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if -// death tests are supported; otherwise they just issue a warning. This is -// useful when you are combining death test assertions with normal test -// assertions in one test. -#if GTEST_HAS_DEATH_TEST -# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ - EXPECT_DEATH(statement, regex) -# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ - ASSERT_DEATH(statement, regex) -#else -# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ - GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, ) -# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ - GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return) -#endif - -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ -// Copyright 2005, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
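// As an illustration of combining a death assertion with ordinary assertions,
// EXPECT_DEATH_IF_SUPPORTED keeps the test compiling (and merely warning) on
// platforms without death-test support. The tested functions are made up for
// this example:
//
//   TEST(InputValidationDeathTest, RejectsBadInput) {
//     EXPECT_DEATH_IF_SUPPORTED(HandleBadInput(), "bad input")
//         << "HandleBadInput() should abort with a diagnostic";
//     EXPECT_TRUE(SystemStillUsable());  // normal assertions mix freely
//   }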
-// -// Author: wan@google.com (Zhanyong Wan) -// -// The Google C++ Testing Framework (Google Test) -// -// This header file defines the Message class. -// -// IMPORTANT NOTE: Due to limitation of the C++ language, we have to -// leave some internal implementation details in this header file. -// They are clearly marked by comments like this: -// -// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -// -// Such code is NOT meant to be used by a user directly, and is subject -// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user -// program! - -#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ -#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ - -#include - - -namespace testing { - -// The Message class works like an ostream repeater. -// -// Typical usage: -// -// 1. You stream a bunch of values to a Message object. -// It will remember the text in a stringstream. -// 2. Then you stream the Message object to an ostream. -// This causes the text in the Message to be streamed -// to the ostream. -// -// For example; -// -// testing::Message foo; -// foo << 1 << " != " << 2; -// std::cout << foo; -// -// will print "1 != 2". -// -// Message is not intended to be inherited from. In particular, its -// destructor is not virtual. -// -// Note that stringstream behaves differently in gcc and in MSVC. You -// can stream a NULL char pointer to it in the former, but not in the -// latter (it causes an access violation if you do). The Message -// class hides this difference by treating a NULL char pointer as -// "(null)". -class GTEST_API_ Message { - private: - // The type of basic IO manipulators (endl, ends, and flush) for - // narrow streams. - typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&); - - public: - // Constructs an empty Message. - // We allocate the stringstream separately because otherwise each use of - // ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's - // stack frame leading to huge stack frames in some cases; gcc does not reuse - // the stack space. - Message() : ss_(new ::std::stringstream) { - // By default, we want there to be enough precision when printing - // a double to a Message. - *ss_ << std::setprecision(std::numeric_limits::digits10 + 2); - } - - // Copy constructor. - Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT - *ss_ << msg.GetString(); - } - - // Constructs a Message from a C-string. - explicit Message(const char* str) : ss_(new ::std::stringstream) { - *ss_ << str; - } - -#if GTEST_OS_SYMBIAN - // Streams a value (either a pointer or not) to this object. - template - inline Message& operator <<(const T& value) { - StreamHelper(typename internal::is_pointer::type(), value); - return *this; - } -#else - // Streams a non-pointer value to this object. - template - inline Message& operator <<(const T& val) { - ::GTestStreamToHelper(ss_.get(), val); - return *this; - } - - // Streams a pointer value to this object. - // - // This function is an overload of the previous one. When you - // stream a pointer to a Message, this definition will be used as it - // is more specialized. (The C++ Standard, section - // [temp.func.order].) If you stream a non-pointer, then the - // previous definition will be used. - // - // The reason for this overload is that streaming a NULL pointer to - // ostream is undefined behavior. Depending on the compiler, you - // may get "0", "(nil)", "(null)", or an access violation. To - // ensure consistent result across compilers, we always treat NULL - // as "(null)". 
- template - inline Message& operator <<(T* const& pointer) { // NOLINT - if (pointer == NULL) { - *ss_ << "(null)"; - } else { - ::GTestStreamToHelper(ss_.get(), pointer); - } - return *this; - } -#endif // GTEST_OS_SYMBIAN - - // Since the basic IO manipulators are overloaded for both narrow - // and wide streams, we have to provide this specialized definition - // of operator <<, even though its body is the same as the - // templatized version above. Without this definition, streaming - // endl or other basic IO manipulators to Message will confuse the - // compiler. - Message& operator <<(BasicNarrowIoManip val) { - *ss_ << val; - return *this; - } - - // Instead of 1/0, we want to see true/false for bool values. - Message& operator <<(bool b) { - return *this << (b ? "true" : "false"); - } - - // These two overloads allow streaming a wide C string to a Message - // using the UTF-8 encoding. - Message& operator <<(const wchar_t* wide_c_str) { - return *this << internal::String::ShowWideCString(wide_c_str); - } - Message& operator <<(wchar_t* wide_c_str) { - return *this << internal::String::ShowWideCString(wide_c_str); - } - -#if GTEST_HAS_STD_WSTRING - // Converts the given wide string to a narrow string using the UTF-8 - // encoding, and streams the result to this Message object. - Message& operator <<(const ::std::wstring& wstr); -#endif // GTEST_HAS_STD_WSTRING - -#if GTEST_HAS_GLOBAL_WSTRING - // Converts the given wide string to a narrow string using the UTF-8 - // encoding, and streams the result to this Message object. - Message& operator <<(const ::wstring& wstr); -#endif // GTEST_HAS_GLOBAL_WSTRING - - // Gets the text streamed to this object so far as a String. - // Each '\0' character in the buffer is replaced with "\\0". - // - // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. - internal::String GetString() const { - return internal::StringStreamToString(ss_.get()); - } - - private: - -#if GTEST_OS_SYMBIAN - // These are needed as the Nokia Symbian Compiler cannot decide between - // const T& and const T* in a function template. The Nokia compiler _can_ - // decide between class template specializations for T and T*, so a - // tr1::type_traits-like is_pointer works, and we can overload on that. - template - inline void StreamHelper(internal::true_type /*dummy*/, T* pointer) { - if (pointer == NULL) { - *ss_ << "(null)"; - } else { - ::GTestStreamToHelper(ss_.get(), pointer); - } - } - template - inline void StreamHelper(internal::false_type /*dummy*/, const T& value) { - ::GTestStreamToHelper(ss_.get(), value); - } -#endif // GTEST_OS_SYMBIAN - - // We'll hold the text streamed to this object here. - const internal::scoped_ptr< ::std::stringstream> ss_; - - // We declare (but don't implement) this to prevent the compiler - // from implementing the assignment operator. - void operator=(const Message&); -}; - -// Streams a Message to an ostream. -inline std::ostream& operator <<(std::ostream& os, const Message& sb) { - return os << sb.GetString(); -} - -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ -// This file was GENERATED by command: -// pump.py gtest-param-test.h.pump -// DO NOT EDIT BY HAND!!! - -// Copyright 2008, Google Inc. -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: vladl@google.com (Vlad Losev) -// -// Macros and functions for implementing parameterized tests -// in Google C++ Testing Framework (Google Test) -// -// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! -// -#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ -#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ - - -// Value-parameterized tests allow you to test your code with different -// parameters without writing multiple copies of the same test. -// -// Here is how you use value-parameterized tests: - -#if 0 - -// To write value-parameterized tests, first you should define a fixture -// class. It is usually derived from testing::TestWithParam (see below for -// another inheritance scheme that's sometimes useful in more complicated -// class hierarchies), where the type of your parameter values. -// TestWithParam is itself derived from testing::Test. T can be any -// copyable type. If it's a raw pointer, you are responsible for managing the -// lifespan of the pointed values. - -class FooTest : public ::testing::TestWithParam { - // You can implement all the usual class fixture members here. -}; - -// Then, use the TEST_P macro to define as many parameterized tests -// for this fixture as you want. The _P suffix is for "parameterized" -// or "pattern", whichever you prefer to think. - -TEST_P(FooTest, DoesBlah) { - // Inside a test, access the test parameter with the GetParam() method - // of the TestWithParam class: - EXPECT_TRUE(foo.Blah(GetParam())); - ... -} - -TEST_P(FooTest, HasBlahBlah) { - ... -} - -// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test -// case with any set of parameters you want. Google Test defines a number -// of functions for generating test parameters. They return what we call -// (surprise!) parameter generators. Here is a summary of them, which -// are all in the testing namespace: -// -// -// Range(begin, end [, step]) - Yields values {begin, begin+step, -// begin+step+step, ...}. The values do not -// include end. 
step defaults to 1. -// Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}. -// ValuesIn(container) - Yields values from a C-style array, an STL -// ValuesIn(begin,end) container, or an iterator range [begin, end). -// Bool() - Yields sequence {false, true}. -// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product -// for the math savvy) of the values generated -// by the N generators. -// -// For more details, see comments at the definitions of these functions below -// in this file. -// -// The following statement will instantiate tests from the FooTest test case -// each with parameter values "meeny", "miny", and "moe". - -INSTANTIATE_TEST_CASE_P(InstantiationName, - FooTest, - Values("meeny", "miny", "moe")); - -// To distinguish different instances of the pattern, (yes, you -// can instantiate it more then once) the first argument to the -// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the -// actual test case name. Remember to pick unique prefixes for different -// instantiations. The tests from the instantiation above will have -// these names: -// -// * InstantiationName/FooTest.DoesBlah/0 for "meeny" -// * InstantiationName/FooTest.DoesBlah/1 for "miny" -// * InstantiationName/FooTest.DoesBlah/2 for "moe" -// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny" -// * InstantiationName/FooTest.HasBlahBlah/1 for "miny" -// * InstantiationName/FooTest.HasBlahBlah/2 for "moe" -// -// You can use these names in --gtest_filter. -// -// This statement will instantiate all tests from FooTest again, each -// with parameter values "cat" and "dog": - -const char* pets[] = {"cat", "dog"}; -INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets)); - -// The tests from the instantiation above will have these names: -// -// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat" -// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog" -// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat" -// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog" -// -// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests -// in the given test case, whether their definitions come before or -// AFTER the INSTANTIATE_TEST_CASE_P statement. -// -// Please also note that generator expressions (including parameters to the -// generators) are evaluated in InitGoogleTest(), after main() has started. -// This allows the user on one hand, to adjust generator parameters in order -// to dynamically determine a set of tests to run and on the other hand, -// give the user a chance to inspect the generated tests with Google Test -// reflection API before RUN_ALL_TESTS() is executed. -// -// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc -// for more examples. -// -// In the future, we plan to publish the API for defining new parameter -// generators. But for now this interface remains part of the internal -// implementation and is subject to change. -// -// -// A parameterized test fixture must be derived from testing::Test and from -// testing::WithParamInterface, where T is the type of the parameter -// values. Inheriting from TestWithParam satisfies that requirement because -// TestWithParam inherits from both Test and WithParamInterface. In more -// complicated hierarchies, however, it is occasionally useful to inherit -// separately from Test and WithParamInterface. 
For example: - -class BaseTest : public ::testing::Test { - // You can inherit all the usual members for a non-parameterized test - // fixture here. -}; - -class DerivedTest : public BaseTest, public ::testing::WithParamInterface { - // The usual test fixture members go here too. -}; - -TEST_F(BaseTest, HasFoo) { - // This is an ordinary non-parameterized test. -} - -TEST_P(DerivedTest, DoesBlah) { - // GetParam works just the same here as if you inherit from TestWithParam. - EXPECT_TRUE(foo.Blah(GetParam())); -} - -#endif // 0 - - -#if !GTEST_OS_SYMBIAN -# include -#endif - -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. -// Copyright 2008 Google Inc. -// All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: vladl@google.com (Vlad Losev) - -// Type and function utilities for implementing parameterized tests. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ - -#include -#include -#include - -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. -// Copyright 2003 Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: Dan Egnor (egnor@google.com) -// -// A "smart" pointer type with reference tracking. Every pointer to a -// particular object is kept on a circular linked list. When the last pointer -// to an object is destroyed or reassigned, the object is deleted. -// -// Used properly, this deletes the object when the last reference goes away. -// There are several caveats: -// - Like all reference counting schemes, cycles lead to leaks. -// - Each smart pointer is actually two pointers (8 bytes instead of 4). -// - Every time a pointer is assigned, the entire list of pointers to that -// object is traversed. This class is therefore NOT SUITABLE when there -// will often be more than two or three pointers to a particular object. -// - References are only tracked as long as linked_ptr<> objects are copied. -// If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS -// will happen (double deletion). -// -// A good use of this class is storing object references in STL containers. -// You can safely put linked_ptr<> in a vector<>. -// Other uses may not be as good. -// -// Note: If you use an incomplete type with linked_ptr<>, the class -// *containing* linked_ptr<> must have a constructor and destructor (even -// if they do nothing!). -// -// Bill Gibbons suggested we use something like this. -// -// Thread Safety: -// Unlike other linked_ptr implementations, in this implementation -// a linked_ptr object is thread-safe in the sense that: -// - it's safe to copy linked_ptr objects concurrently, -// - it's safe to copy *from* a linked_ptr and read its underlying -// raw pointer (e.g. via get()) concurrently, and -// - it's safe to write to two linked_ptrs that point to the same -// shared object concurrently. -// TODO(wan@google.com): rename this to safe_linked_ptr to avoid -// confusion with normal linked_ptr. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ - -#include -#include - - -namespace testing { -namespace internal { - -// Protects copying of all linked_ptr objects. -GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex); - -// This is used internally by all instances of linked_ptr<>. It needs to be -// a non-template class because different types of linked_ptr<> can refer to -// the same object (linked_ptr(obj) vs linked_ptr(obj)). -// So, it needs to be possible for different types of linked_ptr to participate -// in the same circular linked list, so we need a single class type here. -// -// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr. -class linked_ptr_internal { - public: - // Create a new circle that includes only this instance. 
- void join_new() { - next_ = this; - } - - // Many linked_ptr operations may change p.link_ for some linked_ptr - // variable p in the same circle as this object. Therefore we need - // to prevent two such operations from occurring concurrently. - // - // Note that different types of linked_ptr objects can coexist in a - // circle (e.g. linked_ptr, linked_ptr, and - // linked_ptr). Therefore we must use a single mutex to - // protect all linked_ptr objects. This can create serious - // contention in production code, but is acceptable in a testing - // framework. - - // Join an existing circle. - // L < g_linked_ptr_mutex - void join(linked_ptr_internal const* ptr) { - MutexLock lock(&g_linked_ptr_mutex); - - linked_ptr_internal const* p = ptr; - while (p->next_ != ptr) p = p->next_; - p->next_ = this; - next_ = ptr; - } - - // Leave whatever circle we're part of. Returns true if we were the - // last member of the circle. Once this is done, you can join() another. - // L < g_linked_ptr_mutex - bool depart() { - MutexLock lock(&g_linked_ptr_mutex); - - if (next_ == this) return true; - linked_ptr_internal const* p = next_; - while (p->next_ != this) p = p->next_; - p->next_ = next_; - return false; - } - - private: - mutable linked_ptr_internal const* next_; -}; - -template -class linked_ptr { - public: - typedef T element_type; - - // Take over ownership of a raw pointer. This should happen as soon as - // possible after the object is created. - explicit linked_ptr(T* ptr = NULL) { capture(ptr); } - ~linked_ptr() { depart(); } - - // Copy an existing linked_ptr<>, adding ourselves to the list of references. - template linked_ptr(linked_ptr const& ptr) { copy(&ptr); } - linked_ptr(linked_ptr const& ptr) { // NOLINT - assert(&ptr != this); - copy(&ptr); - } - - // Assignment releases the old value and acquires the new. - template linked_ptr& operator=(linked_ptr const& ptr) { - depart(); - copy(&ptr); - return *this; - } - - linked_ptr& operator=(linked_ptr const& ptr) { - if (&ptr != this) { - depart(); - copy(&ptr); - } - return *this; - } - - // Smart pointer members. - void reset(T* ptr = NULL) { - depart(); - capture(ptr); - } - T* get() const { return value_; } - T* operator->() const { return value_; } - T& operator*() const { return *value_; } - - bool operator==(T* p) const { return value_ == p; } - bool operator!=(T* p) const { return value_ != p; } - template - bool operator==(linked_ptr const& ptr) const { - return value_ == ptr.get(); - } - template - bool operator!=(linked_ptr const& ptr) const { - return value_ != ptr.get(); - } - - private: - template - friend class linked_ptr; - - T* value_; - linked_ptr_internal link_; - - void depart() { - if (link_.depart()) delete value_; - } - - void capture(T* ptr) { - value_ = ptr; - link_.join_new(); - } - - template void copy(linked_ptr const* ptr) { - value_ = ptr->get(); - if (value_) - link_.join(&ptr->link_); - else - link_.join_new(); - } -}; - -template inline -bool operator==(T* ptr, const linked_ptr& x) { - return ptr == x.get(); -} - -template inline -bool operator!=(T* ptr, const linked_ptr& x) { - return ptr != x.get(); -} - -// A function to convert T* into linked_ptr -// Doing e.g. make_linked_ptr(new FooBarBaz(arg)) is a shorter notation -// for linked_ptr >(new FooBarBaz(arg)) -template -linked_ptr make_linked_ptr(T* ptr) { - return linked_ptr(ptr); -} - -} // namespace internal -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ -// Copyright 2007, Google Inc. 
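// A brief usage sketch of the linked_ptr documented above; Foo is a
// placeholder type used only for illustration:
//
//   using ::testing::internal::linked_ptr;
//   using ::testing::internal::make_linked_ptr;
//
//   linked_ptr<Foo> a(new Foo);             // a owns the new Foo
//   linked_ptr<Foo> b = a;                  // a and b now share one circle
//   std::vector<linked_ptr<Foo> > v;
//   v.push_back(make_linked_ptr(new Foo));  // safe to store in STL containers
//
//   // Caveat restated from above: never round-trip through a raw pointer.
//   //   Foo* raw = a.get();
//   //   linked_ptr<Foo> c(raw);            // WRONG: second circle -> double delete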
-// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) - -// Google Test - The Google C++ Testing Framework -// -// This file implements a universal value printer that can print a -// value of any type T: -// -// void ::testing::internal::UniversalPrinter::Print(value, ostream_ptr); -// -// A user can teach this function how to print a class type T by -// defining either operator<<() or PrintTo() in the namespace that -// defines T. More specifically, the FIRST defined function in the -// following list will be used (assuming T is defined in namespace -// foo): -// -// 1. foo::PrintTo(const T&, ostream*) -// 2. operator<<(ostream&, const T&) defined in either foo or the -// global namespace. -// -// If none of the above is defined, it will print the debug string of -// the value if it is a protocol buffer, or print the raw bytes in the -// value otherwise. -// -// To aid debugging: when T is a reference type, the address of the -// value is also printed; when T is a (const) char pointer, both the -// pointer value and the NUL-terminated string it points to are -// printed. -// -// We also provide some convenient wrappers: -// -// // Prints a value to a string. For a (const or not) char -// // pointer, the NUL-terminated string (but not the pointer) is -// // printed. -// std::string ::testing::PrintToString(const T& value); -// -// // Prints a value tersely: for a reference type, the referenced -// // value (but not the address) is printed; for a (const or not) char -// // pointer, the NUL-terminated string (but not the pointer) is -// // printed. -// void ::testing::internal::UniversalTersePrint(const T& value, ostream*); -// -// // Prints value using the type inferred by the compiler. The difference -// // from UniversalTersePrint() is that this function prints both the -// // pointer and the NUL-terminated string for a (const or not) char pointer. 
-// void ::testing::internal::UniversalPrint(const T& value, ostream*); -// -// // Prints the fields of a tuple tersely to a string vector, one -// // element for each field. Tuple support must be enabled in -// // gtest-port.h. -// std::vector UniversalTersePrintTupleFieldsToStrings( -// const Tuple& value); -// -// Known limitation: -// -// The print primitives print the elements of an STL-style container -// using the compiler-inferred type of *iter where iter is a -// const_iterator of the container. When const_iterator is an input -// iterator but not a forward iterator, this inferred type may not -// match value_type, and the print output may be incorrect. In -// practice, this is rarely a problem as for most containers -// const_iterator is a forward iterator. We'll fix this if there's an -// actual need for it. Note that this fix cannot rely on value_type -// being defined as many user-defined container types don't have -// value_type. - -#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ -#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ - -#include // NOLINT -#include -#include -#include -#include - -namespace testing { - -// Definitions in the 'internal' and 'internal2' name spaces are -// subject to change without notice. DO NOT USE THEM IN USER CODE! -namespace internal2 { - -// Prints the given number of bytes in the given object to the given -// ostream. -GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes, - size_t count, - ::std::ostream* os); - -// For selecting which printer to use when a given type has neither << -// nor PrintTo(). -enum TypeKind { - kProtobuf, // a protobuf type - kConvertibleToInteger, // a type implicitly convertible to BiggestInt - // (e.g. a named or unnamed enum type) - kOtherType // anything else -}; - -// TypeWithoutFormatter::PrintValue(value, os) is called -// by the universal printer to print a value of type T when neither -// operator<< nor PrintTo() is defined for T, where kTypeKind is the -// "kind" of T as defined by enum TypeKind. -template -class TypeWithoutFormatter { - public: - // This default version is called when kTypeKind is kOtherType. - static void PrintValue(const T& value, ::std::ostream* os) { - PrintBytesInObjectTo(reinterpret_cast(&value), - sizeof(value), os); - } -}; - -// We print a protobuf using its ShortDebugString() when the string -// doesn't exceed this many characters; otherwise we print it using -// DebugString() for better readability. -const size_t kProtobufOneLinerMaxLength = 50; - -template -class TypeWithoutFormatter { - public: - static void PrintValue(const T& value, ::std::ostream* os) { - const ::testing::internal::string short_str = value.ShortDebugString(); - const ::testing::internal::string pretty_str = - short_str.length() <= kProtobufOneLinerMaxLength ? - short_str : ("\n" + value.DebugString()); - *os << ("<" + pretty_str + ">"); - } -}; - -template -class TypeWithoutFormatter { - public: - // Since T has no << operator or PrintTo() but can be implicitly - // converted to BiggestInt, we print it as a BiggestInt. - // - // Most likely T is an enum type (either named or unnamed), in which - // case printing it as an integer is the desired behavior. In case - // T is not an enum, printing it as an integer is the best we can do - // given that it has no user-defined printer. - static void PrintValue(const T& value, ::std::ostream* os) { - const internal::BiggestInt kBigInt = value; - *os << kBigInt; - } -}; - -// Prints the given value to the given ostream. 
If the value is a -// protocol message, its debug string is printed; if it's an enum or -// of a type implicitly convertible to BiggestInt, it's printed as an -// integer; otherwise the bytes in the value are printed. This is -// what UniversalPrinter::Print() does when it knows nothing about -// type T and T has neither << operator nor PrintTo(). -// -// A user can override this behavior for a class type Foo by defining -// a << operator in the namespace where Foo is defined. -// -// We put this operator in namespace 'internal2' instead of 'internal' -// to simplify the implementation, as much code in 'internal' needs to -// use << in STL, which would conflict with our own << were it defined -// in 'internal'. -// -// Note that this operator<< takes a generic std::basic_ostream type instead of the more restricted std::ostream. If -// we define it to take an std::ostream instead, we'll get an -// "ambiguous overloads" compiler error when trying to print a type -// Foo that supports streaming to std::basic_ostream, as the compiler cannot tell whether -// operator<<(std::ostream&, const T&) or -// operator<<(std::basic_stream, const Foo&) is more -// specific. -template -::std::basic_ostream& operator<<( - ::std::basic_ostream& os, const T& x) { - TypeWithoutFormatter::value ? kProtobuf : - internal::ImplicitlyConvertible::value ? - kConvertibleToInteger : kOtherType)>::PrintValue(x, &os); - return os; -} - -} // namespace internal2 -} // namespace testing - -// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up -// magic needed for implementing UniversalPrinter won't work. -namespace testing_internal { - -// Used to print a value that is not an STL-style container when the -// user doesn't define PrintTo() for it. -template -void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) { - // With the following statement, during unqualified name lookup, - // testing::internal2::operator<< appears as if it was declared in - // the nearest enclosing namespace that contains both - // ::testing_internal and ::testing::internal2, i.e. the global - // namespace. For more details, refer to the C++ Standard section - // 7.3.4-1 [namespace.udir]. This allows us to fall back onto - // testing::internal2::operator<< in case T doesn't come with a << - // operator. - // - // We cannot write 'using ::testing::internal2::operator<<;', which - // gcc 3.3 fails to compile due to a compiler bug. - using namespace ::testing::internal2; // NOLINT - - // Assuming T is defined in namespace foo, in the next statement, - // the compiler will consider all of: - // - // 1. foo::operator<< (thanks to Koenig look-up), - // 2. ::operator<< (as the current namespace is enclosed in ::), - // 3. testing::internal2::operator<< (thanks to the using statement above). - // - // The operator<< whose type matches T best will be picked. - // - // We deliberately allow #2 to be a candidate, as sometimes it's - // impossible to define #1 (e.g. when foo is ::std, defining - // anything in it is undefined behavior unless you are a compiler - // vendor.). - *os << value; -} - -} // namespace testing_internal - -namespace testing { -namespace internal { - -// UniversalPrinter::Print(value, ostream_ptr) prints the given -// value to the given ostream. The caller must ensure that -// 'ostream_ptr' is not NULL, or the behavior is undefined. 
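// Illustrative sketch, not part of the original header: the ADL fallback in
// DefaultPrintNonContainerTo() above means that defining operator<< in the
// type's own namespace overrides the raw-byte dump.  Assumes the fused gtest
// header is available as "gtest/gtest.h"; namespace bar and struct Point are
// hypothetical user code.

#include <ostream>
#include <string>
#include "gtest/gtest.h"

namespace bar {
struct Point { int x; int y; };
// Non-template operator<<, found by argument-dependent lookup from the
// unqualified "*os << value" statement above.
inline std::ostream& operator<<(std::ostream& os, const Point& p) {
  return os << "(" << p.x << ", " << p.y << ")";
}
}  // namespace bar

std::string ShowPoint() {
  bar::Point p = {3, 4};
  return testing::PrintToString(p);  // "(3, 4)" instead of a byte dump
}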
-// -// We define UniversalPrinter as a class template (as opposed to a -// function template), as we need to partially specialize it for -// reference types, which cannot be done with function templates. -template -class UniversalPrinter; - -template -void UniversalPrint(const T& value, ::std::ostream* os); - -// Used to print an STL-style container when the user doesn't define -// a PrintTo() for it. -template -void DefaultPrintTo(IsContainer /* dummy */, - false_type /* is not a pointer */, - const C& container, ::std::ostream* os) { - const size_t kMaxCount = 32; // The maximum number of elements to print. - *os << '{'; - size_t count = 0; - for (typename C::const_iterator it = container.begin(); - it != container.end(); ++it, ++count) { - if (count > 0) { - *os << ','; - if (count == kMaxCount) { // Enough has been printed. - *os << " ..."; - break; - } - } - *os << ' '; - // We cannot call PrintTo(*it, os) here as PrintTo() doesn't - // handle *it being a native array. - internal::UniversalPrint(*it, os); - } - - if (count > 0) { - *os << ' '; - } - *os << '}'; -} - -// Used to print a pointer that is neither a char pointer nor a member -// pointer, when the user doesn't define PrintTo() for it. (A member -// variable pointer or member function pointer doesn't really point to -// a location in the address space. Their representation is -// implementation-defined. Therefore they will be printed as raw -// bytes.) -template -void DefaultPrintTo(IsNotContainer /* dummy */, - true_type /* is a pointer */, - T* p, ::std::ostream* os) { - if (p == NULL) { - *os << "NULL"; - } else { - // C++ doesn't allow casting from a function pointer to any object - // pointer. - // - // IsTrue() silences warnings: "Condition is always true", - // "unreachable code". - if (IsTrue(ImplicitlyConvertible::value)) { - // T is not a function type. We just call << to print p, - // relying on ADL to pick up user-defined << for their pointer - // types, if any. - *os << p; - } else { - // T is a function type, so '*os << p' doesn't do what we want - // (it just prints p as bool). We want to print p as a const - // void*. However, we cannot cast it to const void* directly, - // even using reinterpret_cast, as earlier versions of gcc - // (e.g. 3.4.5) cannot compile the cast when p is a function - // pointer. Casting to UInt64 first solves the problem. - *os << reinterpret_cast( - reinterpret_cast(p)); - } - } -} - -// Used to print a non-container, non-pointer value when the user -// doesn't define PrintTo() for it. -template -void DefaultPrintTo(IsNotContainer /* dummy */, - false_type /* is not a pointer */, - const T& value, ::std::ostream* os) { - ::testing_internal::DefaultPrintNonContainerTo(value, os); -} - -// Prints the given value using the << operator if it has one; -// otherwise prints the bytes in it. This is what -// UniversalPrinter::Print() does when PrintTo() is not specialized -// or overloaded for type T. -// -// A user can override this behavior for a class type Foo by defining -// an overload of PrintTo() in the namespace where Foo is defined. We -// give the user this option as sometimes defining a << operator for -// Foo is not desirable (e.g. the coding style may prevent doing it, -// or there is already a << operator but it doesn't do what the user -// wants). -template -void PrintTo(const T& value, ::std::ostream* os) { - // DefaultPrintTo() is overloaded. The type of its first two - // arguments determine which version will be picked. 
If T is an - // STL-style container, the version for container will be called; if - // T is a pointer, the pointer version will be called; otherwise the - // generic version will be called. - // - // Note that we check for container types here, prior to we check - // for protocol message types in our operator<<. The rationale is: - // - // For protocol messages, we want to give people a chance to - // override Google Mock's format by defining a PrintTo() or - // operator<<. For STL containers, other formats can be - // incompatible with Google Mock's format for the container - // elements; therefore we check for container types here to ensure - // that our format is used. - // - // The second argument of DefaultPrintTo() is needed to bypass a bug - // in Symbian's C++ compiler that prevents it from picking the right - // overload between: - // - // PrintTo(const T& x, ...); - // PrintTo(T* x, ...); - DefaultPrintTo(IsContainerTest(0), is_pointer(), value, os); -} - -// The following list of PrintTo() overloads tells -// UniversalPrinter::Print() how to print standard types (built-in -// types, strings, plain arrays, and pointers). - -// Overloads for various char types. -GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os); -GTEST_API_ void PrintTo(signed char c, ::std::ostream* os); -inline void PrintTo(char c, ::std::ostream* os) { - // When printing a plain char, we always treat it as unsigned. This - // way, the output won't be affected by whether the compiler thinks - // char is signed or not. - PrintTo(static_cast(c), os); -} - -// Overloads for other simple built-in types. -inline void PrintTo(bool x, ::std::ostream* os) { - *os << (x ? "true" : "false"); -} - -// Overload for wchar_t type. -// Prints a wchar_t as a symbol if it is printable or as its internal -// code otherwise and also as its decimal code (except for L'\0'). -// The L'\0' char is printed as "L'\\0'". The decimal code is printed -// as signed integer when wchar_t is implemented by the compiler -// as a signed type and is printed as an unsigned integer when wchar_t -// is implemented as an unsigned type. -GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os); - -// Overloads for C strings. -GTEST_API_ void PrintTo(const char* s, ::std::ostream* os); -inline void PrintTo(char* s, ::std::ostream* os) { - PrintTo(ImplicitCast_(s), os); -} - -// signed/unsigned char is often used for representing binary data, so -// we print pointers to it as void* to be safe. -inline void PrintTo(const signed char* s, ::std::ostream* os) { - PrintTo(ImplicitCast_(s), os); -} -inline void PrintTo(signed char* s, ::std::ostream* os) { - PrintTo(ImplicitCast_(s), os); -} -inline void PrintTo(const unsigned char* s, ::std::ostream* os) { - PrintTo(ImplicitCast_(s), os); -} -inline void PrintTo(unsigned char* s, ::std::ostream* os) { - PrintTo(ImplicitCast_(s), os); -} - -// MSVC can be configured to define wchar_t as a typedef of unsigned -// short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native -// type. When wchar_t is a typedef, defining an overload for const -// wchar_t* would cause unsigned short* be printed as a wide string, -// possibly causing invalid memory accesses. -#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) -// Overloads for wide C strings -GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os); -inline void PrintTo(wchar_t* s, ::std::ostream* os) { - PrintTo(ImplicitCast_(s), os); -} -#endif - -// Overload for C arrays. Multi-dimensional arrays are printed -// properly. 
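// Illustrative sketch, not part of the original header: how the
// DefaultPrintTo() dispatch above plays out for a few argument kinds.
// Assumes the fused gtest header is available as "gtest/gtest.h".

#include <vector>
#include "gtest/gtest.h"

void DispatchDemo() {
  std::vector<int> v(3, 7);
  testing::PrintToString(v);      // container branch: "{ 7, 7, 7 }"

  int n = 0;
  testing::PrintToString(&n);     // pointer branch: prints the address

  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&n);
  testing::PrintToString(bytes);  // unsigned char* is printed as a void*, not as text
}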
- -// Prints the given number of elements in an array, without printing -// the curly braces. -template -void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) { - UniversalPrint(a[0], os); - for (size_t i = 1; i != count; i++) { - *os << ", "; - UniversalPrint(a[i], os); - } -} - -// Overloads for ::string and ::std::string. -#if GTEST_HAS_GLOBAL_STRING -GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os); -inline void PrintTo(const ::string& s, ::std::ostream* os) { - PrintStringTo(s, os); -} -#endif // GTEST_HAS_GLOBAL_STRING - -GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os); -inline void PrintTo(const ::std::string& s, ::std::ostream* os) { - PrintStringTo(s, os); -} - -// Overloads for ::wstring and ::std::wstring. -#if GTEST_HAS_GLOBAL_WSTRING -GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os); -inline void PrintTo(const ::wstring& s, ::std::ostream* os) { - PrintWideStringTo(s, os); -} -#endif // GTEST_HAS_GLOBAL_WSTRING - -#if GTEST_HAS_STD_WSTRING -GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os); -inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) { - PrintWideStringTo(s, os); -} -#endif // GTEST_HAS_STD_WSTRING - -#if GTEST_HAS_TR1_TUPLE -// Overload for ::std::tr1::tuple. Needed for printing function arguments, -// which are packed as tuples. - -// Helper function for printing a tuple. T must be instantiated with -// a tuple type. -template -void PrintTupleTo(const T& t, ::std::ostream* os); - -// Overloaded PrintTo() for tuples of various arities. We support -// tuples of up-to 10 fields. The following implementation works -// regardless of whether tr1::tuple is implemented using the -// non-standard variadic template feature or not. - -inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, - ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, - ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, - ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, - ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo(const ::std::tr1::tuple& t, - ::std::ostream* os) { - PrintTupleTo(t, os); -} - -template -void PrintTo( - const ::std::tr1::tuple& t, - ::std::ostream* os) { - PrintTupleTo(t, os); -} -#endif // GTEST_HAS_TR1_TUPLE - -// Overload for std::pair. -template -void PrintTo(const ::std::pair& value, ::std::ostream* os) { - *os << '('; - // We cannot use UniversalPrint(value.first, os) here, as T1 may be - // a reference type. The same for printing value.second. - UniversalPrinter::Print(value.first, os); - *os << ", "; - UniversalPrinter::Print(value.second, os); - *os << ')'; -} - -// Implements printing a non-reference type T by letting the compiler -// pick the right overload of PrintTo() for T. 
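// Illustrative sketch, not part of the original header: teaching the printer
// about a user type by overloading PrintTo() in that type's namespace, which
// takes precedence over operator<<.  Assumes the fused gtest header is
// available as "gtest/gtest.h"; namespace foo and Temperature are hypothetical.

#include <ostream>
#include <string>
#include "gtest/gtest.h"

namespace foo {
struct Temperature { double celsius; };
// Found via argument-dependent lookup when UniversalPrinter<T>::Print (below)
// makes its unqualified call to PrintTo(value, os).
inline void PrintTo(const Temperature& t, std::ostream* os) {
  *os << t.celsius << " C";
}
}  // namespace foo

std::string ShowTemperature() {
  foo::Temperature t = { 21.5 };
  return testing::PrintToString(t);  // "21.5 C"
}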
-template -class UniversalPrinter { - public: - // MSVC warns about adding const to a function type, so we want to - // disable the warning. -#ifdef _MSC_VER -# pragma warning(push) // Saves the current warning state. -# pragma warning(disable:4180) // Temporarily disables warning 4180. -#endif // _MSC_VER - - // Note: we deliberately don't call this PrintTo(), as that name - // conflicts with ::testing::internal::PrintTo in the body of the - // function. - static void Print(const T& value, ::std::ostream* os) { - // By default, ::testing::internal::PrintTo() is used for printing - // the value. - // - // Thanks to Koenig look-up, if T is a class and has its own - // PrintTo() function defined in its namespace, that function will - // be visible here. Since it is more specific than the generic ones - // in ::testing::internal, it will be picked by the compiler in the - // following statement - exactly what we want. - PrintTo(value, os); - } - -#ifdef _MSC_VER -# pragma warning(pop) // Restores the warning state. -#endif // _MSC_VER -}; - -// UniversalPrintArray(begin, len, os) prints an array of 'len' -// elements, starting at address 'begin'. -template -void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) { - if (len == 0) { - *os << "{}"; - } else { - *os << "{ "; - const size_t kThreshold = 18; - const size_t kChunkSize = 8; - // If the array has more than kThreshold elements, we'll have to - // omit some details by printing only the first and the last - // kChunkSize elements. - // TODO(wan@google.com): let the user control the threshold using a flag. - if (len <= kThreshold) { - PrintRawArrayTo(begin, len, os); - } else { - PrintRawArrayTo(begin, kChunkSize, os); - *os << ", ..., "; - PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os); - } - *os << " }"; - } -} -// This overload prints a (const) char array compactly. -GTEST_API_ void UniversalPrintArray(const char* begin, - size_t len, - ::std::ostream* os); - -// Implements printing an array type T[N]. -template -class UniversalPrinter { - public: - // Prints the given array, omitting some elements when there are too - // many. - static void Print(const T (&a)[N], ::std::ostream* os) { - UniversalPrintArray(a, N, os); - } -}; - -// Implements printing a reference type T&. -template -class UniversalPrinter { - public: - // MSVC warns about adding const to a function type, so we want to - // disable the warning. -#ifdef _MSC_VER -# pragma warning(push) // Saves the current warning state. -# pragma warning(disable:4180) // Temporarily disables warning 4180. -#endif // _MSC_VER - - static void Print(const T& value, ::std::ostream* os) { - // Prints the address of the value. We use reinterpret_cast here - // as static_cast doesn't compile when T is a function type. - *os << "@" << reinterpret_cast(&value) << " "; - - // Then prints the value itself. - UniversalPrint(value, os); - } - -#ifdef _MSC_VER -# pragma warning(pop) // Restores the warning state. -#endif // _MSC_VER -}; - -// Prints a value tersely: for a reference type, the referenced value -// (but not the address) is printed; for a (const) char pointer, the -// NUL-terminated string (but not the pointer) is printed. 
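// Illustrative sketch, not part of the original header: the terse rule stated
// above for char pointers, plus the first-8/last-8 truncation applied by
// UniversalPrintArray() to long arrays.  Assumes the fused gtest header is
// available as "gtest/gtest.h".

#include "gtest/gtest.h"

void TerseDemo() {
  const char* name = "miniFE";
  testing::PrintToString(name);   // the string ("\"miniFE\""), not the pointer value

  int big[20] = {0};              // longer than the 18-element threshold
  testing::PrintToString(big);
  // -> "{ 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0 }"
}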
-template -void UniversalTersePrint(const T& value, ::std::ostream* os) { - UniversalPrint(value, os); -} -inline void UniversalTersePrint(const char* str, ::std::ostream* os) { - if (str == NULL) { - *os << "NULL"; - } else { - UniversalPrint(string(str), os); - } -} -inline void UniversalTersePrint(char* str, ::std::ostream* os) { - UniversalTersePrint(static_cast(str), os); -} - -// Prints a value using the type inferred by the compiler. The -// difference between this and UniversalTersePrint() is that for a -// (const) char pointer, this prints both the pointer and the -// NUL-terminated string. -template -void UniversalPrint(const T& value, ::std::ostream* os) { - UniversalPrinter::Print(value, os); -} - -#if GTEST_HAS_TR1_TUPLE -typedef ::std::vector Strings; - -// This helper template allows PrintTo() for tuples and -// UniversalTersePrintTupleFieldsToStrings() to be defined by -// induction on the number of tuple fields. The idea is that -// TuplePrefixPrinter::PrintPrefixTo(t, os) prints the first N -// fields in tuple t, and can be defined in terms of -// TuplePrefixPrinter. - -// The inductive case. -template -struct TuplePrefixPrinter { - // Prints the first N fields of a tuple. - template - static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) { - TuplePrefixPrinter::PrintPrefixTo(t, os); - *os << ", "; - UniversalPrinter::type> - ::Print(::std::tr1::get(t), os); - } - - // Tersely prints the first N fields of a tuple to a string vector, - // one element for each field. - template - static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) { - TuplePrefixPrinter::TersePrintPrefixToStrings(t, strings); - ::std::stringstream ss; - UniversalTersePrint(::std::tr1::get(t), &ss); - strings->push_back(ss.str()); - } -}; - -// Base cases. -template <> -struct TuplePrefixPrinter<0> { - template - static void PrintPrefixTo(const Tuple&, ::std::ostream*) {} - - template - static void TersePrintPrefixToStrings(const Tuple&, Strings*) {} -}; -// We have to specialize the entire TuplePrefixPrinter<> class -// template here, even though the definition of -// TersePrintPrefixToStrings() is the same as the generic version, as -// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't -// support specializing a method template of a class template. -template <> -struct TuplePrefixPrinter<1> { - template - static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) { - UniversalPrinter::type>:: - Print(::std::tr1::get<0>(t), os); - } - - template - static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) { - ::std::stringstream ss; - UniversalTersePrint(::std::tr1::get<0>(t), &ss); - strings->push_back(ss.str()); - } -}; - -// Helper function for printing a tuple. T must be instantiated with -// a tuple type. -template -void PrintTupleTo(const T& t, ::std::ostream* os) { - *os << "("; - TuplePrefixPrinter< ::std::tr1::tuple_size::value>:: - PrintPrefixTo(t, os); - *os << ")"; -} - -// Prints the fields of a tuple tersely to a string vector, one -// element for each field. See the comment before -// UniversalTersePrint() for how we define "tersely". 
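// Illustrative sketch, not part of the original header: the same
// print-the-first-N-fields induction as TuplePrefixPrinter above, condensed
// into hypothetical C++11 code with std::tuple so the shape of the recursion
// is easier to see (the original folds the comma handling into separate
// specializations for 0 and 1 instead of an if).

#include <cstddef>
#include <iostream>
#include <tuple>

template <std::size_t N>
struct PrefixPrinter {                      // prints the first N fields
  template <typename Tuple>
  static void Print(const Tuple& t, std::ostream* os) {
    PrefixPrinter<N - 1>::Print(t, os);     // first N-1 fields...
    if (N > 1) *os << ", ";
    *os << std::get<N - 1>(t);              // ...then field N-1
  }
};

template <>
struct PrefixPrinter<0> {                   // base case: nothing to print
  template <typename Tuple>
  static void Print(const Tuple&, std::ostream*) {}
};

int main() {
  std::tuple<int, char, double> t(1, 'x', 2.5);
  std::cout << '(';
  PrefixPrinter<3>::Print(t, &std::cout);
  std::cout << ")\n";                       // prints "(1, x, 2.5)"
}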
-template -Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) { - Strings result; - TuplePrefixPrinter< ::std::tr1::tuple_size::value>:: - TersePrintPrefixToStrings(value, &result); - return result; -} -#endif // GTEST_HAS_TR1_TUPLE - -} // namespace internal - -template -::std::string PrintToString(const T& value) { - ::std::stringstream ss; - internal::UniversalTersePrint(value, &ss); - return ss.str(); -} - -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ - -#if GTEST_HAS_PARAM_TEST - -namespace testing { -namespace internal { - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Outputs a message explaining invalid registration of different -// fixture class for the same test case. This may happen when -// TEST_P macro is used to define two tests with the same name -// but in different namespaces. -GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name, - const char* file, int line); - -template class ParamGeneratorInterface; -template class ParamGenerator; - -// Interface for iterating over elements provided by an implementation -// of ParamGeneratorInterface. -template -class ParamIteratorInterface { - public: - virtual ~ParamIteratorInterface() {} - // A pointer to the base generator instance. - // Used only for the purposes of iterator comparison - // to make sure that two iterators belong to the same generator. - virtual const ParamGeneratorInterface* BaseGenerator() const = 0; - // Advances iterator to point to the next element - // provided by the generator. The caller is responsible - // for not calling Advance() on an iterator equal to - // BaseGenerator()->End(). - virtual void Advance() = 0; - // Clones the iterator object. Used for implementing copy semantics - // of ParamIterator. - virtual ParamIteratorInterface* Clone() const = 0; - // Dereferences the current iterator and provides (read-only) access - // to the pointed value. It is the caller's responsibility not to call - // Current() on an iterator equal to BaseGenerator()->End(). - // Used for implementing ParamGenerator::operator*(). - virtual const T* Current() const = 0; - // Determines whether the given iterator and other point to the same - // element in the sequence generated by the generator. - // Used for implementing ParamGenerator::operator==(). - virtual bool Equals(const ParamIteratorInterface& other) const = 0; -}; - -// Class iterating over elements provided by an implementation of -// ParamGeneratorInterface. It wraps ParamIteratorInterface -// and implements the const forward iterator concept. -template -class ParamIterator { - public: - typedef T value_type; - typedef const T& reference; - typedef ptrdiff_t difference_type; - - // ParamIterator assumes ownership of the impl_ pointer. - ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {} - ParamIterator& operator=(const ParamIterator& other) { - if (this != &other) - impl_.reset(other.impl_->Clone()); - return *this; - } - - const T& operator*() const { return *impl_->Current(); } - const T* operator->() const { return impl_->Current(); } - // Prefix version of operator++. - ParamIterator& operator++() { - impl_->Advance(); - return *this; - } - // Postfix version of operator++. 
- ParamIterator operator++(int /*unused*/) { - ParamIteratorInterface* clone = impl_->Clone(); - impl_->Advance(); - return ParamIterator(clone); - } - bool operator==(const ParamIterator& other) const { - return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_); - } - bool operator!=(const ParamIterator& other) const { - return !(*this == other); - } - - private: - friend class ParamGenerator; - explicit ParamIterator(ParamIteratorInterface* impl) : impl_(impl) {} - scoped_ptr > impl_; -}; - -// ParamGeneratorInterface is the binary interface to access generators -// defined in other translation units. -template -class ParamGeneratorInterface { - public: - typedef T ParamType; - - virtual ~ParamGeneratorInterface() {} - - // Generator interface definition - virtual ParamIteratorInterface* Begin() const = 0; - virtual ParamIteratorInterface* End() const = 0; -}; - -// Wraps ParamGeneratorInterface and provides general generator syntax -// compatible with the STL Container concept. -// This class implements copy initialization semantics and the contained -// ParamGeneratorInterface instance is shared among all copies -// of the original object. This is possible because that instance is immutable. -template -class ParamGenerator { - public: - typedef ParamIterator iterator; - - explicit ParamGenerator(ParamGeneratorInterface* impl) : impl_(impl) {} - ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {} - - ParamGenerator& operator=(const ParamGenerator& other) { - impl_ = other.impl_; - return *this; - } - - iterator begin() const { return iterator(impl_->Begin()); } - iterator end() const { return iterator(impl_->End()); } - - private: - linked_ptr > impl_; -}; - -// Generates values from a range of two comparable values. Can be used to -// generate sequences of user-defined types that implement operator+() and -// operator<(). -// This class is used in the Range() function. -template -class RangeGenerator : public ParamGeneratorInterface { - public: - RangeGenerator(T begin, T end, IncrementT step) - : begin_(begin), end_(end), - step_(step), end_index_(CalculateEndIndex(begin, end, step)) {} - virtual ~RangeGenerator() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, begin_, 0, step_); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, end_, end_index_, step_); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, T value, int index, - IncrementT step) - : base_(base), value_(value), index_(index), step_(step) {} - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - virtual void Advance() { - value_ = value_ + step_; - index_++; - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const T* Current() const { return &value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." 
<< std::endl; - const int other_index = - CheckedDowncastToActualType(&other)->index_; - return index_ == other_index; - } - - private: - Iterator(const Iterator& other) - : ParamIteratorInterface(), - base_(other.base_), value_(other.value_), index_(other.index_), - step_(other.step_) {} - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - T value_; - int index_; - const IncrementT step_; - }; // class RangeGenerator::Iterator - - static int CalculateEndIndex(const T& begin, - const T& end, - const IncrementT& step) { - int end_index = 0; - for (T i = begin; i < end; i = i + step) - end_index++; - return end_index; - } - - // No implementation - assignment is unsupported. - void operator=(const RangeGenerator& other); - - const T begin_; - const T end_; - const IncrementT step_; - // The index for the end() iterator. All the elements in the generated - // sequence are indexed (0-based) to aid iterator comparison. - const int end_index_; -}; // class RangeGenerator - - -// Generates values from a pair of STL-style iterators. Used in the -// ValuesIn() function. The elements are copied from the source range -// since the source can be located on the stack, and the generator -// is likely to persist beyond that stack frame. -template -class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface { - public: - template - ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end) - : container_(begin, end) {} - virtual ~ValuesInIteratorRangeGenerator() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, container_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, container_.end()); - } - - private: - typedef typename ::std::vector ContainerType; - - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - typename ContainerType::const_iterator iterator) - : base_(base), iterator_(iterator) {} - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - virtual void Advance() { - ++iterator_; - value_.reset(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - // We need to use cached value referenced by iterator_ because *iterator_ - // can return a temporary object (and of type other then T), so just - // having "return &*iterator_;" doesn't work. - // value_ is updated here and not in Advance() because Advance() - // can advance iterator_ beyond the end of the range, and we cannot - // detect that fact. The client code, on the other hand, is - // responsible for not calling Current() on an out-of-range iterator. - virtual const T* Current() const { - if (value_.get() == NULL) - value_.reset(new T(*iterator_)); - return value_.get(); - } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - return iterator_ == - CheckedDowncastToActualType(&other)->iterator_; - } - - private: - Iterator(const Iterator& other) - // The explicit constructor call suppresses a false warning - // emitted by gcc when supplied with the -Wextra option. 
- : ParamIteratorInterface(), - base_(other.base_), - iterator_(other.iterator_) {} - - const ParamGeneratorInterface* const base_; - typename ContainerType::const_iterator iterator_; - // A cached value of *iterator_. We keep it here to allow access by - // pointer in the wrapping iterator's operator->(). - // value_ needs to be mutable to be accessed in Current(). - // Use of scoped_ptr helps manage cached value's lifetime, - // which is bound by the lifespan of the iterator itself. - mutable scoped_ptr value_; - }; // class ValuesInIteratorRangeGenerator::Iterator - - // No implementation - assignment is unsupported. - void operator=(const ValuesInIteratorRangeGenerator& other); - - const ContainerType container_; -}; // class ValuesInIteratorRangeGenerator - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Stores a parameter value and later creates tests parameterized with that -// value. -template -class ParameterizedTestFactory : public TestFactoryBase { - public: - typedef typename TestClass::ParamType ParamType; - explicit ParameterizedTestFactory(ParamType parameter) : - parameter_(parameter) {} - virtual Test* CreateTest() { - TestClass::SetParam(¶meter_); - return new TestClass(); - } - - private: - const ParamType parameter_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory); -}; - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// TestMetaFactoryBase is a base class for meta-factories that create -// test factories for passing into MakeAndRegisterTestInfo function. -template -class TestMetaFactoryBase { - public: - virtual ~TestMetaFactoryBase() {} - - virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0; -}; - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// TestMetaFactory creates test factories for passing into -// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives -// ownership of test factory pointer, same factory object cannot be passed -// into that method twice. But ParameterizedTestCaseInfo is going to call -// it for each Test/Parameter value combination. Thus it needs meta factory -// creator class. -template -class TestMetaFactory - : public TestMetaFactoryBase { - public: - typedef typename TestCase::ParamType ParamType; - - TestMetaFactory() {} - - virtual TestFactoryBase* CreateTestFactory(ParamType parameter) { - return new ParameterizedTestFactory(parameter); - } - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory); -}; - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// ParameterizedTestCaseInfoBase is a generic interface -// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase -// accumulates test information provided by TEST_P macro invocations -// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations -// and uses that information to register all resulting test instances -// in RegisterTests method. The ParameterizeTestCaseRegistry class holds -// a collection of pointers to the ParameterizedTestCaseInfo objects -// and calls RegisterTests() on each of them when asked. -class ParameterizedTestCaseInfoBase { - public: - virtual ~ParameterizedTestCaseInfoBase() {} - - // Base part of test case name for display purposes. - virtual const string& GetTestCaseName() const = 0; - // Test case id to verify identity. - virtual TypeId GetTestCaseTypeId() const = 0; - // UnitTest class invokes this method to register tests in this - // test case right before running them in RUN_ALL_TESTS macro. 
- // This method should not be called more then once on any single - // instance of a ParameterizedTestCaseInfoBase derived class. - virtual void RegisterTests() = 0; - - protected: - ParameterizedTestCaseInfoBase() {} - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase); -}; - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P -// macro invocations for a particular test case and generators -// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that -// test case. It registers tests with all values generated by all -// generators when asked. -template -class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { - public: - // ParamType and GeneratorCreationFunc are private types but are required - // for declarations of public methods AddTestPattern() and - // AddTestCaseInstantiation(). - typedef typename TestCase::ParamType ParamType; - // A function that returns an instance of appropriate generator type. - typedef ParamGenerator(GeneratorCreationFunc)(); - - explicit ParameterizedTestCaseInfo(const char* name) - : test_case_name_(name) {} - - // Test case base name for display purposes. - virtual const string& GetTestCaseName() const { return test_case_name_; } - // Test case id to verify identity. - virtual TypeId GetTestCaseTypeId() const { return GetTypeId(); } - // TEST_P macro uses AddTestPattern() to record information - // about a single test in a LocalTestInfo structure. - // test_case_name is the base name of the test case (without invocation - // prefix). test_base_name is the name of an individual test without - // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is - // test case base name and DoBar is test base name. - void AddTestPattern(const char* test_case_name, - const char* test_base_name, - TestMetaFactoryBase* meta_factory) { - tests_.push_back(linked_ptr(new TestInfo(test_case_name, - test_base_name, - meta_factory))); - } - // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information - // about a generator. - int AddTestCaseInstantiation(const string& instantiation_name, - GeneratorCreationFunc* func, - const char* /* file */, - int /* line */) { - instantiations_.push_back(::std::make_pair(instantiation_name, func)); - return 0; // Return value used only to run this method in namespace scope. - } - // UnitTest class invokes this method to register tests in this test case - // test cases right before running tests in RUN_ALL_TESTS macro. - // This method should not be called more then once on any single - // instance of a ParameterizedTestCaseInfoBase derived class. - // UnitTest has a guard to prevent from calling this method more then once. 
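// Illustrative sketch, not part of the original header: the user-level code
// that drives AddTestPattern() and AddTestCaseInstantiation() above, and the
// full test names produced when RegisterTests() below runs.  Assumes the
// fused gtest header is available as "gtest/gtest.h"; FooTest is hypothetical.

#include "gtest/gtest.h"

class FooTest : public ::testing::TestWithParam<int> {};

TEST_P(FooTest, DoBar) {                     // records pattern ("FooTest", "DoBar")
  EXPECT_GE(GetParam(), 0);
}

// Records the generator under the instantiation name "SequenceA"; at
// registration time this expands to SequenceA/FooTest.DoBar/0,
// SequenceA/FooTest.DoBar/1 and SequenceA/FooTest.DoBar/2.
INSTANTIATE_TEST_CASE_P(SequenceA, FooTest, ::testing::Values(1, 2, 3));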
- virtual void RegisterTests() { - for (typename TestInfoContainer::iterator test_it = tests_.begin(); - test_it != tests_.end(); ++test_it) { - linked_ptr test_info = *test_it; - for (typename InstantiationContainer::iterator gen_it = - instantiations_.begin(); gen_it != instantiations_.end(); - ++gen_it) { - const string& instantiation_name = gen_it->first; - ParamGenerator generator((*gen_it->second)()); - - Message test_case_name_stream; - if ( !instantiation_name.empty() ) - test_case_name_stream << instantiation_name << "/"; - test_case_name_stream << test_info->test_case_base_name; - - int i = 0; - for (typename ParamGenerator::iterator param_it = - generator.begin(); - param_it != generator.end(); ++param_it, ++i) { - Message test_name_stream; - test_name_stream << test_info->test_base_name << "/" << i; - MakeAndRegisterTestInfo( - test_case_name_stream.GetString().c_str(), - test_name_stream.GetString().c_str(), - NULL, // No type parameter. - PrintToString(*param_it).c_str(), - GetTestCaseTypeId(), - TestCase::SetUpTestCase, - TestCase::TearDownTestCase, - test_info->test_meta_factory->CreateTestFactory(*param_it)); - } // for param_it - } // for gen_it - } // for test_it - } // RegisterTests - - private: - // LocalTestInfo structure keeps information about a single test registered - // with TEST_P macro. - struct TestInfo { - TestInfo(const char* a_test_case_base_name, - const char* a_test_base_name, - TestMetaFactoryBase* a_test_meta_factory) : - test_case_base_name(a_test_case_base_name), - test_base_name(a_test_base_name), - test_meta_factory(a_test_meta_factory) {} - - const string test_case_base_name; - const string test_base_name; - const scoped_ptr > test_meta_factory; - }; - typedef ::std::vector > TestInfoContainer; - // Keeps pairs of - // received from INSTANTIATE_TEST_CASE_P macros. - typedef ::std::vector > - InstantiationContainer; - - const string test_case_name_; - TestInfoContainer tests_; - InstantiationContainer instantiations_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo); -}; // class ParameterizedTestCaseInfo - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase -// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P -// macros use it to locate their corresponding ParameterizedTestCaseInfo -// descriptors. -class ParameterizedTestCaseRegistry { - public: - ParameterizedTestCaseRegistry() {} - ~ParameterizedTestCaseRegistry() { - for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); - it != test_case_infos_.end(); ++it) { - delete *it; - } - } - - // Looks up or creates and returns a structure containing information about - // tests and instantiations of a particular test case. - template - ParameterizedTestCaseInfo* GetTestCasePatternHolder( - const char* test_case_name, - const char* file, - int line) { - ParameterizedTestCaseInfo* typed_test_info = NULL; - for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); - it != test_case_infos_.end(); ++it) { - if ((*it)->GetTestCaseName() == test_case_name) { - if ((*it)->GetTestCaseTypeId() != GetTypeId()) { - // Complain about incorrect usage of Google Test facilities - // and terminate the program since we cannot guaranty correct - // test case setup and tear-down in this case. 
- ReportInvalidTestCaseType(test_case_name, file, line); - posix::Abort(); - } else { - // At this point we are sure that the object we found is of the same - // type we are looking for, so we downcast it to that type - // without further checks. - typed_test_info = CheckedDowncastToActualType< - ParameterizedTestCaseInfo >(*it); - } - break; - } - } - if (typed_test_info == NULL) { - typed_test_info = new ParameterizedTestCaseInfo(test_case_name); - test_case_infos_.push_back(typed_test_info); - } - return typed_test_info; - } - void RegisterTests() { - for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); - it != test_case_infos_.end(); ++it) { - (*it)->RegisterTests(); - } - } - - private: - typedef ::std::vector TestCaseInfoContainer; - - TestCaseInfoContainer test_case_infos_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry); -}; - -} // namespace internal -} // namespace testing - -#endif // GTEST_HAS_PARAM_TEST - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ -// This file was GENERATED by command: -// pump.py gtest-param-util-generated.h.pump -// DO NOT EDIT BY HAND!!! - -// Copyright 2008 Google Inc. -// All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: vladl@google.com (Vlad Losev) - -// Type and function utilities for implementing parameterized tests. -// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! -// -// Currently Google Test supports at most 50 arguments in Values, -// and at most 10 arguments in Combine. Please contact -// googletestframework@googlegroups.com if you need more. -// Please note that the number of arguments to Combine is limited -// by the maximum arity of the implementation of tr1::tuple which is -// currently set at 10. - -#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ -#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ - -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. 
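// Illustrative sketch, not part of the original header: Values() captures its
// arguments in one of the ValueArrayN classes below, and the templated
// conversion operator turns them into a ParamGenerator of whatever parameter
// type the test needs.  ParamGenerator is an internal type; it is spelled out
// here only to make that conversion visible.  Assumes the fused gtest header
// is available as "gtest/gtest.h" and that param tests are enabled.

#include <iostream>
#include "gtest/gtest.h"

int main() {
  ::testing::internal::ParamGenerator<double> gen =
      ::testing::Values(1, 2.5f, 3.0);          // int, float, double -> double
  typedef ::testing::internal::ParamGenerator<double>::iterator Iter;
  for (Iter it = gen.begin(); it != gen.end(); ++it)
    std::cout << *it << ' ';                    // 1 2.5 3
  std::cout << '\n';
}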
- -#if GTEST_HAS_PARAM_TEST - -namespace testing { - -// Forward declarations of ValuesIn(), which is implemented in -// include/gtest/gtest-param-test.h. -template -internal::ParamGenerator< - typename ::testing::internal::IteratorTraits::value_type> -ValuesIn(ForwardIterator begin, ForwardIterator end); - -template -internal::ParamGenerator ValuesIn(const T (&array)[N]); - -template -internal::ParamGenerator ValuesIn( - const Container& container); - -namespace internal { - -// Used in the Values() function to provide polymorphic capabilities. -template -class ValueArray1 { - public: - explicit ValueArray1(T1 v1) : v1_(v1) {} - - template - operator ParamGenerator() const { return ValuesIn(&v1_, &v1_ + 1); } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray1& other); - - const T1 v1_; -}; - -template -class ValueArray2 { - public: - ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray2& other); - - const T1 v1_; - const T2 v2_; -}; - -template -class ValueArray3 { - public: - ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray3& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; -}; - -template -class ValueArray4 { - public: - ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3), - v4_(v4) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray4& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; -}; - -template -class ValueArray5 { - public: - ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3), - v4_(v4), v5_(v5) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray5& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; -}; - -template -class ValueArray6 { - public: - ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2), - v3_(v3), v4_(v4), v5_(v5), v6_(v6) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray6& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; -}; - -template -class ValueArray7 { - public: - ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1), - v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray7& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; -}; - -template -class ValueArray8 { - public: - ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, - T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray8& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; -}; - -template -class ValueArray9 { - public: - ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, - T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray9& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; -}; - -template -class ValueArray10 { - public: - ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray10& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; -}; - -template -class ValueArray11 { - public: - ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), - v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray11& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; -}; - -template -class ValueArray12 { - public: - ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), - v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray12& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; -}; - -template -class ValueArray13 { - public: - ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), - v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), - v12_(v12), v13_(v13) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray13& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; -}; - -template -class ValueArray14 { - public: - ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3), - v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray14& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; -}; - -template -class ValueArray15 { - public: - ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2), - v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray15& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; -}; - -template -class ValueArray16 { - public: - ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1), - v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), - v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), - v16_(v16) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray16& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; -}; - -template -class ValueArray17 { - public: - ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, - T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray17& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; -}; - -template -class ValueArray18 { - public: - ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray18& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; -}; - -template -class ValueArray19 { - public: - ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), - v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), - v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray19& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; -}; - -template -class ValueArray20 { - public: - ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), - v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), - v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), - v19_(v19), v20_(v20) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray20& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; -}; - -template -class ValueArray21 { - public: - ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), - v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), - v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), - v18_(v18), v19_(v19), v20_(v20), v21_(v21) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray21& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; -}; - -template -class ValueArray22 { - public: - ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3), - v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray22& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; -}; - -template -class ValueArray23 { - public: - ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2), - v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), - v23_(v23) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, - v23_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray23& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; -}; - -template -class ValueArray24 { - public: - ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1), - v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), - v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), - v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), - v22_(v22), v23_(v23), v24_(v24) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray24& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; -}; - -template -class ValueArray25 { - public: - ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, - T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray25& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; -}; - -template -class ValueArray26 { - public: - ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray26& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; -}; - -template -class ValueArray27 { - public: - ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), - v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), - v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), - v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), - v26_(v26), v27_(v27) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray27& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; -}; - -template -class ValueArray28 { - public: - ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), - v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), - v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), - v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), - v25_(v25), v26_(v26), v27_(v27), v28_(v28) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray28& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; -}; - -template -class ValueArray29 { - public: - ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), - v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), - v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), - v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), - v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray29& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; -}; - -template -class ValueArray30 { - public: - ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) : v1_(v1), v2_(v2), v3_(v3), - v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), - v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), - v29_(v29), v30_(v30) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray30& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; -}; - -template -class ValueArray31 { - public: - ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) : v1_(v1), v2_(v2), - v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), - v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), - v29_(v29), v30_(v30), v31_(v31) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray31& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; -}; - -template -class ValueArray32 { - public: - ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) : v1_(v1), - v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), - v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), - v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), - v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), - v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray32& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; -}; - -template -class ValueArray33 { - public: - ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, - T33 v33) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), - v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), - v33_(v33) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray33& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; -}; - -template -class ValueArray34 { - public: - ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), - v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), - v33_(v33), v34_(v34) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray34& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; -}; - -template -class ValueArray35 { - public: - ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), - v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), - v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), - v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), - v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), - v32_(v32), v33_(v33), v34_(v34), v35_(v35) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, - v35_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray35& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; -}; - -template -class ValueArray36 { - public: - ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), - v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), - v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), - v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), - v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), - v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment 
is unsupported. - void operator=(const ValueArray36& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; -}; - -template -class ValueArray37 { - public: - ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), - v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), - v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), - v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), - v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), - v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), - v36_(v36), v37_(v37) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray37& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; -}; - -template -class ValueArray38 { - public: - ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3), - v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), - v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), - v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), - v35_(v35), v36_(v36), v37_(v37), v38_(v38) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray38& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; -}; - -template -class ValueArray39 { - public: - ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) : v1_(v1), v2_(v2), - v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), - v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), - v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), - v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray39& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; -}; - -template -class ValueArray40 { - public: - ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) : v1_(v1), - v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), - v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), - v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), - v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), - v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), - v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), - v40_(v40) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray40& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; -}; - -template -class ValueArray41 { - public: - ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, - T41 v41) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), - v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), - v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), - v39_(v39), v40_(v40), v41_(v41) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray41& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; -}; - -template -class ValueArray42 { - public: - ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), - v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), - v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), - v39_(v39), v40_(v40), v41_(v41), v42_(v42) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray42& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; -}; - -template -class ValueArray43 { - public: - ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), - v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), - v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), - v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), - v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), - v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), - v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray43& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; -}; - -template -class ValueArray44 { - public: - ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), - v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), - v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), - v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), - v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), - v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), - v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), - v43_(v43), v44_(v44) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_, v44_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray44& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; - const T44 v44_; -}; - -template -class ValueArray45 { - public: - ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44, T45 v45) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), - v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), - v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), - v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), - v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), - v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), - v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), - v42_(v42), v43_(v43), v44_(v44), v45_(v45) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_, v44_, v45_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray45& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; - const T44 v44_; - const T45 v45_; -}; - -template -class ValueArray46 { - public: - ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) : v1_(v1), v2_(v2), v3_(v3), - v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), - v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), - v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), - v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), - v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_, v44_, v45_, v46_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray46& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; - const T44 v44_; - const T45 v45_; - const T46 v46_; -}; - -template -class ValueArray47 { - public: - ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) : v1_(v1), v2_(v2), - v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), - v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), - v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), - v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), - v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), - v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), - v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46), - v47_(v47) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_, v44_, v45_, v46_, - v47_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray47& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; - const T44 v44_; - const T45 v45_; - const T46 v46_; - const T47 v47_; -}; - -template -class ValueArray48 { - public: - ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) : v1_(v1), - v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), - v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), - v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), - v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), - v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), - v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), - v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), - v46_(v46), v47_(v47), v48_(v48) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_, v44_, v45_, v46_, v47_, - v48_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray48& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; - const T44 v44_; - const T45 v45_; - const T46 v46_; - const T47 v47_; - const T48 v48_; -}; - -template -class ValueArray49 { - public: - ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, - T49 v49) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), - v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), - v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), - v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), - v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_, v44_, v45_, v46_, v47_, - v48_, v49_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const ValueArray49& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; - const T44 v44_; - const T45 v45_; - const T46 v46_; - const T47 v47_; - const T48 v48_; - const T49 v49_; -}; - -template -class ValueArray50 { - public: - ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49, - T50 v50) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), - v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), - v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), - v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), - v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), - v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), - v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), - v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {} - - template - operator ParamGenerator() const { - const T array[] = {v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_, v11_, - v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_, v21_, v22_, v23_, - v24_, v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_, v33_, v34_, v35_, - v36_, v37_, v38_, v39_, v40_, v41_, v42_, v43_, v44_, v45_, v46_, v47_, - v48_, v49_, v50_}; - return ValuesIn(array); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const ValueArray50& other); - - const T1 v1_; - const T2 v2_; - const T3 v3_; - const T4 v4_; - const T5 v5_; - const T6 v6_; - const T7 v7_; - const T8 v8_; - const T9 v9_; - const T10 v10_; - const T11 v11_; - const T12 v12_; - const T13 v13_; - const T14 v14_; - const T15 v15_; - const T16 v16_; - const T17 v17_; - const T18 v18_; - const T19 v19_; - const T20 v20_; - const T21 v21_; - const T22 v22_; - const T23 v23_; - const T24 v24_; - const T25 v25_; - const T26 v26_; - const T27 v27_; - const T28 v28_; - const T29 v29_; - const T30 v30_; - const T31 v31_; - const T32 v32_; - const T33 v33_; - const T34 v34_; - const T35 v35_; - const T36 v36_; - const T37 v37_; - const T38 v38_; - const T39 v39_; - const T40 v40_; - const T41 v41_; - const T42 v42_; - const T43 v43_; - const T44 v44_; - const T45 v45_; - const T46 v46_; - const T47 v47_; - const T48 v48_; - const T49 v49_; - const T50 v50_; -}; - -# if GTEST_HAS_COMBINE -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Generates values from the Cartesian product of values produced -// by the argument generators. 
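Editor's note: a minimal usage sketch (not part of the deleted header) showing how the ValueArrayN classes above are reached from user code in this vintage of Google Test. ::testing::Values(...) returns a ValueArrayN object; its templated conversion operator copies the arguments into a local array and forwards them to ValuesIn(), yielding a ParamGenerator<T> for whatever T the fixture's TestWithParam<T> declares. The fixture and test names below are illustrative.

#include <gtest/gtest.h>

class SmallIntTest : public ::testing::TestWithParam<int> {};

TEST_P(SmallIntTest, IsNonNegative) {
  EXPECT_GE(GetParam(), 0);
}

// Values(0, 1, 2) builds a ValueArray3<int, int, int>, which converts to
// ParamGenerator<int> when the instantiation below is registered.
INSTANTIATE_TEST_CASE_P(Small, SmallIntTest, ::testing::Values(0, 1, 2));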
-// -template -class CartesianProductGenerator2 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator2(const ParamGenerator& g1, - const ParamGenerator& g2) - : g1_(g1), g2_(g2) {} - virtual ~CartesianProductGenerator2() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. - virtual void Advance() { - assert(!AtEnd()); - ++current2_; - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). - return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_; - } - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - ParamType current_value_; - }; // class CartesianProductGenerator2::Iterator - - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductGenerator2& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; -}; // class CartesianProductGenerator2 - - -template -class CartesianProductGenerator3 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator3(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3) - : g1_(g1), g2_(g2), g3_(g3) {} - virtual ~CartesianProductGenerator3() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. - virtual void Advance() { - assert(!AtEnd()); - ++current3_; - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). - return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_; - } - - // No implementation - assignment is unsupported. 
- void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - ParamType current_value_; - }; // class CartesianProductGenerator3::Iterator - - // No implementation - assignment is unsupported. - void operator=(const CartesianProductGenerator3& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; -}; // class CartesianProductGenerator3 - - -template -class CartesianProductGenerator4 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator4(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3, - const ParamGenerator& g4) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} - virtual ~CartesianProductGenerator4() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin(), g4_, g4_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), - g4_, g4_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3, - const ParamGenerator& g4, - const typename ParamGenerator::iterator& current4) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3), - begin4_(g4.begin()), end4_(g4.end()), current4_(current4) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. - virtual void Advance() { - assert(!AtEnd()); - ++current4_; - if (current4_ == end4_) { - current4_ = begin4_; - ++current3_; - } - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." 
<< std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). - return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_ && - current4_ == typed_other->current4_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_), - begin4_(other.begin4_), - end4_(other.end4_), - current4_(other.current4_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_ || - current4_ == end4_; - } - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - const typename ParamGenerator::iterator begin4_; - const typename ParamGenerator::iterator end4_; - typename ParamGenerator::iterator current4_; - ParamType current_value_; - }; // class CartesianProductGenerator4::Iterator - - // No implementation - assignment is unsupported. 
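Editor's note: the Advance() methods above all implement the same carry ("odometer") traversal: the right-most component iterator moves fastest and, when it wraps back to its begin, the next slower iterator is bumped; AtEnd() reports the end as soon as any component reaches its end, so an empty component range makes the whole product empty. A self-contained sketch of that loop, with illustrative data:

#include <cstdio>

int main() {
  const int  xs[] = {1, 2, 3};
  const char ys[] = {'a', 'b'};
  const int nx = sizeof(xs) / sizeof(xs[0]);
  const int ny = sizeof(ys) / sizeof(ys[0]);
  int i = 0, j = 0;
  while (i < nx && j < ny) {        // AtEnd(): stop once any index passes its range
    std::printf("(%d, %c)\n", xs[i], ys[j]);
    ++j;                            // Advance(): the right-most index moves fastest
    if (j == ny) { j = 0; ++i; }    // on wrap, reset it and carry into the slower index
  }
  return 0;                         // prints the 3 x 2 = 6 pairs in odometer order
}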
- void operator=(const CartesianProductGenerator4& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; - const ParamGenerator g4_; -}; // class CartesianProductGenerator4 - - -template -class CartesianProductGenerator5 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator5(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3, - const ParamGenerator& g4, const ParamGenerator& g5) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} - virtual ~CartesianProductGenerator5() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), - g4_, g4_.end(), g5_, g5_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3, - const ParamGenerator& g4, - const typename ParamGenerator::iterator& current4, - const ParamGenerator& g5, - const typename ParamGenerator::iterator& current5) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3), - begin4_(g4.begin()), end4_(g4.end()), current4_(current4), - begin5_(g5.begin()), end5_(g5.end()), current5_(current5) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. - virtual void Advance() { - assert(!AtEnd()); - ++current5_; - if (current5_ == end5_) { - current5_ = begin5_; - ++current4_; - } - if (current4_ == end4_) { - current4_ = begin4_; - ++current3_; - } - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). 
- return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_ && - current4_ == typed_other->current4_ && - current5_ == typed_other->current5_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_), - begin4_(other.begin4_), - end4_(other.end4_), - current4_(other.current4_), - begin5_(other.begin5_), - end5_(other.end5_), - current5_(other.current5_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_ || - current4_ == end4_ || - current5_ == end5_; - } - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - const typename ParamGenerator::iterator begin4_; - const typename ParamGenerator::iterator end4_; - typename ParamGenerator::iterator current4_; - const typename ParamGenerator::iterator begin5_; - const typename ParamGenerator::iterator end5_; - typename ParamGenerator::iterator current5_; - ParamType current_value_; - }; // class CartesianProductGenerator5::Iterator - - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductGenerator5& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; - const ParamGenerator g4_; - const ParamGenerator g5_; -}; // class CartesianProductGenerator5 - - -template -class CartesianProductGenerator6 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator6(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3, - const ParamGenerator& g4, const ParamGenerator& g5, - const ParamGenerator& g6) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} - virtual ~CartesianProductGenerator6() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), - g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3, - const ParamGenerator& g4, - const typename ParamGenerator::iterator& current4, - const ParamGenerator& g5, - const typename ParamGenerator::iterator& current5, - const ParamGenerator& g6, - const typename ParamGenerator::iterator& current6) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3), - begin4_(g4.begin()), end4_(g4.end()), current4_(current4), - begin5_(g5.begin()), end5_(g5.end()), current5_(current5), - begin6_(g6.begin()), end6_(g6.end()), current6_(current6) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. - virtual void Advance() { - assert(!AtEnd()); - ++current6_; - if (current6_ == end6_) { - current6_ = begin6_; - ++current5_; - } - if (current5_ == end5_) { - current5_ = begin5_; - ++current4_; - } - if (current4_ == end4_) { - current4_ = begin4_; - ++current3_; - } - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). 
- return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_ && - current4_ == typed_other->current4_ && - current5_ == typed_other->current5_ && - current6_ == typed_other->current6_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_), - begin4_(other.begin4_), - end4_(other.end4_), - current4_(other.current4_), - begin5_(other.begin5_), - end5_(other.end5_), - current5_(other.current5_), - begin6_(other.begin6_), - end6_(other.end6_), - current6_(other.current6_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_ || - current4_ == end4_ || - current5_ == end5_ || - current6_ == end6_; - } - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - const typename ParamGenerator::iterator begin4_; - const typename ParamGenerator::iterator end4_; - typename ParamGenerator::iterator current4_; - const typename ParamGenerator::iterator begin5_; - const typename ParamGenerator::iterator end5_; - typename ParamGenerator::iterator current5_; - const typename ParamGenerator::iterator begin6_; - const typename ParamGenerator::iterator end6_; - typename ParamGenerator::iterator current6_; - ParamType current_value_; - }; // class CartesianProductGenerator6::Iterator - - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductGenerator6& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; - const ParamGenerator g4_; - const ParamGenerator g5_; - const ParamGenerator g6_; -}; // class CartesianProductGenerator6 - - -template -class CartesianProductGenerator7 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator7(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3, - const ParamGenerator& g4, const ParamGenerator& g5, - const ParamGenerator& g6, const ParamGenerator& g7) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} - virtual ~CartesianProductGenerator7() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, - g7_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), - g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3, - const ParamGenerator& g4, - const typename ParamGenerator::iterator& current4, - const ParamGenerator& g5, - const typename ParamGenerator::iterator& current5, - const ParamGenerator& g6, - const typename ParamGenerator::iterator& current6, - const ParamGenerator& g7, - const typename ParamGenerator::iterator& current7) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3), - begin4_(g4.begin()), end4_(g4.end()), current4_(current4), - begin5_(g5.begin()), end5_(g5.end()), current5_(current5), - begin6_(g6.begin()), end6_(g6.end()), current6_(current6), - begin7_(g7.begin()), end7_(g7.end()), current7_(current7) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. - virtual void Advance() { - assert(!AtEnd()); - ++current7_; - if (current7_ == end7_) { - current7_ = begin7_; - ++current6_; - } - if (current6_ == end6_) { - current6_ = begin6_; - ++current5_; - } - if (current5_ == end5_) { - current5_ = begin5_; - ++current4_; - } - if (current4_ == end4_) { - current4_ = begin4_; - ++current3_; - } - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. 
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). - return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_ && - current4_ == typed_other->current4_ && - current5_ == typed_other->current5_ && - current6_ == typed_other->current6_ && - current7_ == typed_other->current7_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_), - begin4_(other.begin4_), - end4_(other.end4_), - current4_(other.current4_), - begin5_(other.begin5_), - end5_(other.end5_), - current5_(other.current5_), - begin6_(other.begin6_), - end6_(other.end6_), - current6_(other.current6_), - begin7_(other.begin7_), - end7_(other.end7_), - current7_(other.current7_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_, *current7_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_ || - current4_ == end4_ || - current5_ == end5_ || - current6_ == end6_ || - current7_ == end7_; - } - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - const typename ParamGenerator::iterator begin4_; - const typename ParamGenerator::iterator end4_; - typename ParamGenerator::iterator current4_; - const typename ParamGenerator::iterator begin5_; - const typename ParamGenerator::iterator end5_; - typename ParamGenerator::iterator current5_; - const typename ParamGenerator::iterator begin6_; - const typename ParamGenerator::iterator end6_; - typename ParamGenerator::iterator current6_; - const typename ParamGenerator::iterator begin7_; - const typename ParamGenerator::iterator end7_; - typename ParamGenerator::iterator current7_; - ParamType current_value_; - }; // class CartesianProductGenerator7::Iterator - - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductGenerator7& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; - const ParamGenerator g4_; - const ParamGenerator g5_; - const ParamGenerator g6_; - const ParamGenerator g7_; -}; // class CartesianProductGenerator7 - - -template -class CartesianProductGenerator8 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator8(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3, - const ParamGenerator& g4, const ParamGenerator& g5, - const ParamGenerator& g6, const ParamGenerator& g7, - const ParamGenerator& g8) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), - g8_(g8) {} - virtual ~CartesianProductGenerator8() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, - g7_.begin(), g8_, g8_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), - g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, - g8_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3, - const ParamGenerator& g4, - const typename ParamGenerator::iterator& current4, - const ParamGenerator& g5, - const typename ParamGenerator::iterator& current5, - const ParamGenerator& g6, - const typename ParamGenerator::iterator& current6, - const ParamGenerator& g7, - const typename ParamGenerator::iterator& current7, - const ParamGenerator& g8, - const typename ParamGenerator::iterator& current8) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3), - begin4_(g4.begin()), end4_(g4.end()), current4_(current4), - begin5_(g5.begin()), end5_(g5.end()), current5_(current5), - begin6_(g6.begin()), end6_(g6.end()), current6_(current6), - begin7_(g7.begin()), end7_(g7.end()), current7_(current7), - begin8_(g8.begin()), end8_(g8.end()), current8_(current8) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. 
- virtual void Advance() { - assert(!AtEnd()); - ++current8_; - if (current8_ == end8_) { - current8_ = begin8_; - ++current7_; - } - if (current7_ == end7_) { - current7_ = begin7_; - ++current6_; - } - if (current6_ == end6_) { - current6_ = begin6_; - ++current5_; - } - if (current5_ == end5_) { - current5_ = begin5_; - ++current4_; - } - if (current4_ == end4_) { - current4_ = begin4_; - ++current3_; - } - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). - return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_ && - current4_ == typed_other->current4_ && - current5_ == typed_other->current5_ && - current6_ == typed_other->current6_ && - current7_ == typed_other->current7_ && - current8_ == typed_other->current8_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_), - begin4_(other.begin4_), - end4_(other.end4_), - current4_(other.current4_), - begin5_(other.begin5_), - end5_(other.end5_), - current5_(other.current5_), - begin6_(other.begin6_), - end6_(other.end6_), - current6_(other.current6_), - begin7_(other.begin7_), - end7_(other.end7_), - current7_(other.current7_), - begin8_(other.begin8_), - end8_(other.end8_), - current8_(other.current8_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_, *current7_, *current8_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_ || - current4_ == end4_ || - current5_ == end5_ || - current6_ == end6_ || - current7_ == end7_ || - current8_ == end8_; - } - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. 
- const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - const typename ParamGenerator::iterator begin4_; - const typename ParamGenerator::iterator end4_; - typename ParamGenerator::iterator current4_; - const typename ParamGenerator::iterator begin5_; - const typename ParamGenerator::iterator end5_; - typename ParamGenerator::iterator current5_; - const typename ParamGenerator::iterator begin6_; - const typename ParamGenerator::iterator end6_; - typename ParamGenerator::iterator current6_; - const typename ParamGenerator::iterator begin7_; - const typename ParamGenerator::iterator end7_; - typename ParamGenerator::iterator current7_; - const typename ParamGenerator::iterator begin8_; - const typename ParamGenerator::iterator end8_; - typename ParamGenerator::iterator current8_; - ParamType current_value_; - }; // class CartesianProductGenerator8::Iterator - - // No implementation - assignment is unsupported. - void operator=(const CartesianProductGenerator8& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; - const ParamGenerator g4_; - const ParamGenerator g5_; - const ParamGenerator g6_; - const ParamGenerator g7_; - const ParamGenerator g8_; -}; // class CartesianProductGenerator8 - - -template -class CartesianProductGenerator9 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator9(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3, - const ParamGenerator& g4, const ParamGenerator& g5, - const ParamGenerator& g6, const ParamGenerator& g7, - const ParamGenerator& g8, const ParamGenerator& g9) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), - g9_(g9) {} - virtual ~CartesianProductGenerator9() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, - g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), - g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, - g8_.end(), g9_, g9_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3, - const ParamGenerator& g4, - const typename ParamGenerator::iterator& current4, - const ParamGenerator& g5, - const typename ParamGenerator::iterator& current5, - const ParamGenerator& g6, - const typename ParamGenerator::iterator& current6, - const ParamGenerator& g7, - const typename ParamGenerator::iterator& current7, - const ParamGenerator& g8, - const typename ParamGenerator::iterator& current8, - const ParamGenerator& g9, - const typename ParamGenerator::iterator& current9) - : base_(base), - 
begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3), - begin4_(g4.begin()), end4_(g4.end()), current4_(current4), - begin5_(g5.begin()), end5_(g5.end()), current5_(current5), - begin6_(g6.begin()), end6_(g6.end()), current6_(current6), - begin7_(g7.begin()), end7_(g7.end()), current7_(current7), - begin8_(g8.begin()), end8_(g8.end()), current8_(current8), - begin9_(g9.begin()), end9_(g9.end()), current9_(current9) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. - virtual void Advance() { - assert(!AtEnd()); - ++current9_; - if (current9_ == end9_) { - current9_ = begin9_; - ++current8_; - } - if (current8_ == end8_) { - current8_ = begin8_; - ++current7_; - } - if (current7_ == end7_) { - current7_ = begin7_; - ++current6_; - } - if (current6_ == end6_) { - current6_ = begin6_; - ++current5_; - } - if (current5_ == end5_) { - current5_ = begin5_; - ++current4_; - } - if (current4_ == end4_) { - current4_ = begin4_; - ++current3_; - } - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). 
- return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_ && - current4_ == typed_other->current4_ && - current5_ == typed_other->current5_ && - current6_ == typed_other->current6_ && - current7_ == typed_other->current7_ && - current8_ == typed_other->current8_ && - current9_ == typed_other->current9_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_), - begin4_(other.begin4_), - end4_(other.end4_), - current4_(other.current4_), - begin5_(other.begin5_), - end5_(other.end5_), - current5_(other.current5_), - begin6_(other.begin6_), - end6_(other.end6_), - current6_(other.current6_), - begin7_(other.begin7_), - end7_(other.end7_), - current7_(other.current7_), - begin8_(other.begin8_), - end8_(other.end8_), - current8_(other.current8_), - begin9_(other.begin9_), - end9_(other.end9_), - current9_(other.current9_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_, *current7_, *current8_, - *current9_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_ || - current4_ == end4_ || - current5_ == end5_ || - current6_ == end6_ || - current7_ == end7_ || - current8_ == end8_ || - current9_ == end9_; - } - - // No implementation - assignment is unsupported. - void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - const typename ParamGenerator::iterator begin4_; - const typename ParamGenerator::iterator end4_; - typename ParamGenerator::iterator current4_; - const typename ParamGenerator::iterator begin5_; - const typename ParamGenerator::iterator end5_; - typename ParamGenerator::iterator current5_; - const typename ParamGenerator::iterator begin6_; - const typename ParamGenerator::iterator end6_; - typename ParamGenerator::iterator current6_; - const typename ParamGenerator::iterator begin7_; - const typename ParamGenerator::iterator end7_; - typename ParamGenerator::iterator current7_; - const typename ParamGenerator::iterator begin8_; - const typename ParamGenerator::iterator end8_; - typename ParamGenerator::iterator current8_; - const typename ParamGenerator::iterator begin9_; - const typename ParamGenerator::iterator end9_; - typename ParamGenerator::iterator current9_; - ParamType current_value_; - }; // class CartesianProductGenerator9::Iterator - - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductGenerator9& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; - const ParamGenerator g4_; - const ParamGenerator g5_; - const ParamGenerator g6_; - const ParamGenerator g7_; - const ParamGenerator g8_; - const ParamGenerator g9_; -}; // class CartesianProductGenerator9 - - -template -class CartesianProductGenerator10 - : public ParamGeneratorInterface< ::std::tr1::tuple > { - public: - typedef ::std::tr1::tuple ParamType; - - CartesianProductGenerator10(const ParamGenerator& g1, - const ParamGenerator& g2, const ParamGenerator& g3, - const ParamGenerator& g4, const ParamGenerator& g5, - const ParamGenerator& g6, const ParamGenerator& g7, - const ParamGenerator& g8, const ParamGenerator& g9, - const ParamGenerator& g10) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), - g9_(g9), g10_(g10) {} - virtual ~CartesianProductGenerator10() {} - - virtual ParamIteratorInterface* Begin() const { - return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, - g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, - g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin()); - } - virtual ParamIteratorInterface* End() const { - return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), - g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, - g8_.end(), g9_, g9_.end(), g10_, g10_.end()); - } - - private: - class Iterator : public ParamIteratorInterface { - public: - Iterator(const ParamGeneratorInterface* base, - const ParamGenerator& g1, - const typename ParamGenerator::iterator& current1, - const ParamGenerator& g2, - const typename ParamGenerator::iterator& current2, - const ParamGenerator& g3, - const typename ParamGenerator::iterator& current3, - const ParamGenerator& g4, - const typename ParamGenerator::iterator& current4, - const ParamGenerator& g5, - const typename ParamGenerator::iterator& current5, - const ParamGenerator& g6, - const typename ParamGenerator::iterator& current6, - const ParamGenerator& g7, - const typename ParamGenerator::iterator& current7, - const ParamGenerator& g8, - const typename ParamGenerator::iterator& current8, - const ParamGenerator& g9, - const typename ParamGenerator::iterator& current9, - const ParamGenerator& g10, - const typename ParamGenerator::iterator& current10) - : base_(base), - begin1_(g1.begin()), end1_(g1.end()), current1_(current1), - begin2_(g2.begin()), end2_(g2.end()), current2_(current2), - begin3_(g3.begin()), end3_(g3.end()), current3_(current3), - begin4_(g4.begin()), end4_(g4.end()), current4_(current4), - begin5_(g5.begin()), end5_(g5.end()), current5_(current5), - begin6_(g6.begin()), end6_(g6.end()), current6_(current6), - begin7_(g7.begin()), end7_(g7.end()), current7_(current7), - begin8_(g8.begin()), end8_(g8.end()), current8_(current8), - begin9_(g9.begin()), end9_(g9.end()), current9_(current9), - begin10_(g10.begin()), end10_(g10.end()), current10_(current10) { - ComputeCurrentValue(); - } - virtual ~Iterator() {} - - virtual const ParamGeneratorInterface* BaseGenerator() const { - return base_; - } - // Advance should not be called on beyond-of-range iterators - // so no component iterators must be beyond end of range, either. 
- virtual void Advance() { - assert(!AtEnd()); - ++current10_; - if (current10_ == end10_) { - current10_ = begin10_; - ++current9_; - } - if (current9_ == end9_) { - current9_ = begin9_; - ++current8_; - } - if (current8_ == end8_) { - current8_ = begin8_; - ++current7_; - } - if (current7_ == end7_) { - current7_ = begin7_; - ++current6_; - } - if (current6_ == end6_) { - current6_ = begin6_; - ++current5_; - } - if (current5_ == end5_) { - current5_ = begin5_; - ++current4_; - } - if (current4_ == end4_) { - current4_ = begin4_; - ++current3_; - } - if (current3_ == end3_) { - current3_ = begin3_; - ++current2_; - } - if (current2_ == end2_) { - current2_ = begin2_; - ++current1_; - } - ComputeCurrentValue(); - } - virtual ParamIteratorInterface* Clone() const { - return new Iterator(*this); - } - virtual const ParamType* Current() const { return ¤t_value_; } - virtual bool Equals(const ParamIteratorInterface& other) const { - // Having the same base generator guarantees that the other - // iterator is of the same type and we can downcast. - GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) - << "The program attempted to compare iterators " - << "from different generators." << std::endl; - const Iterator* typed_other = - CheckedDowncastToActualType(&other); - // We must report iterators equal if they both point beyond their - // respective ranges. That can happen in a variety of fashions, - // so we have to consult AtEnd(). - return (AtEnd() && typed_other->AtEnd()) || - ( - current1_ == typed_other->current1_ && - current2_ == typed_other->current2_ && - current3_ == typed_other->current3_ && - current4_ == typed_other->current4_ && - current5_ == typed_other->current5_ && - current6_ == typed_other->current6_ && - current7_ == typed_other->current7_ && - current8_ == typed_other->current8_ && - current9_ == typed_other->current9_ && - current10_ == typed_other->current10_); - } - - private: - Iterator(const Iterator& other) - : base_(other.base_), - begin1_(other.begin1_), - end1_(other.end1_), - current1_(other.current1_), - begin2_(other.begin2_), - end2_(other.end2_), - current2_(other.current2_), - begin3_(other.begin3_), - end3_(other.end3_), - current3_(other.current3_), - begin4_(other.begin4_), - end4_(other.end4_), - current4_(other.current4_), - begin5_(other.begin5_), - end5_(other.end5_), - current5_(other.current5_), - begin6_(other.begin6_), - end6_(other.end6_), - current6_(other.current6_), - begin7_(other.begin7_), - end7_(other.end7_), - current7_(other.current7_), - begin8_(other.begin8_), - end8_(other.end8_), - current8_(other.current8_), - begin9_(other.begin9_), - end9_(other.end9_), - current9_(other.current9_), - begin10_(other.begin10_), - end10_(other.end10_), - current10_(other.current10_) { - ComputeCurrentValue(); - } - - void ComputeCurrentValue() { - if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_, *current7_, *current8_, - *current9_, *current10_); - } - bool AtEnd() const { - // We must report iterator past the end of the range when either of the - // component iterators has reached the end of its range. - return - current1_ == end1_ || - current2_ == end2_ || - current3_ == end3_ || - current4_ == end4_ || - current5_ == end5_ || - current6_ == end6_ || - current7_ == end7_ || - current8_ == end8_ || - current9_ == end9_ || - current10_ == end10_; - } - - // No implementation - assignment is unsupported. 
- void operator=(const Iterator& other); - - const ParamGeneratorInterface* const base_; - // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. - // current[i]_ is the actual traversing iterator. - const typename ParamGenerator::iterator begin1_; - const typename ParamGenerator::iterator end1_; - typename ParamGenerator::iterator current1_; - const typename ParamGenerator::iterator begin2_; - const typename ParamGenerator::iterator end2_; - typename ParamGenerator::iterator current2_; - const typename ParamGenerator::iterator begin3_; - const typename ParamGenerator::iterator end3_; - typename ParamGenerator::iterator current3_; - const typename ParamGenerator::iterator begin4_; - const typename ParamGenerator::iterator end4_; - typename ParamGenerator::iterator current4_; - const typename ParamGenerator::iterator begin5_; - const typename ParamGenerator::iterator end5_; - typename ParamGenerator::iterator current5_; - const typename ParamGenerator::iterator begin6_; - const typename ParamGenerator::iterator end6_; - typename ParamGenerator::iterator current6_; - const typename ParamGenerator::iterator begin7_; - const typename ParamGenerator::iterator end7_; - typename ParamGenerator::iterator current7_; - const typename ParamGenerator::iterator begin8_; - const typename ParamGenerator::iterator end8_; - typename ParamGenerator::iterator current8_; - const typename ParamGenerator::iterator begin9_; - const typename ParamGenerator::iterator end9_; - typename ParamGenerator::iterator current9_; - const typename ParamGenerator::iterator begin10_; - const typename ParamGenerator::iterator end10_; - typename ParamGenerator::iterator current10_; - ParamType current_value_; - }; // class CartesianProductGenerator10::Iterator - - // No implementation - assignment is unsupported. - void operator=(const CartesianProductGenerator10& other); - - const ParamGenerator g1_; - const ParamGenerator g2_; - const ParamGenerator g3_; - const ParamGenerator g4_; - const ParamGenerator g5_; - const ParamGenerator g6_; - const ParamGenerator g7_; - const ParamGenerator g8_; - const ParamGenerator g9_; - const ParamGenerator g10_; -}; // class CartesianProductGenerator10 - - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Helper classes providing Combine() with polymorphic features. They allow -// casting CartesianProductGeneratorN to ParamGenerator if T is -// convertible to U. -// -template -class CartesianProductHolder2 { - public: -CartesianProductHolder2(const Generator1& g1, const Generator2& g2) - : g1_(g1), g2_(g2) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator2( - static_cast >(g1_), - static_cast >(g2_))); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const CartesianProductHolder2& other); - - const Generator1 g1_; - const Generator2 g2_; -}; // class CartesianProductHolder2 - -template -class CartesianProductHolder3 { - public: -CartesianProductHolder3(const Generator1& g1, const Generator2& g2, - const Generator3& g3) - : g1_(g1), g2_(g2), g3_(g3) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator3( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_))); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductHolder3& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; -}; // class CartesianProductHolder3 - -template -class CartesianProductHolder4 { - public: -CartesianProductHolder4(const Generator1& g1, const Generator2& g2, - const Generator3& g3, const Generator4& g4) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator4( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_), - static_cast >(g4_))); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const CartesianProductHolder4& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; - const Generator4 g4_; -}; // class CartesianProductHolder4 - -template -class CartesianProductHolder5 { - public: -CartesianProductHolder5(const Generator1& g1, const Generator2& g2, - const Generator3& g3, const Generator4& g4, const Generator5& g5) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator5( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_), - static_cast >(g4_), - static_cast >(g5_))); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const CartesianProductHolder5& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; - const Generator4 g4_; - const Generator5 g5_; -}; // class CartesianProductHolder5 - -template -class CartesianProductHolder6 { - public: -CartesianProductHolder6(const Generator1& g1, const Generator2& g2, - const Generator3& g3, const Generator4& g4, const Generator5& g5, - const Generator6& g6) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator6( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_), - static_cast >(g4_), - static_cast >(g5_), - static_cast >(g6_))); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const CartesianProductHolder6& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; - const Generator4 g4_; - const Generator5 g5_; - const Generator6 g6_; -}; // class CartesianProductHolder6 - -template -class CartesianProductHolder7 { - public: -CartesianProductHolder7(const Generator1& g1, const Generator2& g2, - const Generator3& g3, const Generator4& g4, const Generator5& g5, - const Generator6& g6, const Generator7& g7) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator7( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_), - static_cast >(g4_), - static_cast >(g5_), - static_cast >(g6_), - static_cast >(g7_))); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductHolder7& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; - const Generator4 g4_; - const Generator5 g5_; - const Generator6 g6_; - const Generator7 g7_; -}; // class CartesianProductHolder7 - -template -class CartesianProductHolder8 { - public: -CartesianProductHolder8(const Generator1& g1, const Generator2& g2, - const Generator3& g3, const Generator4& g4, const Generator5& g5, - const Generator6& g6, const Generator7& g7, const Generator8& g8) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), - g8_(g8) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator8( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_), - static_cast >(g4_), - static_cast >(g5_), - static_cast >(g6_), - static_cast >(g7_), - static_cast >(g8_))); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const CartesianProductHolder8& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; - const Generator4 g4_; - const Generator5 g5_; - const Generator6 g6_; - const Generator7 g7_; - const Generator8 g8_; -}; // class CartesianProductHolder8 - -template -class CartesianProductHolder9 { - public: -CartesianProductHolder9(const Generator1& g1, const Generator2& g2, - const Generator3& g3, const Generator4& g4, const Generator5& g5, - const Generator6& g6, const Generator7& g7, const Generator8& g8, - const Generator9& g9) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), - g9_(g9) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator9( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_), - static_cast >(g4_), - static_cast >(g5_), - static_cast >(g6_), - static_cast >(g7_), - static_cast >(g8_), - static_cast >(g9_))); - } - - private: - // No implementation - assignment is unsupported. - void operator=(const CartesianProductHolder9& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; - const Generator4 g4_; - const Generator5 g5_; - const Generator6 g6_; - const Generator7 g7_; - const Generator8 g8_; - const Generator9 g9_; -}; // class CartesianProductHolder9 - -template -class CartesianProductHolder10 { - public: -CartesianProductHolder10(const Generator1& g1, const Generator2& g2, - const Generator3& g3, const Generator4& g4, const Generator5& g5, - const Generator6& g6, const Generator7& g7, const Generator8& g8, - const Generator9& g9, const Generator10& g10) - : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), - g9_(g9), g10_(g10) {} - template - operator ParamGenerator< ::std::tr1::tuple >() const { - return ParamGenerator< ::std::tr1::tuple >( - new CartesianProductGenerator10( - static_cast >(g1_), - static_cast >(g2_), - static_cast >(g3_), - static_cast >(g4_), - static_cast >(g5_), - static_cast >(g6_), - static_cast >(g7_), - static_cast >(g8_), - static_cast >(g9_), - static_cast >(g10_))); - } - - private: - // No implementation - assignment is unsupported. 
- void operator=(const CartesianProductHolder10& other); - - const Generator1 g1_; - const Generator2 g2_; - const Generator3 g3_; - const Generator4 g4_; - const Generator5 g5_; - const Generator6 g6_; - const Generator7 g7_; - const Generator8 g8_; - const Generator9 g9_; - const Generator10 g10_; -}; // class CartesianProductHolder10 - -# endif // GTEST_HAS_COMBINE - -} // namespace internal -} // namespace testing - -#endif // GTEST_HAS_PARAM_TEST - -#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ - -#if GTEST_HAS_PARAM_TEST - -namespace testing { - -// Functions producing parameter generators. -// -// Google Test uses these generators to produce parameters for value- -// parameterized tests. When a parameterized test case is instantiated -// with a particular generator, Google Test creates and runs tests -// for each element in the sequence produced by the generator. -// -// In the following sample, tests from test case FooTest are instantiated -// each three times with parameter values 3, 5, and 8: -// -// class FooTest : public TestWithParam { ... }; -// -// TEST_P(FooTest, TestThis) { -// } -// TEST_P(FooTest, TestThat) { -// } -// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8)); -// - -// Range() returns generators providing sequences of values in a range. -// -// Synopsis: -// Range(start, end) -// - returns a generator producing a sequence of values {start, start+1, -// start+2, ..., }. -// Range(start, end, step) -// - returns a generator producing a sequence of values {start, start+step, -// start+step+step, ..., }. -// Notes: -// * The generated sequences never include end. For example, Range(1, 5) -// returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2) -// returns a generator producing {1, 3, 5, 7}. -// * start and end must have the same type. That type may be any integral or -// floating-point type or a user defined type satisfying these conditions: -// * It must be assignable (have operator=() defined). -// * It must have operator+() (operator+(int-compatible type) for -// two-operand version). -// * It must have operator<() defined. -// Elements in the resulting sequences will also have that type. -// * Condition start < end must be satisfied in order for resulting sequences -// to contain any elements. -// -template -internal::ParamGenerator Range(T start, T end, IncrementT step) { - return internal::ParamGenerator( - new internal::RangeGenerator(start, end, step)); -} - -template -internal::ParamGenerator Range(T start, T end) { - return Range(start, end, 1); -} - -// ValuesIn() function allows generation of tests with parameters coming from -// a container. -// -// Synopsis: -// ValuesIn(const T (&array)[N]) -// - returns a generator producing sequences with elements from -// a C-style array. -// ValuesIn(const Container& container) -// - returns a generator producing sequences with elements from -// an STL-style container. -// ValuesIn(Iterator begin, Iterator end) -// - returns a generator producing sequences with elements from -// a range [begin, end) defined by a pair of STL-style iterators. These -// iterators can also be plain C pointers. -// -// Please note that ValuesIn copies the values from the containers -// passed in and keeps them to generate tests in RUN_ALL_TESTS(). 
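As a compilable companion to the TEST_P sample and the Range() synopsis above, here is a minimal sketch of the two together; the fixture name FooTest follows the comments, and the "gtest/gtest.h" include path is an assumption for this bundled copy:

#include "gtest/gtest.h"  // include path assumed

class FooTest : public ::testing::TestWithParam<int> {};

TEST_P(FooTest, IsOdd) {
  EXPECT_EQ(1, GetParam() % 2);  // GetParam() returns the current parameter
}

// Range(1, 9, 2) yields 1, 3, 5, 7 -- the end value 9 is never included.
INSTANTIATE_TEST_CASE_P(OddNumbers, FooTest, ::testing::Range(1, 9, 2));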
-// -// Examples: -// -// This instantiates tests from test case StringTest -// each with C-string values of "foo", "bar", and "baz": -// -// const char* strings[] = {"foo", "bar", "baz"}; -// INSTANTIATE_TEST_CASE_P(StringSequence, SrtingTest, ValuesIn(strings)); -// -// This instantiates tests from test case StlStringTest -// each with STL strings with values "a" and "b": -// -// ::std::vector< ::std::string> GetParameterStrings() { -// ::std::vector< ::std::string> v; -// v.push_back("a"); -// v.push_back("b"); -// return v; -// } -// -// INSTANTIATE_TEST_CASE_P(CharSequence, -// StlStringTest, -// ValuesIn(GetParameterStrings())); -// -// -// This will also instantiate tests from CharTest -// each with parameter values 'a' and 'b': -// -// ::std::list GetParameterChars() { -// ::std::list list; -// list.push_back('a'); -// list.push_back('b'); -// return list; -// } -// ::std::list l = GetParameterChars(); -// INSTANTIATE_TEST_CASE_P(CharSequence2, -// CharTest, -// ValuesIn(l.begin(), l.end())); -// -template -internal::ParamGenerator< - typename ::testing::internal::IteratorTraits::value_type> -ValuesIn(ForwardIterator begin, ForwardIterator end) { - typedef typename ::testing::internal::IteratorTraits - ::value_type ParamType; - return internal::ParamGenerator( - new internal::ValuesInIteratorRangeGenerator(begin, end)); -} - -template -internal::ParamGenerator ValuesIn(const T (&array)[N]) { - return ValuesIn(array, array + N); -} - -template -internal::ParamGenerator ValuesIn( - const Container& container) { - return ValuesIn(container.begin(), container.end()); -} - -// Values() allows generating tests from explicitly specified list of -// parameters. -// -// Synopsis: -// Values(T v1, T v2, ..., T vN) -// - returns a generator producing sequences with elements v1, v2, ..., vN. -// -// For example, this instantiates tests from test case BarTest each -// with values "one", "two", and "three": -// -// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three")); -// -// This instantiates tests from test case BazTest each with values 1, 2, 3.5. -// The exact type of values will depend on the type of parameter in BazTest. -// -// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5)); -// -// Currently, Values() supports from 1 to 50 parameters. 
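The ValuesIn() examples in the surrounding comments lost their template arguments in this dump (e.g. the TestWithParam and std::list element types). A self-contained restatement of the STL-string case, assuming the bundled header is reachable as "gtest/gtest.h":

#include <string>
#include <vector>
#include "gtest/gtest.h"  // include path assumed

class StlStringTest : public ::testing::TestWithParam< ::std::string> {};

TEST_P(StlStringTest, IsNotEmpty) {
  EXPECT_FALSE(GetParam().empty());
}

::std::vector< ::std::string> GetParameterStrings() {
  ::std::vector< ::std::string> v;
  v.push_back("a");
  v.push_back("b");
  return v;
}

// ValuesIn() copies the elements, so passing a temporary container is safe.
INSTANTIATE_TEST_CASE_P(CharSequence, StlStringTest,
                        ::testing::ValuesIn(GetParameterStrings()));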
-// -template -internal::ValueArray1 Values(T1 v1) { - return internal::ValueArray1(v1); -} - -template -internal::ValueArray2 Values(T1 v1, T2 v2) { - return internal::ValueArray2(v1, v2); -} - -template -internal::ValueArray3 Values(T1 v1, T2 v2, T3 v3) { - return internal::ValueArray3(v1, v2, v3); -} - -template -internal::ValueArray4 Values(T1 v1, T2 v2, T3 v3, T4 v4) { - return internal::ValueArray4(v1, v2, v3, v4); -} - -template -internal::ValueArray5 Values(T1 v1, T2 v2, T3 v3, T4 v4, - T5 v5) { - return internal::ValueArray5(v1, v2, v3, v4, v5); -} - -template -internal::ValueArray6 Values(T1 v1, T2 v2, T3 v3, - T4 v4, T5 v5, T6 v6) { - return internal::ValueArray6(v1, v2, v3, v4, v5, v6); -} - -template -internal::ValueArray7 Values(T1 v1, T2 v2, T3 v3, - T4 v4, T5 v5, T6 v6, T7 v7) { - return internal::ValueArray7(v1, v2, v3, v4, v5, - v6, v7); -} - -template -internal::ValueArray8 Values(T1 v1, T2 v2, - T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) { - return internal::ValueArray8(v1, v2, v3, v4, - v5, v6, v7, v8); -} - -template -internal::ValueArray9 Values(T1 v1, T2 v2, - T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) { - return internal::ValueArray9(v1, v2, v3, - v4, v5, v6, v7, v8, v9); -} - -template -internal::ValueArray10 Values(T1 v1, - T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) { - return internal::ValueArray10(v1, - v2, v3, v4, v5, v6, v7, v8, v9, v10); -} - -template -internal::ValueArray11 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11) { - return internal::ValueArray11(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11); -} - -template -internal::ValueArray12 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12) { - return internal::ValueArray12(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12); -} - -template -internal::ValueArray13 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13) { - return internal::ValueArray13(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13); -} - -template -internal::ValueArray14 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) { - return internal::ValueArray14(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14); -} - -template -internal::ValueArray15 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, - T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) { - return internal::ValueArray15(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); -} - -template -internal::ValueArray16 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, - T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16) { - return internal::ValueArray16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15, v16); -} - -template -internal::ValueArray17 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, - T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17) { - return internal::ValueArray17(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, - v11, v12, v13, v14, v15, v16, v17); -} - -template -internal::ValueArray18 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, - T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17, T18 v18) { - return internal::ValueArray18(v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15, v16, v17, v18); -} - -template -internal::ValueArray19 
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, - T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, - T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) { - return internal::ValueArray19(v1, v2, v3, v4, v5, v6, v7, v8, - v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19); -} - -template -internal::ValueArray20 Values(T1 v1, T2 v2, T3 v3, T4 v4, - T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, - T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) { - return internal::ValueArray20(v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20); -} - -template -internal::ValueArray21 Values(T1 v1, T2 v2, T3 v3, T4 v4, - T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, - T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) { - return internal::ValueArray21(v1, v2, v3, v4, v5, v6, - v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21); -} - -template -internal::ValueArray22 Values(T1 v1, T2 v2, T3 v3, - T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, - T21 v21, T22 v22) { - return internal::ValueArray22(v1, v2, v3, v4, - v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, - v20, v21, v22); -} - -template -internal::ValueArray23 Values(T1 v1, T2 v2, - T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, - T21 v21, T22 v22, T23 v23) { - return internal::ValueArray23(v1, v2, v3, - v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, - v20, v21, v22, v23); -} - -template -internal::ValueArray24 Values(T1 v1, T2 v2, - T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, - T21 v21, T22 v22, T23 v23, T24 v24) { - return internal::ValueArray24(v1, v2, - v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, - v19, v20, v21, v22, v23, v24); -} - -template -internal::ValueArray25 Values(T1 v1, - T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, - T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, - T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) { - return internal::ValueArray25(v1, - v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, - v18, v19, v20, v21, v22, v23, v24, v25); -} - -template -internal::ValueArray26 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26) { - return internal::ValueArray26(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, - v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26); -} - -template -internal::ValueArray27 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27) { - return internal::ValueArray27(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, - v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27); -} - -template -internal::ValueArray28 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 
v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28) { - return internal::ValueArray28(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, - v28); -} - -template -internal::ValueArray29 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29) { - return internal::ValueArray29(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, - v27, v28, v29); -} - -template -internal::ValueArray30 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, - T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, - T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, - T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) { - return internal::ValueArray30(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, - v26, v27, v28, v29, v30); -} - -template -internal::ValueArray31 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, - T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, - T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) { - return internal::ValueArray31(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, - v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, - v25, v26, v27, v28, v29, v30, v31); -} - -template -internal::ValueArray32 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, - T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, - T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, - T32 v32) { - return internal::ValueArray32(v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31, v32); -} - -template -internal::ValueArray33 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, - T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, - T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, - T32 v32, T33 v33) { - return internal::ValueArray33(v1, v2, v3, v4, v5, v6, v7, v8, - v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31, v32, v33); -} - -template -internal::ValueArray34 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, - T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, - T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, - T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, - T31 v31, T32 v32, T33 v33, T34 v34) { - return internal::ValueArray34(v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, - v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34); -} - -template -internal::ValueArray35 Values(T1 v1, T2 v2, T3 v3, T4 v4, - T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, - T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, 
T19 v19, T20 v20, T21 v21, - T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, - T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) { - return internal::ValueArray35(v1, v2, v3, v4, v5, v6, - v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, - v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35); -} - -template -internal::ValueArray36 Values(T1 v1, T2 v2, T3 v3, T4 v4, - T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, - T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, - T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, - T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) { - return internal::ValueArray36(v1, v2, v3, v4, - v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, - v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, - v34, v35, v36); -} - -template -internal::ValueArray37 Values(T1 v1, T2 v2, T3 v3, - T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, - T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, - T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, - T37 v37) { - return internal::ValueArray37(v1, v2, v3, - v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, - v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, - v34, v35, v36, v37); -} - -template -internal::ValueArray38 Values(T1 v1, T2 v2, - T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, - T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, - T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, - T37 v37, T38 v38) { - return internal::ValueArray38(v1, v2, - v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, - v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, - v33, v34, v35, v36, v37, v38); -} - -template -internal::ValueArray39 Values(T1 v1, T2 v2, - T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, - T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, - T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, - T37 v37, T38 v38, T39 v39) { - return internal::ValueArray39(v1, - v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, - v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, - v32, v33, v34, v35, v36, v37, v38, v39); -} - -template -internal::ValueArray40 Values(T1 v1, - T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, - T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, - T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, - T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, - T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) { - return internal::ValueArray40(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, - v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, - v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40); -} - -template -internal::ValueArray41 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - 
T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) { - return internal::ValueArray41(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, - v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, - v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41); -} - -template -internal::ValueArray42 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42) { - return internal::ValueArray42(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, - v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, - v42); -} - -template -internal::ValueArray43 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43) { - return internal::ValueArray43(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, - v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, - v41, v42, v43); -} - -template -internal::ValueArray44 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, - T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, - T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, - T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, - T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, - T42 v42, T43 v43, T44 v44) { - return internal::ValueArray44(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, - v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, - v40, v41, v42, v43, v44); -} - -template -internal::ValueArray45 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, - T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, - T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, - T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, - T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, - T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) { - return internal::ValueArray45(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, - v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, - v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, - v39, v40, v41, v42, v43, v44, v45); -} - -template -internal::ValueArray46 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, - T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, - T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, - T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 
v39, - T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) { - return internal::ValueArray46(v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, - v38, v39, v40, v41, v42, v43, v44, v45, v46); -} - -template -internal::ValueArray47 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, - T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, - T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, - T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, - T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) { - return internal::ValueArray47(v1, v2, v3, v4, v5, v6, v7, v8, - v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, - v38, v39, v40, v41, v42, v43, v44, v45, v46, v47); -} - -template -internal::ValueArray48 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, - T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, - T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, - T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, - T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, - T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, - T48 v48) { - return internal::ValueArray48(v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, - v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, - v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48); -} - -template -internal::ValueArray49 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, - T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, - T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, - T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, - T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, - T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, - T47 v47, T48 v48, T49 v49) { - return internal::ValueArray49(v1, v2, v3, v4, v5, v6, - v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, - v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, - v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49); -} - -template -internal::ValueArray50 Values(T1 v1, T2 v2, T3 v3, T4 v4, - T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, - T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, - T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, - T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, - T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, - T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) { - return internal::ValueArray50(v1, v2, v3, v4, - v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, - v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, - v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, - v48, v49, v50); -} - -// Bool() allows generating tests with parameters in a set of (false, true). -// -// Synopsis: -// Bool() -// - returns a generator producing sequences with elements {false, true}. 
-// -// It is useful when testing code that depends on Boolean flags. Combinations -// of multiple flags can be tested when several Bool()'s are combined using -// Combine() function. -// -// In the following example all tests in the test case FlagDependentTest -// will be instantiated twice with parameters false and true. -// -// class FlagDependentTest : public testing::TestWithParam { -// virtual void SetUp() { -// external_flag = GetParam(); -// } -// } -// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool()); -// -inline internal::ParamGenerator Bool() { - return Values(false, true); -} - -# if GTEST_HAS_COMBINE -// Combine() allows the user to combine two or more sequences to produce -// values of a Cartesian product of those sequences' elements. -// -// Synopsis: -// Combine(gen1, gen2, ..., genN) -// - returns a generator producing sequences with elements coming from -// the Cartesian product of elements from the sequences generated by -// gen1, gen2, ..., genN. The sequence elements will have a type of -// tuple where T1, T2, ..., TN are the types -// of elements from sequences produces by gen1, gen2, ..., genN. -// -// Combine can have up to 10 arguments. This number is currently limited -// by the maximum number of elements in the tuple implementation used by Google -// Test. -// -// Example: -// -// This will instantiate tests in test case AnimalTest each one with -// the parameter values tuple("cat", BLACK), tuple("cat", WHITE), -// tuple("dog", BLACK), and tuple("dog", WHITE): -// -// enum Color { BLACK, GRAY, WHITE }; -// class AnimalTest -// : public testing::TestWithParam > {...}; -// -// TEST_P(AnimalTest, AnimalLooksNice) {...} -// -// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest, -// Combine(Values("cat", "dog"), -// Values(BLACK, WHITE))); -// -// This will instantiate tests in FlagDependentTest with all variations of two -// Boolean flags: -// -// class FlagDependentTest -// : public testing::TestWithParam > { -// virtual void SetUp() { -// // Assigns external_flag_1 and external_flag_2 values from the tuple. -// tie(external_flag_1, external_flag_2) = GetParam(); -// } -// }; -// -// TEST_P(FlagDependentTest, TestFeature1) { -// // Test your code using external_flag_1 and external_flag_2 here. 
-// } -// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest, -// Combine(Bool(), Bool())); -// -template -internal::CartesianProductHolder2 Combine( - const Generator1& g1, const Generator2& g2) { - return internal::CartesianProductHolder2( - g1, g2); -} - -template -internal::CartesianProductHolder3 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3) { - return internal::CartesianProductHolder3( - g1, g2, g3); -} - -template -internal::CartesianProductHolder4 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3, - const Generator4& g4) { - return internal::CartesianProductHolder4( - g1, g2, g3, g4); -} - -template -internal::CartesianProductHolder5 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3, - const Generator4& g4, const Generator5& g5) { - return internal::CartesianProductHolder5( - g1, g2, g3, g4, g5); -} - -template -internal::CartesianProductHolder6 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3, - const Generator4& g4, const Generator5& g5, const Generator6& g6) { - return internal::CartesianProductHolder6( - g1, g2, g3, g4, g5, g6); -} - -template -internal::CartesianProductHolder7 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3, - const Generator4& g4, const Generator5& g5, const Generator6& g6, - const Generator7& g7) { - return internal::CartesianProductHolder7( - g1, g2, g3, g4, g5, g6, g7); -} - -template -internal::CartesianProductHolder8 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3, - const Generator4& g4, const Generator5& g5, const Generator6& g6, - const Generator7& g7, const Generator8& g8) { - return internal::CartesianProductHolder8( - g1, g2, g3, g4, g5, g6, g7, g8); -} - -template -internal::CartesianProductHolder9 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3, - const Generator4& g4, const Generator5& g5, const Generator6& g6, - const Generator7& g7, const Generator8& g8, const Generator9& g9) { - return internal::CartesianProductHolder9( - g1, g2, g3, g4, g5, g6, g7, g8, g9); -} - -template -internal::CartesianProductHolder10 Combine( - const Generator1& g1, const Generator2& g2, const Generator3& g3, - const Generator4& g4, const Generator5& g5, const Generator6& g6, - const Generator7& g7, const Generator8& g8, const Generator9& g9, - const Generator10& g10) { - return internal::CartesianProductHolder10( - g1, g2, g3, g4, g5, g6, g7, g8, g9, g10); -} -# endif // GTEST_HAS_COMBINE - - - -# define TEST_P(test_case_name, test_name) \ - class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ - : public test_case_name { \ - public: \ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \ - virtual void TestBody(); \ - private: \ - static int AddToRegistry() { \ - ::testing::UnitTest::GetInstance()->parameterized_test_registry(). 
\ - GetTestCasePatternHolder(\ - #test_case_name, __FILE__, __LINE__)->AddTestPattern(\ - #test_case_name, \ - #test_name, \ - new ::testing::internal::TestMetaFactory< \ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \ - return 0; \ - } \ - static int gtest_registering_dummy_; \ - GTEST_DISALLOW_COPY_AND_ASSIGN_(\ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ - }; \ - int GTEST_TEST_CLASS_NAME_(test_case_name, \ - test_name)::gtest_registering_dummy_ = \ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ - void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() - -# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \ - ::testing::internal::ParamGenerator \ - gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ - int gtest_##prefix##test_case_name##_dummy_ = \ - ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ - GetTestCasePatternHolder(\ - #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\ - #prefix, \ - >est_##prefix##test_case_name##_EvalGenerator_, \ - __FILE__, __LINE__) - -} // namespace testing - -#endif // GTEST_HAS_PARAM_TEST - -#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ -// Copyright 2006, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) -// -// Google C++ Testing Framework definitions useful in production code. - -#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_ -#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_ - -// When you need to test the private or protected members of a class, -// use the FRIEND_TEST macro to declare your tests as friends of the -// class. For example: -// -// class MyClass { -// private: -// void MyMethod(); -// FRIEND_TEST(MyClassTest, MyMethod); -// }; -// -// class MyClassTest : public testing::Test { -// // ... -// }; -// -// TEST_F(MyClassTest, MyMethod) { -// // Can call MyClass::MyMethod() here. 
-// } - -#define FRIEND_TEST(test_case_name, test_name)\ -friend class test_case_name##_##test_name##_Test - -#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_ -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: mheule@google.com (Markus Heule) -// - -#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ -#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ - -#include -#include - -namespace testing { - -// A copyable object representing the result of a test part (i.e. an -// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()). -// -// Don't inherit from TestPartResult as its destructor is not virtual. -class GTEST_API_ TestPartResult { - public: - // The possible outcomes of a test part (i.e. an assertion or an - // explicit SUCCEED(), FAIL(), or ADD_FAILURE()). - enum Type { - kSuccess, // Succeeded. - kNonFatalFailure, // Failed but the test can continue. - kFatalFailure // Failed and the test should be terminated. - }; - - // C'tor. TestPartResult does NOT have a default constructor. - // Always use this constructor (with parameters) to create a - // TestPartResult object. - TestPartResult(Type a_type, - const char* a_file_name, - int a_line_number, - const char* a_message) - : type_(a_type), - file_name_(a_file_name), - line_number_(a_line_number), - summary_(ExtractSummary(a_message)), - message_(a_message) { - } - - // Gets the outcome of the test part. - Type type() const { return type_; } - - // Gets the name of the source file where the test part took place, or - // NULL if it's unknown. - const char* file_name() const { return file_name_.c_str(); } - - // Gets the line in the source file where the test part took place, - // or -1 if it's unknown. - int line_number() const { return line_number_; } - - // Gets the summary of the failure message. - const char* summary() const { return summary_.c_str(); } - - // Gets the message associated with the test part. - const char* message() const { return message_.c_str(); } - - // Returns true iff the test part passed. 
- bool passed() const { return type_ == kSuccess; } - - // Returns true iff the test part failed. - bool failed() const { return type_ != kSuccess; } - - // Returns true iff the test part non-fatally failed. - bool nonfatally_failed() const { return type_ == kNonFatalFailure; } - - // Returns true iff the test part fatally failed. - bool fatally_failed() const { return type_ == kFatalFailure; } - private: - Type type_; - - // Gets the summary of the failure message by omitting the stack - // trace in it. - static internal::String ExtractSummary(const char* message); - - // The name of the source file where the test part took place, or - // NULL if the source file is unknown. - internal::String file_name_; - // The line in the source file where the test part took place, or -1 - // if the line number is unknown. - int line_number_; - internal::String summary_; // The test failure summary. - internal::String message_; // The test failure message. -}; - -// Prints a TestPartResult object. -std::ostream& operator<<(std::ostream& os, const TestPartResult& result); - -// An array of TestPartResult objects. -// -// Don't inherit from TestPartResultArray as its destructor is not -// virtual. -class GTEST_API_ TestPartResultArray { - public: - TestPartResultArray() {} - - // Appends the given TestPartResult to the array. - void Append(const TestPartResult& result); - - // Returns the TestPartResult at the given index (0-based). - const TestPartResult& GetTestPartResult(int index) const; - - // Returns the number of TestPartResult objects in the array. - int size() const; - - private: - std::vector array_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray); -}; - -// This interface knows how to report a test part result. -class TestPartResultReporterInterface { - public: - virtual ~TestPartResultReporterInterface() {} - - virtual void ReportTestPartResult(const TestPartResult& result) = 0; -}; - -namespace internal { - -// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a -// statement generates new fatal failures. To do so it registers itself as the -// current test part result reporter. Besides checking if fatal failures were -// reported, it only delegates the reporting to the former result reporter. -// The original result reporter is restored in the destructor. -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -class GTEST_API_ HasNewFatalFailureHelper - : public TestPartResultReporterInterface { - public: - HasNewFatalFailureHelper(); - virtual ~HasNewFatalFailureHelper(); - virtual void ReportTestPartResult(const TestPartResult& result); - bool has_new_fatal_failure() const { return has_new_fatal_failure_; } - private: - bool has_new_fatal_failure_; - TestPartResultReporterInterface* original_reporter_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper); -}; - -} // namespace internal - -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ -// Copyright 2008 Google Inc. -// All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. 
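A small, hand-rolled illustration of the TestPartResult interface defined above: it constructs a result directly and queries its accessors (normally Google Test creates these objects itself when an assertion runs). The include path is an assumption for this bundled copy:

#include <iostream>
#include "gtest/gtest.h"  // include path assumed

void PrintResultSummary() {
  // Build a non-fatal failure result by hand and inspect it.
  ::testing::TestPartResult r(::testing::TestPartResult::kNonFatalFailure,
                              "foo_test.cc", 42, "Expected: 1\n  Actual: 2");
  std::cout << r.file_name() << ":" << r.line_number()
            << "  failed=" << r.failed()
            << "  fatal=" << r.fatally_failed() << std::endl;
}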
-// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) - -#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ -#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ - -// This header implements typed tests and type-parameterized tests. - -// Typed (aka type-driven) tests repeat the same test for types in a -// list. You must know which types you want to test with when writing -// typed tests. Here's how you do it: - -#if 0 - -// First, define a fixture class template. It should be parameterized -// by a type. Remember to derive it from testing::Test. -template -class FooTest : public testing::Test { - public: - ... - typedef std::list List; - static T shared_; - T value_; -}; - -// Next, associate a list of types with the test case, which will be -// repeated for each type in the list. The typedef is necessary for -// the macro to parse correctly. -typedef testing::Types MyTypes; -TYPED_TEST_CASE(FooTest, MyTypes); - -// If the type list contains only one type, you can write that type -// directly without Types<...>: -// TYPED_TEST_CASE(FooTest, int); - -// Then, use TYPED_TEST() instead of TEST_F() to define as many typed -// tests for this test case as you want. -TYPED_TEST(FooTest, DoesBlah) { - // Inside a test, refer to TypeParam to get the type parameter. - // Since we are inside a derived class template, C++ requires use to - // visit the members of FooTest via 'this'. - TypeParam n = this->value_; - - // To visit static members of the fixture, add the TestFixture:: - // prefix. - n += TestFixture::shared_; - - // To refer to typedefs in the fixture, add the "typename - // TestFixture::" prefix. - typename TestFixture::List values; - values.push_back(n); - ... -} - -TYPED_TEST(FooTest, HasPropertyA) { ... } - -#endif // 0 - -// Type-parameterized tests are abstract test patterns parameterized -// by a type. Compared with typed tests, type-parameterized tests -// allow you to define the test pattern without knowing what the type -// parameters are. The defined pattern can be instantiated with -// different types any number of times, in any number of translation -// units. -// -// If you are designing an interface or concept, you can define a -// suite of type-parameterized tests to verify properties that any -// valid implementation of the interface/concept should have. Then, -// each implementation can easily instantiate the test suite to verify -// that it conforms to the requirements, without having to write -// similar tests repeatedly. Here's an example: - -#if 0 - -// First, define a fixture class template. 
It should be parameterized -// by a type. Remember to derive it from testing::Test. -template -class FooTest : public testing::Test { - ... -}; - -// Next, declare that you will define a type-parameterized test case -// (the _P suffix is for "parameterized" or "pattern", whichever you -// prefer): -TYPED_TEST_CASE_P(FooTest); - -// Then, use TYPED_TEST_P() to define as many type-parameterized tests -// for this type-parameterized test case as you want. -TYPED_TEST_P(FooTest, DoesBlah) { - // Inside a test, refer to TypeParam to get the type parameter. - TypeParam n = 0; - ... -} - -TYPED_TEST_P(FooTest, HasPropertyA) { ... } - -// Now the tricky part: you need to register all test patterns before -// you can instantiate them. The first argument of the macro is the -// test case name; the rest are the names of the tests in this test -// case. -REGISTER_TYPED_TEST_CASE_P(FooTest, - DoesBlah, HasPropertyA); - -// Finally, you are free to instantiate the pattern with the types you -// want. If you put the above code in a header file, you can #include -// it in multiple C++ source files and instantiate it multiple times. -// -// To distinguish different instances of the pattern, the first -// argument to the INSTANTIATE_* macro is a prefix that will be added -// to the actual test case name. Remember to pick unique prefixes for -// different instances. -typedef testing::Types MyTypes; -INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes); - -// If the type list contains only one type, you can write that type -// directly without Types<...>: -// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int); - -#endif // 0 - - -// Implements typed tests. - -#if GTEST_HAS_TYPED_TEST - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Expands to the name of the typedef for the type parameters of the -// given test case. -# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_ - -// The 'Types' template argument below must have spaces around it -// since some compilers may choke on '>>' when passing a template -// instance (e.g. Types) -# define TYPED_TEST_CASE(CaseName, Types) \ - typedef ::testing::internal::TypeList< Types >::type \ - GTEST_TYPE_PARAMS_(CaseName) - -# define TYPED_TEST(CaseName, TestName) \ - template \ - class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \ - : public CaseName { \ - private: \ - typedef CaseName TestFixture; \ - typedef gtest_TypeParam_ TypeParam; \ - virtual void TestBody(); \ - }; \ - bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \ - ::testing::internal::TypeParameterizedTest< \ - CaseName, \ - ::testing::internal::TemplateSel< \ - GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \ - GTEST_TYPE_PARAMS_(CaseName)>::Register(\ - "", #CaseName, #TestName, 0); \ - template \ - void GTEST_TEST_CLASS_NAME_(CaseName, TestName)::TestBody() - -#endif // GTEST_HAS_TYPED_TEST - -// Implements type-parameterized tests. - -#if GTEST_HAS_TYPED_TEST_P - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Expands to the namespace name that the type-parameterized tests for -// the given type-parameterized test case are defined in. The exact -// name of the namespace is subject to change without notice. -# define GTEST_CASE_NAMESPACE_(TestCaseName) \ - gtest_case_##TestCaseName##_ - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// -// Expands to the name of the variable used to remember the names of -// the defined tests in the given test case. 
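The #if 0 samples above lost their template arguments in this dump, so here is a minimal compilable version of the typed-test pattern using the FooTest and MyTypes names from those comments (this gtest vintage uses the *_CASE spelling of the macros; the include path is assumed):

#include "gtest/gtest.h"  // include path assumed

template <typename T>
class FooTest : public ::testing::Test {
 public:
  FooTest() : value_(T(5)) {}
  T value_;
};

typedef ::testing::Types<char, int, unsigned int> MyTypes;
TYPED_TEST_CASE(FooTest, MyTypes);  // repeats every TYPED_TEST below for each type

TYPED_TEST(FooTest, ValueIsFive) {
  TypeParam n = this->value_;       // fixture members need 'this->' inside the test
  EXPECT_EQ(TypeParam(5), n);
}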
-# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \ - gtest_typed_test_case_p_state_##TestCaseName##_ - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY. -// -// Expands to the name of the variable used to remember the names of -// the registered tests in the given test case. -# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \ - gtest_registered_test_names_##TestCaseName##_ - -// The variables defined in the type-parameterized test macros are -// static as typically these macros are used in a .h file that can be -// #included in multiple translation units linked together. -# define TYPED_TEST_CASE_P(CaseName) \ - static ::testing::internal::TypedTestCasePState \ - GTEST_TYPED_TEST_CASE_P_STATE_(CaseName) - -# define TYPED_TEST_P(CaseName, TestName) \ - namespace GTEST_CASE_NAMESPACE_(CaseName) { \ - template \ - class TestName : public CaseName { \ - private: \ - typedef CaseName TestFixture; \ - typedef gtest_TypeParam_ TypeParam; \ - virtual void TestBody(); \ - }; \ - static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \ - GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\ - __FILE__, __LINE__, #CaseName, #TestName); \ - } \ - template \ - void GTEST_CASE_NAMESPACE_(CaseName)::TestName::TestBody() - -/* # define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \ */ -/* namespace GTEST_CASE_NAMESPACE_(CaseName) { \ */ -/* typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \ */ -/* } \ */ -/* static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \ */ -/* GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\ */ -/* __FILE__, __LINE__, #__VA_ARGS__) */ - -// The 'Types' template argument below must have spaces around it -// since some compilers may choke on '>>' when passing a template -// instance (e.g. Types) -# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \ - bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \ - ::testing::internal::TypeParameterizedTestCase::type>::Register(\ - #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName)) - -#endif // GTEST_HAS_TYPED_TEST_P - -#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ - -// Depending on the platform, different string classes are available. -// On Linux, in addition to ::std::string, Google also makes use of -// class ::string, which has the same interface as ::std::string, but -// has a different implementation. -// -// The user can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that -// ::string is available AND is a distinct type to ::std::string, or -// define it to 0 to indicate otherwise. -// -// If the user's ::std::string and ::string are the same class due to -// aliasing, he should define GTEST_HAS_GLOBAL_STRING to 0. -// -// If the user doesn't define GTEST_HAS_GLOBAL_STRING, it is defined -// heuristically. - -namespace testing { - -// Declares the flags. - -// This flag temporary enables the disabled tests. -GTEST_DECLARE_bool_(also_run_disabled_tests); - -// This flag brings the debugger on an assertion failure. -GTEST_DECLARE_bool_(break_on_failure); - -// This flag controls whether Google Test catches all test-thrown exceptions -// and logs them as failures. -GTEST_DECLARE_bool_(catch_exceptions); - -// This flag enables using colors in terminal output. Available values are -// "yes" to enable colors, "no" (disable colors), or "auto" (the default) -// to let Google Test decide. -GTEST_DECLARE_string_(color); - -// This flag sets up the filter to select by name using a glob pattern -// the tests to run. 
If the filter is not given all tests are executed. -GTEST_DECLARE_string_(filter); - -// This flag causes the Google Test to list tests. None of the tests listed -// are actually run if the flag is provided. -GTEST_DECLARE_bool_(list_tests); - -// This flag controls whether Google Test emits a detailed XML report to a file -// in addition to its normal textual output. -GTEST_DECLARE_string_(output); - -// This flags control whether Google Test prints the elapsed time for each -// test. -GTEST_DECLARE_bool_(print_time); - -// This flag specifies the random number seed. -GTEST_DECLARE_int32_(random_seed); - -// This flag sets how many times the tests are repeated. The default value -// is 1. If the value is -1 the tests are repeating forever. -GTEST_DECLARE_int32_(repeat); - -// This flag controls whether Google Test includes Google Test internal -// stack frames in failure stack traces. -GTEST_DECLARE_bool_(show_internal_stack_frames); - -// When this flag is specified, tests' order is randomized on every iteration. -GTEST_DECLARE_bool_(shuffle); - -// This flag specifies the maximum number of stack frames to be -// printed in a failure message. -GTEST_DECLARE_int32_(stack_trace_depth); - -// When this flag is specified, a failed assertion will throw an -// exception if exceptions are enabled, or exit the program with a -// non-zero code otherwise. -GTEST_DECLARE_bool_(throw_on_failure); - -// When this flag is set with a "host:port" string, on supported -// platforms test results are streamed to the specified port on -// the specified host machine. -GTEST_DECLARE_string_(stream_result_to); - -// The upper limit for valid stack trace depths. -const int kMaxStackTraceDepth = 100; - -namespace internal { - -class AssertHelper; -class DefaultGlobalTestPartResultReporter; -class ExecDeathTest; -class NoExecDeathTest; -class FinalSuccessChecker; -class GTestFlagSaver; -class TestResultAccessor; -class TestEventListenersAccessor; -class TestEventRepeater; -class WindowsDeathTest; -class UnitTestImpl* GetUnitTestImpl(); -void ReportFailureInUnknownLocation(TestPartResult::Type result_type, - const String& message); - -// Converts a streamable value to a String. A NULL pointer is -// converted to "(null)". When the input value is a ::string, -// ::std::string, ::wstring, or ::std::wstring object, each NUL -// character in it is replaced with "\\0". -// Declared in gtest-internal.h but defined here, so that it has access -// to the definition of the Message class, required by the ARM -// compiler. -template -String StreamableToString(const T& streamable) { - return (Message() << streamable).GetString(); -} - -} // namespace internal - -// The friend relationship of some of these classes is cyclic. -// If we don't forward declare them the compiler might confuse the classes -// in friendship clauses with same named classes on the scope. -class Test; -class TestCase; -class TestInfo; -class UnitTest; - -// A class for indicating whether an assertion was successful. When -// the assertion wasn't successful, the AssertionResult object -// remembers a non-empty message that describes how it failed. -// -// To create an instance of this class, use one of the factory functions -// (AssertionSuccess() and AssertionFailure()). -// -// This class is useful for two purposes: -// 1. Defining predicate functions to be used with Boolean test assertions -// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts -// 2. 
Defining predicate-format functions to be -// used with predicate assertions (ASSERT_PRED_FORMAT*, etc). -// -// For example, if you define IsEven predicate: -// -// testing::AssertionResult IsEven(int n) { -// if ((n % 2) == 0) -// return testing::AssertionSuccess(); -// else -// return testing::AssertionFailure() << n << " is odd"; -// } -// -// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5))) -// will print the message -// -// Value of: IsEven(Fib(5)) -// Actual: false (5 is odd) -// Expected: true -// -// instead of a more opaque -// -// Value of: IsEven(Fib(5)) -// Actual: false -// Expected: true -// -// in case IsEven is a simple Boolean predicate. -// -// If you expect your predicate to be reused and want to support informative -// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up -// about half as often as positive ones in our tests), supply messages for -// both success and failure cases: -// -// testing::AssertionResult IsEven(int n) { -// if ((n % 2) == 0) -// return testing::AssertionSuccess() << n << " is even"; -// else -// return testing::AssertionFailure() << n << " is odd"; -// } -// -// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print -// -// Value of: IsEven(Fib(6)) -// Actual: true (8 is even) -// Expected: false -// -// NB: Predicates that support negative Boolean assertions have reduced -// performance in positive ones so be careful not to use them in tests -// that have lots (tens of thousands) of positive Boolean assertions. -// -// To use this class with EXPECT_PRED_FORMAT assertions such as: -// -// // Verifies that Foo() returns an even number. -// EXPECT_PRED_FORMAT1(IsEven, Foo()); -// -// you need to define: -// -// testing::AssertionResult IsEven(const char* expr, int n) { -// if ((n % 2) == 0) -// return testing::AssertionSuccess(); -// else -// return testing::AssertionFailure() -// << "Expected: " << expr << " is even\n Actual: it's " << n; -// } -// -// If Foo() returns 5, you will see the following message: -// -// Expected: Foo() is even -// Actual: it's 5 -// -class GTEST_API_ AssertionResult { - public: - // Copy constructor. - // Used in EXPECT_TRUE/FALSE(assertion_result). - AssertionResult(const AssertionResult& other); - // Used in the EXPECT_TRUE/FALSE(bool_expression). - explicit AssertionResult(bool success) : success_(success) {} - - // Returns true iff the assertion succeeded. - operator bool() const { return success_; } // NOLINT - - // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. - AssertionResult operator!() const; - - // Returns the text streamed into this AssertionResult. Test assertions - // use it when they fail (i.e., the predicate's outcome doesn't match the - // assertion's expectation). When nothing has been streamed into the - // object, returns an empty string. - const char* message() const { - return message_.get() != NULL ? message_->c_str() : ""; - } - // TODO(vladl@google.com): Remove this after making sure no clients use it. - // Deprecated; please use message() instead. - const char* failure_message() const { return message(); } - - // Streams a custom failure message into this object. - template AssertionResult& operator<<(const T& value) { - AppendMessage(Message() << value); - return *this; - } - - // Allows streaming basic output manipulators such as endl or flush into - // this object. 
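// A small sketch of the streaming interface described above, used from a
// helper that returns AssertionResult; IsInRange is an illustrative name,
// not part of Google Test:
//
//   testing::AssertionResult IsInRange(int value, int lo, int hi) {
//     if (value < lo || value > hi)
//       return testing::AssertionFailure()
//           << value << " is outside [" << lo << ", " << hi << "]";
//     return testing::AssertionSuccess() << value << " is in range";
//   }
//
//   TEST(RangeTest, Basic) {
//     // On failure, the streamed text above appears as the "Actual" line.
//     EXPECT_TRUE(IsInRange(42, 0, 41));
//   }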
- AssertionResult& operator<<( - ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) { - AppendMessage(Message() << basic_manipulator); - return *this; - } - - private: - // Appends the contents of message to message_. - void AppendMessage(const Message& a_message) { - if (message_.get() == NULL) - message_.reset(new ::std::string); - message_->append(a_message.GetString().c_str()); - } - - // Stores result of the assertion predicate. - bool success_; - // Stores the message describing the condition in case the expectation - // construct is not satisfied with the predicate's outcome. - // Referenced via a pointer to avoid taking too much stack frame space - // with test assertions. - internal::scoped_ptr< ::std::string> message_; - - GTEST_DISALLOW_ASSIGN_(AssertionResult); -}; - -// Makes a successful assertion result. -GTEST_API_ AssertionResult AssertionSuccess(); - -// Makes a failed assertion result. -GTEST_API_ AssertionResult AssertionFailure(); - -// Makes a failed assertion result with the given failure message. -// Deprecated; use AssertionFailure() << msg. -GTEST_API_ AssertionResult AssertionFailure(const Message& msg); - -// The abstract class that all tests inherit from. -// -// In Google Test, a unit test program contains one or many TestCases, and -// each TestCase contains one or many Tests. -// -// When you define a test using the TEST macro, you don't need to -// explicitly derive from Test - the TEST macro automatically does -// this for you. -// -// The only time you derive from Test is when defining a test fixture -// to be used a TEST_F. For example: -// -// class FooTest : public testing::Test { -// protected: -// virtual void SetUp() { ... } -// virtual void TearDown() { ... } -// ... -// }; -// -// TEST_F(FooTest, Bar) { ... } -// TEST_F(FooTest, Baz) { ... } -// -// Test is not copyable. -class GTEST_API_ Test { - public: - friend class TestInfo; - - // Defines types for pointers to functions that set up and tear down - // a test case. - typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc; - typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc; - - // The d'tor is virtual as we intend to inherit from Test. - virtual ~Test(); - - // Sets up the stuff shared by all tests in this test case. - // - // Google Test will call Foo::SetUpTestCase() before running the first - // test in test case Foo. Hence a sub-class can define its own - // SetUpTestCase() method to shadow the one defined in the super - // class. - static void SetUpTestCase() {} - - // Tears down the stuff shared by all tests in this test case. - // - // Google Test will call Foo::TearDownTestCase() after running the last - // test in test case Foo. Hence a sub-class can define its own - // TearDownTestCase() method to shadow the one defined in the super - // class. - static void TearDownTestCase() {} - - // Returns true iff the current test has a fatal failure. - static bool HasFatalFailure(); - - // Returns true iff the current test has a non-fatal failure. - static bool HasNonfatalFailure(); - - // Returns true iff the current test has a (either fatal or - // non-fatal) failure. - static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); } - - // Logs a property for the current test. Only the last value for a given - // key is remembered. - // These are public static so they can be called from utility functions - // that are not members of the test fixture. 
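// A brief sketch of the per-test-case hooks documented above, sharing one
// expensive resource across every test in a case; QueryTest, Database and
// connection_ are illustrative names:
//
//   class QueryTest : public testing::Test {
//    protected:
//     // Run once before the first and once after the last test in the case.
//     static void SetUpTestCase()    { connection_ = new Database; }
//     static void TearDownTestCase() { delete connection_; connection_ = NULL; }
//
//     static Database* connection_;  // shared by all TEST_F(QueryTest, ...)
//   };
//   Database* QueryTest::connection_ = NULL;
//
//   TEST_F(QueryTest, UsesSharedConnection) {
//     ASSERT_TRUE(connection_ != NULL);
//   }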
- // The arguments are const char* instead strings, as Google Test is used - // on platforms where string doesn't compile. - // - // Note that a driving consideration for these RecordProperty methods - // was to produce xml output suited to the Greenspan charting utility, - // which at present will only chart values that fit in a 32-bit int. It - // is the user's responsibility to restrict their values to 32-bit ints - // if they intend them to be used with Greenspan. - static void RecordProperty(const char* key, const char* value); - static void RecordProperty(const char* key, int value); - - protected: - // Creates a Test object. - Test(); - - // Sets up the test fixture. - virtual void SetUp(); - - // Tears down the test fixture. - virtual void TearDown(); - - private: - // Returns true iff the current test has the same fixture class as - // the first test in the current test case. - static bool HasSameFixtureClass(); - - // Runs the test after the test fixture has been set up. - // - // A sub-class must implement this to define the test logic. - // - // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM. - // Instead, use the TEST or TEST_F macro. - virtual void TestBody() = 0; - - // Sets up, executes, and tears down the test. - void Run(); - - // Deletes self. We deliberately pick an unusual name for this - // internal method to avoid clashing with names used in user TESTs. - void DeleteSelf_() { delete this; } - - // Uses a GTestFlagSaver to save and restore all Google Test flags. - const internal::GTestFlagSaver* const gtest_flag_saver_; - - // Often a user mis-spells SetUp() as Setup() and spends a long time - // wondering why it is never called by Google Test. The declaration of - // the following method is solely for catching such an error at - // compile time: - // - // - The return type is deliberately chosen to be not void, so it - // will be a conflict if a user declares void Setup() in his test - // fixture. - // - // - This method is private, so it will be another compiler error - // if a user calls it from his test fixture. - // - // DO NOT OVERRIDE THIS FUNCTION. - // - // If you see an error about overriding the following function or - // about it being private, you have mis-spelled SetUp() as Setup(). - struct Setup_should_be_spelled_SetUp {}; - virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } - - // We disallow copying Tests. - GTEST_DISALLOW_COPY_AND_ASSIGN_(Test); -}; - -typedef internal::TimeInMillis TimeInMillis; - -// A copyable object representing a user specified test property which can be -// output as a key/value string pair. -// -// Don't inherit from TestProperty as its destructor is not virtual. -class TestProperty { - public: - // C'tor. TestProperty does NOT have a default constructor. - // Always use this constructor (with parameters) to create a - // TestProperty object. - TestProperty(const char* a_key, const char* a_value) : - key_(a_key), value_(a_value) { - } - - // Gets the user supplied key. - const char* key() const { - return key_.c_str(); - } - - // Gets the user supplied value. - const char* value() const { - return value_.c_str(); - } - - // Sets a new value, overriding the one supplied in the constructor. - void SetValue(const char* new_value) { - value_ = new_value; - } - - private: - // The key supplied by the user. - internal::String key_; - // The value supplied by the user. - internal::String value_; -}; - -// The result of a single Test. 
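// A small sketch of Test::RecordProperty() in use inside a test body; the
// keys and values below are illustrative:
//
//   TEST(WidgetTest, Stress) {
//     // Both overloads declared above; only the last value per key is kept,
//     // and the pairs end up as attributes in the XML report.
//     RecordProperty("MaxWidgets", 12);
//     RecordProperty("Owner", "widget-team");
//   }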
This includes a list of -// TestPartResults, a list of TestProperties, a count of how many -// death tests there are in the Test, and how much time it took to run -// the Test. -// -// TestResult is not copyable. -class GTEST_API_ TestResult { - public: - // Creates an empty TestResult. - TestResult(); - - // D'tor. Do not inherit from TestResult. - ~TestResult(); - - // Gets the number of all test parts. This is the sum of the number - // of successful test parts and the number of failed test parts. - int total_part_count() const; - - // Returns the number of the test properties. - int test_property_count() const; - - // Returns true iff the test passed (i.e. no test part failed). - bool Passed() const { return !Failed(); } - - // Returns true iff the test failed. - bool Failed() const; - - // Returns true iff the test fatally failed. - bool HasFatalFailure() const; - - // Returns true iff the test has a non-fatal failure. - bool HasNonfatalFailure() const; - - // Returns the elapsed time, in milliseconds. - TimeInMillis elapsed_time() const { return elapsed_time_; } - - // Returns the i-th test part result among all the results. i can range - // from 0 to test_property_count() - 1. If i is not in that range, aborts - // the program. - const TestPartResult& GetTestPartResult(int i) const; - - // Returns the i-th test property. i can range from 0 to - // test_property_count() - 1. If i is not in that range, aborts the - // program. - const TestProperty& GetTestProperty(int i) const; - - private: - friend class TestInfo; - friend class UnitTest; - friend class internal::DefaultGlobalTestPartResultReporter; - friend class internal::ExecDeathTest; - friend class internal::TestResultAccessor; - friend class internal::UnitTestImpl; - friend class internal::WindowsDeathTest; - - // Gets the vector of TestPartResults. - const std::vector& test_part_results() const { - return test_part_results_; - } - - // Gets the vector of TestProperties. - const std::vector& test_properties() const { - return test_properties_; - } - - // Sets the elapsed time. - void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; } - - // Adds a test property to the list. The property is validated and may add - // a non-fatal failure if invalid (e.g., if it conflicts with reserved - // key names). If a property is already recorded for the same key, the - // value will be updated, rather than storing multiple values for the same - // key. - void RecordProperty(const TestProperty& test_property); - - // Adds a failure if the key is a reserved attribute of Google Test - // testcase tags. Returns true if the property is valid. - // TODO(russr): Validate attribute names are legal and human readable. - static bool ValidateTestProperty(const TestProperty& test_property); - - // Adds a test part result to the list. - void AddTestPartResult(const TestPartResult& test_part_result); - - // Returns the death test count. - int death_test_count() const { return death_test_count_; } - - // Increments the death test count, returning the new count. - int increment_death_test_count() { return ++death_test_count_; } - - // Clears the test part results. - void ClearTestPartResults(); - - // Clears the object. - void Clear(); - - // Protects mutable state of the property vector and of owned - // properties, whose values may be updated. 
- internal::Mutex test_properites_mutex_; - - // The vector of TestPartResults - std::vector test_part_results_; - // The vector of TestProperties - std::vector test_properties_; - // Running count of death tests. - int death_test_count_; - // The elapsed time, in milliseconds. - TimeInMillis elapsed_time_; - - // We disallow copying TestResult. - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult); -}; // class TestResult - -// A TestInfo object stores the following information about a test: -// -// Test case name -// Test name -// Whether the test should be run -// A function pointer that creates the test object when invoked -// Test result -// -// The constructor of TestInfo registers itself with the UnitTest -// singleton such that the RUN_ALL_TESTS() macro knows which tests to -// run. -class GTEST_API_ TestInfo { - public: - // Destructs a TestInfo object. This function is not virtual, so - // don't inherit from TestInfo. - ~TestInfo(); - - // Returns the test case name. - const char* test_case_name() const { return test_case_name_.c_str(); } - - // Returns the test name. - const char* name() const { return name_.c_str(); } - - // Returns the name of the parameter type, or NULL if this is not a typed - // or a type-parameterized test. - const char* type_param() const { - if (type_param_.get() != NULL) - return type_param_->c_str(); - return NULL; - } - - // Returns the text representation of the value parameter, or NULL if this - // is not a value-parameterized test. - const char* value_param() const { - if (value_param_.get() != NULL) - return value_param_->c_str(); - return NULL; - } - - // Returns true if this test should run, that is if the test is not disabled - // (or it is disabled but the also_run_disabled_tests flag has been specified) - // and its full name matches the user-specified filter. - // - // Google Test allows the user to filter the tests by their full names. - // The full name of a test Bar in test case Foo is defined as - // "Foo.Bar". Only the tests that match the filter will run. - // - // A filter is a colon-separated list of glob (not regex) patterns, - // optionally followed by a '-' and a colon-separated list of - // negative patterns (tests to exclude). A test is run if it - // matches one of the positive patterns and does not match any of - // the negative patterns. - // - // For example, *A*:Foo.* is a filter that matches any string that - // contains the character 'A' or starts with "Foo.". - bool should_run() const { return should_run_; } - - // Returns the result of the test. - const TestResult* result() const { return &result_; } - - private: - -#if GTEST_HAS_DEATH_TEST - friend class internal::DefaultDeathTestFactory; -#endif // GTEST_HAS_DEATH_TEST - friend class Test; - friend class TestCase; - friend class internal::UnitTestImpl; - friend TestInfo* internal::MakeAndRegisterTestInfo( - const char* test_case_name, const char* name, - const char* type_param, - const char* value_param, - internal::TypeId fixture_class_id, - Test::SetUpTestCaseFunc set_up_tc, - Test::TearDownTestCaseFunc tear_down_tc, - internal::TestFactoryBase* factory); - - // Constructs a TestInfo object. The newly constructed instance assumes - // ownership of the factory object. - TestInfo(const char* test_case_name, const char* name, - const char* a_type_param, - const char* a_value_param, - internal::TypeId fixture_class_id, - internal::TestFactoryBase* factory); - - // Increments the number of death tests encountered in this test so - // far. 
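// A small sketch that reads the accessors above at run time through the
// UnitTest singleton declared later in this header; LogTestStart() and
// LoggedTest are illustrative names:
//
//   #include <cstdio>
//
//   void LogTestStart() {
//     const testing::TestInfo* const info =
//         testing::UnitTest::GetInstance()->current_test_info();
//     std::printf("running %s.%s\n", info->test_case_name(), info->name());
//   }
//
//   class LoggedTest : public testing::Test {
//    protected:
//     virtual void SetUp() { LogTestStart(); }
//   };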
- int increment_death_test_count() { - return result_.increment_death_test_count(); - } - - // Creates the test object, runs it, records its result, and then - // deletes it. - void Run(); - - static void ClearTestResult(TestInfo* test_info) { - test_info->result_.Clear(); - } - - // These fields are immutable properties of the test. - const std::string test_case_name_; // Test case name - const std::string name_; // Test name - // Name of the parameter type, or NULL if this is not a typed or a - // type-parameterized test. - const internal::scoped_ptr type_param_; - // Text representation of the value parameter, or NULL if this is not a - // value-parameterized test. - const internal::scoped_ptr value_param_; - const internal::TypeId fixture_class_id_; // ID of the test fixture class - bool should_run_; // True iff this test should run - bool is_disabled_; // True iff this test is disabled - bool matches_filter_; // True if this test matches the - // user-specified filter. - internal::TestFactoryBase* const factory_; // The factory that creates - // the test object - - // This field is mutable and needs to be reset before running the - // test for the second time. - TestResult result_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo); -}; - -// A test case, which consists of a vector of TestInfos. -// -// TestCase is not copyable. -class GTEST_API_ TestCase { - public: - // Creates a TestCase with the given name. - // - // TestCase does NOT have a default constructor. Always use this - // constructor to create a TestCase object. - // - // Arguments: - // - // name: name of the test case - // a_type_param: the name of the test's type parameter, or NULL if - // this is not a type-parameterized test. - // set_up_tc: pointer to the function that sets up the test case - // tear_down_tc: pointer to the function that tears down the test case - TestCase(const char* name, const char* a_type_param, - Test::SetUpTestCaseFunc set_up_tc, - Test::TearDownTestCaseFunc tear_down_tc); - - // Destructor of TestCase. - virtual ~TestCase(); - - // Gets the name of the TestCase. - const char* name() const { return name_.c_str(); } - - // Returns the name of the parameter type, or NULL if this is not a - // type-parameterized test case. - const char* type_param() const { - if (type_param_.get() != NULL) - return type_param_->c_str(); - return NULL; - } - - // Returns true if any test in this test case should run. - bool should_run() const { return should_run_; } - - // Gets the number of successful tests in this test case. - int successful_test_count() const; - - // Gets the number of failed tests in this test case. - int failed_test_count() const; - - // Gets the number of disabled tests in this test case. - int disabled_test_count() const; - - // Get the number of tests in this test case that should run. - int test_to_run_count() const; - - // Gets the number of all tests in this test case. - int total_test_count() const; - - // Returns true iff the test case passed. - bool Passed() const { return !Failed(); } - - // Returns true iff the test case failed. - bool Failed() const { return failed_test_count() > 0; } - - // Returns the elapsed time, in milliseconds. - TimeInMillis elapsed_time() const { return elapsed_time_; } - - // Returns the i-th test among all the tests. i can range from 0 to - // total_test_count() - 1. If i is not in that range, returns NULL. 
- const TestInfo* GetTestInfo(int i) const; - - private: - friend class Test; - friend class internal::UnitTestImpl; - - // Gets the (mutable) vector of TestInfos in this TestCase. - std::vector& test_info_list() { return test_info_list_; } - - // Gets the (immutable) vector of TestInfos in this TestCase. - const std::vector& test_info_list() const { - return test_info_list_; - } - - // Returns the i-th test among all the tests. i can range from 0 to - // total_test_count() - 1. If i is not in that range, returns NULL. - TestInfo* GetMutableTestInfo(int i); - - // Sets the should_run member. - void set_should_run(bool should) { should_run_ = should; } - - // Adds a TestInfo to this test case. Will delete the TestInfo upon - // destruction of the TestCase object. - void AddTestInfo(TestInfo * test_info); - - // Clears the results of all tests in this test case. - void ClearResult(); - - // Clears the results of all tests in the given test case. - static void ClearTestCaseResult(TestCase* test_case) { - test_case->ClearResult(); - } - - // Runs every test in this TestCase. - void Run(); - - // Runs SetUpTestCase() for this TestCase. This wrapper is needed - // for catching exceptions thrown from SetUpTestCase(). - void RunSetUpTestCase() { (*set_up_tc_)(); } - - // Runs TearDownTestCase() for this TestCase. This wrapper is - // needed for catching exceptions thrown from TearDownTestCase(). - void RunTearDownTestCase() { (*tear_down_tc_)(); } - - // Returns true iff test passed. - static bool TestPassed(const TestInfo* test_info) { - return test_info->should_run() && test_info->result()->Passed(); - } - - // Returns true iff test failed. - static bool TestFailed(const TestInfo* test_info) { - return test_info->should_run() && test_info->result()->Failed(); - } - - // Returns true iff test is disabled. - static bool TestDisabled(const TestInfo* test_info) { - return test_info->is_disabled_; - } - - // Returns true if the given test should run. - static bool ShouldRunTest(const TestInfo* test_info) { - return test_info->should_run(); - } - - // Shuffles the tests in this test case. - void ShuffleTests(internal::Random* random); - - // Restores the test order to before the first shuffle. - void UnshuffleTests(); - - // Name of the test case. - internal::String name_; - // Name of the parameter type, or NULL if this is not a typed or a - // type-parameterized test. - const internal::scoped_ptr type_param_; - // The vector of TestInfos in their original order. It owns the - // elements in the vector. - std::vector test_info_list_; - // Provides a level of indirection for the test list to allow easy - // shuffling and restoring the test order. The i-th element in this - // vector is the index of the i-th test in the shuffled test list. - std::vector test_indices_; - // Pointer to the function that sets up the test case. - Test::SetUpTestCaseFunc set_up_tc_; - // Pointer to the function that tears down the test case. - Test::TearDownTestCaseFunc tear_down_tc_; - // True iff any test in this test case should run. - bool should_run_; - // Elapsed time, in milliseconds. - TimeInMillis elapsed_time_; - - // We disallow copying TestCases. - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase); -}; - -// An Environment object is capable of setting up and tearing down an -// environment. The user should subclass this to define his own -// environment(s). 
-// -// An Environment object does the set-up and tear-down in virtual -// methods SetUp() and TearDown() instead of the constructor and the -// destructor, as: -// -// 1. You cannot safely throw from a destructor. This is a problem -// as in some cases Google Test is used where exceptions are enabled, and -// we may want to implement ASSERT_* using exceptions where they are -// available. -// 2. You cannot use ASSERT_* directly in a constructor or -// destructor. -class Environment { - public: - // The d'tor is virtual as we need to subclass Environment. - virtual ~Environment() {} - - // Override this to define how to set up the environment. - virtual void SetUp() {} - - // Override this to define how to tear down the environment. - virtual void TearDown() {} - private: - // If you see an error about overriding the following function or - // about it being private, you have mis-spelled SetUp() as Setup(). - struct Setup_should_be_spelled_SetUp {}; - virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } -}; - -// The interface for tracing execution of tests. The methods are organized in -// the order the corresponding events are fired. -class TestEventListener { - public: - virtual ~TestEventListener() {} - - // Fired before any test activity starts. - virtual void OnTestProgramStart(const UnitTest& unit_test) = 0; - - // Fired before each iteration of tests starts. There may be more than - // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration - // index, starting from 0. - virtual void OnTestIterationStart(const UnitTest& unit_test, - int iteration) = 0; - - // Fired before environment set-up for each iteration of tests starts. - virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0; - - // Fired after environment set-up for each iteration of tests ends. - virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0; - - // Fired before the test case starts. - virtual void OnTestCaseStart(const TestCase& test_case) = 0; - - // Fired before the test starts. - virtual void OnTestStart(const TestInfo& test_info) = 0; - - // Fired after a failed assertion or a SUCCEED() invocation. - virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0; - - // Fired after the test ends. - virtual void OnTestEnd(const TestInfo& test_info) = 0; - - // Fired after the test case ends. - virtual void OnTestCaseEnd(const TestCase& test_case) = 0; - - // Fired before environment tear-down for each iteration of tests starts. - virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0; - - // Fired after environment tear-down for each iteration of tests ends. - virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0; - - // Fired after each iteration of tests finishes. - virtual void OnTestIterationEnd(const UnitTest& unit_test, - int iteration) = 0; - - // Fired after all test activities have ended. - virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0; -}; - -// The convenience class for users who need to override just one or two -// methods and are not concerned that a possible change to a signature of -// the methods they override will not be caught during the build. For -// comments about each method please see the definition of TestEventListener -// above. 
-class EmptyTestEventListener : public TestEventListener { - public: - virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} - virtual void OnTestIterationStart(const UnitTest& /*unit_test*/, - int /*iteration*/) {} - virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {} - virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} - virtual void OnTestCaseStart(const TestCase& /*test_case*/) {} - virtual void OnTestStart(const TestInfo& /*test_info*/) {} - virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {} - virtual void OnTestEnd(const TestInfo& /*test_info*/) {} - virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {} - virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {} - virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} - virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/, - int /*iteration*/) {} - virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} -}; - -// TestEventListeners lets users add listeners to track events in Google Test. -class GTEST_API_ TestEventListeners { - public: - TestEventListeners(); - ~TestEventListeners(); - - // Appends an event listener to the end of the list. Google Test assumes - // the ownership of the listener (i.e. it will delete the listener when - // the test program finishes). - void Append(TestEventListener* listener); - - // Removes the given event listener from the list and returns it. It then - // becomes the caller's responsibility to delete the listener. Returns - // NULL if the listener is not found in the list. - TestEventListener* Release(TestEventListener* listener); - - // Returns the standard listener responsible for the default console - // output. Can be removed from the listeners list to shut down default - // console output. Note that removing this object from the listener list - // with Release transfers its ownership to the caller and makes this - // function return NULL the next time. - TestEventListener* default_result_printer() const { - return default_result_printer_; - } - - // Returns the standard listener responsible for the default XML output - // controlled by the --gtest_output=xml flag. Can be removed from the - // listeners list by users who want to shut down the default XML output - // controlled by this flag and substitute it with custom one. Note that - // removing this object from the listener list with Release transfers its - // ownership to the caller and makes this function return NULL the next - // time. - TestEventListener* default_xml_generator() const { - return default_xml_generator_; - } - - private: - friend class TestCase; - friend class TestInfo; - friend class internal::DefaultGlobalTestPartResultReporter; - friend class internal::NoExecDeathTest; - friend class internal::TestEventListenersAccessor; - friend class internal::UnitTestImpl; - - // Returns repeater that broadcasts the TestEventListener events to all - // subscribers. - TestEventListener* repeater(); - - // Sets the default_result_printer attribute to the provided listener. - // The listener is also added to the listener list and previous - // default_result_printer is removed from it and deleted. The listener can - // also be NULL in which case it will not be added to the list. Does - // nothing if the previous and the current listener objects are the same. - void SetDefaultResultPrinter(TestEventListener* listener); - - // Sets the default_xml_generator attribute to the provided listener. 
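// A compact sketch of installing a custom listener through the
// TestEventListeners API above; TersePrinter is an illustrative name, and
// removing the default printer is optional:
//
//   #include <cstdio>
//
//   class TersePrinter : public testing::EmptyTestEventListener {
//     virtual void OnTestEnd(const testing::TestInfo& test_info) {
//       std::printf("%s.%s %s\n", test_info.test_case_name(), test_info.name(),
//                   test_info.result()->Passed() ? "OK" : "FAILED");
//     }
//   };
//
//   int main(int argc, char** argv) {
//     testing::InitGoogleTest(&argc, argv);
//     testing::TestEventListeners& listeners =
//         testing::UnitTest::GetInstance()->listeners();
//     delete listeners.Release(listeners.default_result_printer());
//     listeners.Append(new TersePrinter);  // Google Test now owns it.
//     return RUN_ALL_TESTS();
//   }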
The - // listener is also added to the listener list and previous - // default_xml_generator is removed from it and deleted. The listener can - // also be NULL in which case it will not be added to the list. Does - // nothing if the previous and the current listener objects are the same. - void SetDefaultXmlGenerator(TestEventListener* listener); - - // Controls whether events will be forwarded by the repeater to the - // listeners in the list. - bool EventForwardingEnabled() const; - void SuppressEventForwarding(); - - // The actual list of listeners. - internal::TestEventRepeater* repeater_; - // Listener responsible for the standard result output. - TestEventListener* default_result_printer_; - // Listener responsible for the creation of the XML output file. - TestEventListener* default_xml_generator_; - - // We disallow copying TestEventListeners. - GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners); -}; - -// A UnitTest consists of a vector of TestCases. -// -// This is a singleton class. The only instance of UnitTest is -// created when UnitTest::GetInstance() is first called. This -// instance is never deleted. -// -// UnitTest is not copyable. -// -// This class is thread-safe as long as the methods are called -// according to their specification. -class GTEST_API_ UnitTest { - public: - // Gets the singleton UnitTest object. The first time this method - // is called, a UnitTest object is constructed and returned. - // Consecutive calls will return the same object. - static UnitTest* GetInstance(); - - // Runs all tests in this UnitTest object and prints the result. - // Returns 0 if successful, or 1 otherwise. - // - // This method can only be called from the main thread. - // - // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. - int Run() GTEST_MUST_USE_RESULT_; - - // Returns the working directory when the first TEST() or TEST_F() - // was executed. The UnitTest object owns the string. - const char* original_working_dir() const; - - // Returns the TestCase object for the test that's currently running, - // or NULL if no test is running. - const TestCase* current_test_case() const; - - // Returns the TestInfo object for the test that's currently running, - // or NULL if no test is running. - const TestInfo* current_test_info() const; - - // Returns the random seed used at the start of the current test run. - int random_seed() const; - -#if GTEST_HAS_PARAM_TEST - // Returns the ParameterizedTestCaseRegistry object used to keep track of - // value-parameterized tests and instantiate and register them. - // - // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. - internal::ParameterizedTestCaseRegistry& parameterized_test_registry(); -#endif // GTEST_HAS_PARAM_TEST - - // Gets the number of successful test cases. - int successful_test_case_count() const; - - // Gets the number of failed test cases. - int failed_test_case_count() const; - - // Gets the number of all test cases. - int total_test_case_count() const; - - // Gets the number of all test cases that contain at least one test - // that should run. - int test_case_to_run_count() const; - - // Gets the number of successful tests. - int successful_test_count() const; - - // Gets the number of failed tests. - int failed_test_count() const; - - // Gets the number of disabled tests. - int disabled_test_count() const; - - // Gets the number of all tests. - int total_test_count() const; - - // Gets the number of tests that should run. - int test_to_run_count() const; - - // Gets the elapsed time, in milliseconds. 
- TimeInMillis elapsed_time() const; - - // Returns true iff the unit test passed (i.e. all test cases passed). - bool Passed() const; - - // Returns true iff the unit test failed (i.e. some test case failed - // or something outside of all tests failed). - bool Failed() const; - - // Gets the i-th test case among all the test cases. i can range from 0 to - // total_test_case_count() - 1. If i is not in that range, returns NULL. - const TestCase* GetTestCase(int i) const; - - // Returns the list of event listeners that can be used to track events - // inside Google Test. - TestEventListeners& listeners(); - - private: - // Registers and returns a global test environment. When a test - // program is run, all global test environments will be set-up in - // the order they were registered. After all tests in the program - // have finished, all global test environments will be torn-down in - // the *reverse* order they were registered. - // - // The UnitTest object takes ownership of the given environment. - // - // This method can only be called from the main thread. - Environment* AddEnvironment(Environment* env); - - // Adds a TestPartResult to the current TestResult object. All - // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) - // eventually call this to report their results. The user code - // should use the assertion macros instead of calling this directly. - void AddTestPartResult(TestPartResult::Type result_type, - const char* file_name, - int line_number, - const internal::String& message, - const internal::String& os_stack_trace); - - // Adds a TestProperty to the current TestResult object. If the result already - // contains a property with the same key, the value will be updated. - void RecordPropertyForCurrentTest(const char* key, const char* value); - - // Gets the i-th test case among all the test cases. i can range from 0 to - // total_test_case_count() - 1. If i is not in that range, returns NULL. - TestCase* GetMutableTestCase(int i); - - // Accessors for the implementation object. - internal::UnitTestImpl* impl() { return impl_; } - const internal::UnitTestImpl* impl() const { return impl_; } - - // These classes and funcions are friends as they need to access private - // members of UnitTest. - friend class Test; - friend class internal::AssertHelper; - friend class internal::ScopedTrace; - friend Environment* AddGlobalTestEnvironment(Environment* env); - friend internal::UnitTestImpl* internal::GetUnitTestImpl(); - friend void internal::ReportFailureInUnknownLocation( - TestPartResult::Type result_type, - const internal::String& message); - - // Creates an empty UnitTest. - UnitTest(); - - // D'tor - virtual ~UnitTest(); - - // Pushes a trace defined by SCOPED_TRACE() on to the per-thread - // Google Test trace stack. - void PushGTestTrace(const internal::TraceInfo& trace); - - // Pops a trace from the per-thread Google Test trace stack. - void PopGTestTrace(); - - // Protects mutable state in *impl_. This is mutable as some const - // methods need to lock it too. - mutable internal::Mutex mutex_; - - // Opaque implementation object. This field is never changed once - // the object is constructed. We don't mark it as const here, as - // doing so will cause a warning in the constructor of UnitTest. - // Mutable state in *impl_ is protected by mutex_. - internal::UnitTestImpl* impl_; - - // We disallow copying UnitTest. - GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest); -}; - -// A convenient wrapper for adding an environment for the test -// program. 
-// -// You should call this before RUN_ALL_TESTS() is called, probably in -// main(). If you use gtest_main, you need to call this before main() -// starts for it to take effect. For example, you can define a global -// variable like this: -// -// testing::Environment* const foo_env = -// testing::AddGlobalTestEnvironment(new FooEnvironment); -// -// However, we strongly recommend you to write your own main() and -// call AddGlobalTestEnvironment() there, as relying on initialization -// of global variables makes the code harder to read and may cause -// problems when you register multiple environments from different -// translation units and the environments have dependencies among them -// (remember that the compiler doesn't guarantee the order in which -// global variables from different translation units are initialized). -inline Environment* AddGlobalTestEnvironment(Environment* env) { - return UnitTest::GetInstance()->AddEnvironment(env); -} - -// Initializes Google Test. This must be called before calling -// RUN_ALL_TESTS(). In particular, it parses a command line for the -// flags that Google Test recognizes. Whenever a Google Test flag is -// seen, it is removed from argv, and *argc is decremented. -// -// No value is returned. Instead, the Google Test flag variables are -// updated. -// -// Calling the function for the second time has no user-visible effect. -GTEST_API_ void InitGoogleTest(int* argc, char** argv); - -// This overloaded version can be used in Windows programs compiled in -// UNICODE mode. -GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv); - -namespace internal { - -// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc) -// operand to be used in a failure message. The type (but not value) -// of the other operand may affect the format. This allows us to -// print a char* as a raw pointer when it is compared against another -// char*, and print it as a C string when it is compared against an -// std::string object, for example. -// -// The default implementation ignores the type of the other operand. -// Some specialized versions are used to handle formatting wide or -// narrow C strings. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -template -String FormatForComparisonFailureMessage(const T1& value, - const T2& /* other_operand */) { - // C++Builder compiles this incorrectly if the namespace isn't explicitly - // given. - return ::testing::PrintToString(value); -} - -// The helper function for {ASSERT|EXPECT}_EQ. -template -AssertionResult CmpHelperEQ(const char* expected_expression, - const char* actual_expression, - const T1& expected, - const T2& actual) { -#ifdef _MSC_VER -# pragma warning(push) // Saves the current warning state. -# pragma warning(disable:4389) // Temporarily disables warning on - // signed/unsigned mismatch. -#endif - - if (expected == actual) { - return AssertionSuccess(); - } - -#ifdef _MSC_VER -# pragma warning(pop) // Restores the warning state. -#endif - - return EqFailure(expected_expression, - actual_expression, - FormatForComparisonFailureMessage(expected, actual), - FormatForComparisonFailureMessage(actual, expected), - false); -} - -// With this overloaded version, we allow anonymous enums to be used -// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums -// can be implicitly cast to BiggestInt. 
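// A brief sketch that fills in the FooEnvironment subclass assumed by the
// comment above and registers it from a hand-written main():
//
//   class FooEnvironment : public testing::Environment {
//    public:
//     virtual void SetUp()    { /* acquire process-wide resources */ }
//     virtual void TearDown() { /* release them; runs in reverse order */ }
//   };
//
//   int main(int argc, char** argv) {
//     testing::InitGoogleTest(&argc, argv);
//     // Ownership passes to Google Test; do not delete the environment.
//     testing::AddGlobalTestEnvironment(new FooEnvironment);
//     return RUN_ALL_TESTS();
//   }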
-GTEST_API_ AssertionResult CmpHelperEQ(const char* expected_expression, - const char* actual_expression, - BiggestInt expected, - BiggestInt actual); - -// The helper class for {ASSERT|EXPECT}_EQ. The template argument -// lhs_is_null_literal is true iff the first argument to ASSERT_EQ() -// is a null pointer literal. The following default implementation is -// for lhs_is_null_literal being false. -template -class EqHelper { - public: - // This templatized version is for the general case. - template - static AssertionResult Compare(const char* expected_expression, - const char* actual_expression, - const T1& expected, - const T2& actual) { - return CmpHelperEQ(expected_expression, actual_expression, expected, - actual); - } - - // With this overloaded version, we allow anonymous enums to be used - // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous - // enums can be implicitly cast to BiggestInt. - // - // Even though its body looks the same as the above version, we - // cannot merge the two, as it will make anonymous enums unhappy. - static AssertionResult Compare(const char* expected_expression, - const char* actual_expression, - BiggestInt expected, - BiggestInt actual) { - return CmpHelperEQ(expected_expression, actual_expression, expected, - actual); - } -}; - -// This specialization is used when the first argument to ASSERT_EQ() -// is a null pointer literal, like NULL, false, or 0. -template <> -class EqHelper { - public: - // We define two overloaded versions of Compare(). The first - // version will be picked when the second argument to ASSERT_EQ() is - // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or - // EXPECT_EQ(false, a_bool). - template - static AssertionResult Compare( - const char* expected_expression, - const char* actual_expression, - const T1& expected, - const T2& actual, - // The following line prevents this overload from being considered if T2 - // is not a pointer type. We need this because ASSERT_EQ(NULL, my_ptr) - // expands to Compare("", "", NULL, my_ptr), which requires a conversion - // to match the Secret* in the other overload, which would otherwise make - // this template match better. - typename EnableIf::value>::type* = 0) { - return CmpHelperEQ(expected_expression, actual_expression, expected, - actual); - } - - // This version will be picked when the second argument to ASSERT_EQ() is a - // pointer, e.g. ASSERT_EQ(NULL, a_pointer). - template - static AssertionResult Compare( - const char* expected_expression, - const char* actual_expression, - // We used to have a second template parameter instead of Secret*. That - // template parameter would deduce to 'long', making this a better match - // than the first overload even without the first overload's EnableIf. - // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to - // non-pointer argument" (even a deduced integral argument), so the old - // implementation caused warnings in user code. - Secret* /* expected (NULL) */, - T* actual) { - // We already know that 'expected' is a null pointer. - return CmpHelperEQ(expected_expression, actual_expression, - static_cast(NULL), actual); - } -}; - -// A macro for implementing the helper functions needed to implement -// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste -// of similar code. -// -// For each templatized helper function, we also define an overloaded -// version for BiggestInt in order to reduce code bloat and allow -// anonymous enums to be used with {ASSERT|EXPECT}_?? 
when compiled -// with gcc 4. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -#define GTEST_IMPL_CMP_HELPER_(op_name, op)\ -template \ -AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ - const T1& val1, const T2& val2) {\ - if (val1 op val2) {\ - return AssertionSuccess();\ - } else {\ - return AssertionFailure() \ - << "Expected: (" << expr1 << ") " #op " (" << expr2\ - << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\ - << " vs " << FormatForComparisonFailureMessage(val2, val1);\ - }\ -}\ -GTEST_API_ AssertionResult CmpHelper##op_name(\ - const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2) - -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. - -// Implements the helper function for {ASSERT|EXPECT}_NE -GTEST_IMPL_CMP_HELPER_(NE, !=); -// Implements the helper function for {ASSERT|EXPECT}_LE -GTEST_IMPL_CMP_HELPER_(LE, <=); -// Implements the helper function for {ASSERT|EXPECT}_LT -GTEST_IMPL_CMP_HELPER_(LT, < ); -// Implements the helper function for {ASSERT|EXPECT}_GE -GTEST_IMPL_CMP_HELPER_(GE, >=); -// Implements the helper function for {ASSERT|EXPECT}_GT -GTEST_IMPL_CMP_HELPER_(GT, > ); - -#undef GTEST_IMPL_CMP_HELPER_ - -// The helper function for {ASSERT|EXPECT}_STREQ. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression, - const char* actual_expression, - const char* expected, - const char* actual); - -// The helper function for {ASSERT|EXPECT}_STRCASEEQ. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression, - const char* actual_expression, - const char* expected, - const char* actual); - -// The helper function for {ASSERT|EXPECT}_STRNE. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, - const char* s2_expression, - const char* s1, - const char* s2); - -// The helper function for {ASSERT|EXPECT}_STRCASENE. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression, - const char* s2_expression, - const char* s1, - const char* s2); - - -// Helper function for *_STREQ on wide strings. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression, - const char* actual_expression, - const wchar_t* expected, - const wchar_t* actual); - -// Helper function for *_STRNE on wide strings. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, - const char* s2_expression, - const wchar_t* s1, - const wchar_t* s2); - -} // namespace internal - -// IsSubstring() and IsNotSubstring() are intended to be used as the -// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by -// themselves. They check whether needle is a substring of haystack -// (NULL is considered a substring of itself only), and return an -// appropriate error message when they fail. -// -// The {needle,haystack}_expr arguments are the stringified -// expressions that generated the two real arguments. 
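// A short sketch of how these helpers are normally reached from user code:
// EXPECT_STREQ/EXPECT_STRCASEEQ (defined further down in this header) call
// CmpHelperSTREQ/CmpHelperSTRCASEEQ, and IsSubstring/IsNotSubstring are
// written as predicate-formatters so they plug into EXPECT_PRED_FORMAT2:
//
//   TEST(StringTest, Helpers) {
//     const char* const s = "Hello, world!";
//     EXPECT_STREQ("Hello, world!", s);
//     EXPECT_STRCASEEQ("HELLO, WORLD!", s);
//     EXPECT_PRED_FORMAT2(testing::IsSubstring, "world", s);
//     EXPECT_PRED_FORMAT2(testing::IsNotSubstring, "goodbye", s);
//   }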
-GTEST_API_ AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const char* needle, const char* haystack); -GTEST_API_ AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const wchar_t* needle, const wchar_t* haystack); -GTEST_API_ AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const char* needle, const char* haystack); -GTEST_API_ AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const wchar_t* needle, const wchar_t* haystack); -GTEST_API_ AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::string& needle, const ::std::string& haystack); -GTEST_API_ AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::string& needle, const ::std::string& haystack); - -#if GTEST_HAS_STD_WSTRING -GTEST_API_ AssertionResult IsSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::wstring& needle, const ::std::wstring& haystack); -GTEST_API_ AssertionResult IsNotSubstring( - const char* needle_expr, const char* haystack_expr, - const ::std::wstring& needle, const ::std::wstring& haystack); -#endif // GTEST_HAS_STD_WSTRING - -namespace internal { - -// Helper template function for comparing floating-points. -// -// Template parameter: -// -// RawType: the raw floating-point type (either float or double) -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -template -AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression, - const char* actual_expression, - RawType expected, - RawType actual) { - const FloatingPoint lhs(expected), rhs(actual); - - if (lhs.AlmostEquals(rhs)) { - return AssertionSuccess(); - } - - ::std::stringstream expected_ss; - expected_ss << std::setprecision(std::numeric_limits::digits10 + 2) - << expected; - - ::std::stringstream actual_ss; - actual_ss << std::setprecision(std::numeric_limits::digits10 + 2) - << actual; - - return EqFailure(expected_expression, - actual_expression, - StringStreamToString(&expected_ss), - StringStreamToString(&actual_ss), - false); -} - -// Helper function for implementing ASSERT_NEAR. -// -// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. -GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1, - const char* expr2, - const char* abs_error_expr, - double val1, - double val2, - double abs_error); - -// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. -// A class that enables one to stream messages to assertion macros -class GTEST_API_ AssertHelper { - public: - // Constructor. - AssertHelper(TestPartResult::Type type, - const char* file, - int line, - const char* message); - ~AssertHelper(); - - // Message assignment is a semantic trick to enable assertion - // streaming; see the GTEST_MESSAGE_ macro below. - void operator=(const Message& message) const; - - private: - // We put our data in a struct so that the size of the AssertHelper class can - // be as small as possible. This is important because gcc is incapable of - // re-using stack space even for temporary variables, so every EXPECT_EQ - // reserves stack space for another AssertHelper. 
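// A small sketch of the floating-point support above as seen from user code:
// CmpHelperFloatingPointEQ() does a ULP-based almost-equal comparison and
// backs EXPECT_FLOAT_EQ/EXPECT_DOUBLE_EQ (defined further down), while
// DoubleNearPredFormat() backs EXPECT_NEAR with an explicit absolute error:
//
//   TEST(MathTest, FloatingPoint) {
//     const double x = 0.1 + 0.2;
//     // EXPECT_EQ(0.3, x) would fail; the almost-equal forms do not.
//     EXPECT_DOUBLE_EQ(0.3, x);
//     EXPECT_NEAR(0.3, x, 1e-9);   // |expected - actual| <= abs_error
//     EXPECT_FLOAT_EQ(1.0f, 3.0f / 3.0f);
//   }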
- struct AssertHelperData { - AssertHelperData(TestPartResult::Type t, - const char* srcfile, - int line_num, - const char* msg) - : type(t), file(srcfile), line(line_num), message(msg) { } - - TestPartResult::Type const type; - const char* const file; - int const line; - String const message; - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData); - }; - - AssertHelperData* const data_; - - GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper); -}; - -} // namespace internal - -#if GTEST_HAS_PARAM_TEST -// The pure interface class that all value-parameterized tests inherit from. -// A value-parameterized class must inherit from both ::testing::Test and -// ::testing::WithParamInterface. In most cases that just means inheriting -// from ::testing::TestWithParam, but more complicated test hierarchies -// may need to inherit from Test and WithParamInterface at different levels. -// -// This interface has support for accessing the test parameter value via -// the GetParam() method. -// -// Use it with one of the parameter generator defining functions, like Range(), -// Values(), ValuesIn(), Bool(), and Combine(). -// -// class FooTest : public ::testing::TestWithParam { -// protected: -// FooTest() { -// // Can use GetParam() here. -// } -// virtual ~FooTest() { -// // Can use GetParam() here. -// } -// virtual void SetUp() { -// // Can use GetParam() here. -// } -// virtual void TearDown { -// // Can use GetParam() here. -// } -// }; -// TEST_P(FooTest, DoesBar) { -// // Can use GetParam() method here. -// Foo foo; -// ASSERT_TRUE(foo.DoesBar(GetParam())); -// } -// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10)); - -template -class WithParamInterface { - public: - typedef T ParamType; - virtual ~WithParamInterface() {} - - // The current parameter value. Is also available in the test fixture's - // constructor. This member function is non-static, even though it only - // references static data, to reduce the opportunity for incorrect uses - // like writing 'WithParamInterface::GetParam()' for a test that - // uses a fixture whose parameter type is int. - const ParamType& GetParam() const { return *parameter_; } - - private: - // Sets parameter value. The caller is responsible for making sure the value - // remains alive and unchanged throughout the current test. - static void SetParam(const ParamType* parameter) { - parameter_ = parameter; - } - - // Static value used for accessing parameter during a test lifetime. - static const ParamType* parameter_; - - // TestClass must be a subclass of WithParamInterface and Test. - template friend class internal::ParameterizedTestFactory; -}; - -template -const T* WithParamInterface::parameter_ = NULL; - -// Most value-parameterized classes can ignore the existence of -// WithParamInterface, and can just inherit from ::testing::TestWithParam. - -template -class TestWithParam : public Test, public WithParamInterface { -}; - -#endif // GTEST_HAS_PARAM_TEST - -// Macros for indicating success/failure in test code. - -// ADD_FAILURE unconditionally adds a failure to the current test. -// SUCCEED generates a success - it doesn't automatically make the -// current test successful, as a test is only successful when it has -// no failure. -// -// EXPECT_* verifies that a certain condition is satisfied. If not, -// it behaves like ADD_FAILURE. In particular: -// -// EXPECT_TRUE verifies that a Boolean condition is true. -// EXPECT_FALSE verifies that a Boolean condition is false. 
-// -// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except -// that they will also abort the current function on failure. People -// usually want the fail-fast behavior of FAIL and ASSERT_*, but those -// writing data-driven tests often find themselves using ADD_FAILURE -// and EXPECT_* more. -// -// Examples: -// -// EXPECT_TRUE(server.StatusIsOK()); -// ASSERT_FALSE(server.HasPendingRequest(port)) -// << "There are still pending requests " << "on port " << port; - -// Generates a nonfatal failure with a generic message. -#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed") - -// Generates a nonfatal failure at the given source file location with -// a generic message. -#define ADD_FAILURE_AT(file, line) \ - GTEST_MESSAGE_AT_(file, line, "Failed", \ - ::testing::TestPartResult::kNonFatalFailure) - -// Generates a fatal failure with a generic message. -#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed") - -// Define this macro to 1 to omit the definition of FAIL(), which is a -// generic name and clashes with some other libraries. -#if !GTEST_DONT_DEFINE_FAIL -# define FAIL() GTEST_FAIL() -#endif - -// Generates a success with a generic message. -#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded") - -// Define this macro to 1 to omit the definition of SUCCEED(), which -// is a generic name and clashes with some other libraries. -#if !GTEST_DONT_DEFINE_SUCCEED -# define SUCCEED() GTEST_SUCCEED() -#endif - -// Macros for testing exceptions. -// -// * {ASSERT|EXPECT}_THROW(statement, expected_exception): -// Tests that the statement throws the expected exception. -// * {ASSERT|EXPECT}_NO_THROW(statement): -// Tests that the statement doesn't throw any exception. -// * {ASSERT|EXPECT}_ANY_THROW(statement): -// Tests that the statement throws an exception. - -#define EXPECT_THROW(statement, expected_exception) \ - GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_) -#define EXPECT_NO_THROW(statement) \ - GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_) -#define EXPECT_ANY_THROW(statement) \ - GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_) -#define ASSERT_THROW(statement, expected_exception) \ - GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_) -#define ASSERT_NO_THROW(statement) \ - GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_) -#define ASSERT_ANY_THROW(statement) \ - GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_) - -// Boolean assertions. Condition can be either a Boolean expression or an -// AssertionResult. For more information on how to use AssertionResult with -// these macros see comments on that class. -#define EXPECT_TRUE(condition) \ - GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ - GTEST_NONFATAL_FAILURE_) -#define EXPECT_FALSE(condition) \ - GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ - GTEST_NONFATAL_FAILURE_) -#define ASSERT_TRUE(condition) \ - GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ - GTEST_FATAL_FAILURE_) -#define ASSERT_FALSE(condition) \ - GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ - GTEST_FATAL_FAILURE_) - -// Includes the auto-generated header that implements a family of -// generic predicate assertion macros. -// Copyright 2006, Google Inc. -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// This file is AUTOMATICALLY GENERATED on 09/24/2010 by command -// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND! -// -// Implements a family of generic predicate assertion macros. - -#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ -#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ - -// Makes sure this header is not included before gtest.h. -#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ -# error Do not include gtest_pred_impl.h directly. Include gtest.h instead. -#endif // GTEST_INCLUDE_GTEST_GTEST_H_ - -// This header implements a family of generic predicate assertion -// macros: -// -// ASSERT_PRED_FORMAT1(pred_format, v1) -// ASSERT_PRED_FORMAT2(pred_format, v1, v2) -// ... -// -// where pred_format is a function or functor that takes n (in the -// case of ASSERT_PRED_FORMATn) values and their source expression -// text, and returns a testing::AssertionResult. See the definition -// of ASSERT_EQ in gtest.h for an example. -// -// If you don't care about formatting, you can use the more -// restrictive version: -// -// ASSERT_PRED1(pred, v1) -// ASSERT_PRED2(pred, v1, v2) -// ... -// -// where pred is an n-ary function or functor that returns bool, -// and the values v1, v2, ..., must support the << operator for -// streaming to std::ostream. -// -// We also define the EXPECT_* variations. -// -// For now we only support predicates whose arity is at most 5. -// Please email googletestframework@googlegroups.com if you need -// support for higher arities. - -// GTEST_ASSERT_ is the basic statement to which all of the assertions -// in this file reduce. Don't use this in your code. - -#define GTEST_ASSERT_(expression, on_failure) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (const ::testing::AssertionResult gtest_ar = (expression)) \ - ; \ - else \ - on_failure(gtest_ar.failure_message()) - - -// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use -// this in your code. 
-template -AssertionResult AssertPred1Helper(const char* pred_text, - const char* e1, - Pred pred, - const T1& v1) { - if (pred(v1)) return AssertionSuccess(); - - return AssertionFailure() << pred_text << "(" - << e1 << ") evaluates to false, where" - << "\n" << e1 << " evaluates to " << v1; -} - -// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1. -// Don't use this in your code. -#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\ - GTEST_ASSERT_(pred_format(#v1, v1),\ - on_failure) - -// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use -// this in your code. -#define GTEST_PRED1_(pred, v1, on_failure)\ - GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \ - #v1, \ - pred, \ - v1), on_failure) - -// Unary predicate assertion macros. -#define EXPECT_PRED_FORMAT1(pred_format, v1) \ - GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_) -#define EXPECT_PRED1(pred, v1) \ - GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_) -#define ASSERT_PRED_FORMAT1(pred_format, v1) \ - GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_) -#define ASSERT_PRED1(pred, v1) \ - GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_) - - - -// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use -// this in your code. -template -AssertionResult AssertPred2Helper(const char* pred_text, - const char* e1, - const char* e2, - Pred pred, - const T1& v1, - const T2& v2) { - if (pred(v1, v2)) return AssertionSuccess(); - - return AssertionFailure() << pred_text << "(" - << e1 << ", " - << e2 << ") evaluates to false, where" - << "\n" << e1 << " evaluates to " << v1 - << "\n" << e2 << " evaluates to " << v2; -} - -// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2. -// Don't use this in your code. -#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\ - GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2),\ - on_failure) - -// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use -// this in your code. -#define GTEST_PRED2_(pred, v1, v2, on_failure)\ - GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \ - #v1, \ - #v2, \ - pred, \ - v1, \ - v2), on_failure) - -// Binary predicate assertion macros. -#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \ - GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_) -#define EXPECT_PRED2(pred, v1, v2) \ - GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_) -#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \ - GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_) -#define ASSERT_PRED2(pred, v1, v2) \ - GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_) - - - -// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use -// this in your code. -template -AssertionResult AssertPred3Helper(const char* pred_text, - const char* e1, - const char* e2, - const char* e3, - Pred pred, - const T1& v1, - const T2& v2, - const T3& v3) { - if (pred(v1, v2, v3)) return AssertionSuccess(); - - return AssertionFailure() << pred_text << "(" - << e1 << ", " - << e2 << ", " - << e3 << ") evaluates to false, where" - << "\n" << e1 << " evaluates to " << v1 - << "\n" << e2 << " evaluates to " << v2 - << "\n" << e3 << " evaluates to " << v3; -} - -// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3. -// Don't use this in your code. -#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\ - GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3),\ - on_failure) - -// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use -// this in your code. 
-#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\ - GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \ - #v1, \ - #v2, \ - #v3, \ - pred, \ - v1, \ - v2, \ - v3), on_failure) - -// Ternary predicate assertion macros. -#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \ - GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_) -#define EXPECT_PRED3(pred, v1, v2, v3) \ - GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_) -#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \ - GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_) -#define ASSERT_PRED3(pred, v1, v2, v3) \ - GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_) - - - -// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use -// this in your code. -template -AssertionResult AssertPred4Helper(const char* pred_text, - const char* e1, - const char* e2, - const char* e3, - const char* e4, - Pred pred, - const T1& v1, - const T2& v2, - const T3& v3, - const T4& v4) { - if (pred(v1, v2, v3, v4)) return AssertionSuccess(); - - return AssertionFailure() << pred_text << "(" - << e1 << ", " - << e2 << ", " - << e3 << ", " - << e4 << ") evaluates to false, where" - << "\n" << e1 << " evaluates to " << v1 - << "\n" << e2 << " evaluates to " << v2 - << "\n" << e3 << " evaluates to " << v3 - << "\n" << e4 << " evaluates to " << v4; -} - -// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4. -// Don't use this in your code. -#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\ - GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4),\ - on_failure) - -// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use -// this in your code. -#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\ - GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \ - #v1, \ - #v2, \ - #v3, \ - #v4, \ - pred, \ - v1, \ - v2, \ - v3, \ - v4), on_failure) - -// 4-ary predicate assertion macros. -#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ - GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) -#define EXPECT_PRED4(pred, v1, v2, v3, v4) \ - GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) -#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ - GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) -#define ASSERT_PRED4(pred, v1, v2, v3, v4) \ - GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) - - - -// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use -// this in your code. -template -AssertionResult AssertPred5Helper(const char* pred_text, - const char* e1, - const char* e2, - const char* e3, - const char* e4, - const char* e5, - Pred pred, - const T1& v1, - const T2& v2, - const T3& v3, - const T4& v4, - const T5& v5) { - if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess(); - - return AssertionFailure() << pred_text << "(" - << e1 << ", " - << e2 << ", " - << e3 << ", " - << e4 << ", " - << e5 << ") evaluates to false, where" - << "\n" << e1 << " evaluates to " << v1 - << "\n" << e2 << " evaluates to " << v2 - << "\n" << e3 << " evaluates to " << v3 - << "\n" << e4 << " evaluates to " << v4 - << "\n" << e5 << " evaluates to " << v5; -} - -// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5. -// Don't use this in your code. -#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\ - GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5),\ - on_failure) - -// Internal macro for implementing {EXPECT|ASSERT}_PRED5. 
Don't use -// this in your code. -#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\ - GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \ - #v1, \ - #v2, \ - #v3, \ - #v4, \ - #v5, \ - pred, \ - v1, \ - v2, \ - v3, \ - v4, \ - v5), on_failure) - -// 5-ary predicate assertion macros. -#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \ - GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_) -#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \ - GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_) -#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \ - GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_) -#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \ - GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_) - - - -#endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ - -// Macros for testing equalities and inequalities. -// -// * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual -// * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2 -// * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2 -// * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2 -// * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2 -// * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2 -// -// When they are not, Google Test prints both the tested expressions and -// their actual values. The values must be compatible built-in types, -// or you will get a compiler error. By "compatible" we mean that the -// values can be compared by the respective operator. -// -// Note: -// -// 1. It is possible to make a user-defined type work with -// {ASSERT|EXPECT}_??(), but that requires overloading the -// comparison operators and is thus discouraged by the Google C++ -// Usage Guide. Therefore, you are advised to use the -// {ASSERT|EXPECT}_TRUE() macro to assert that two objects are -// equal. -// -// 2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on -// pointers (in particular, C strings). Therefore, if you use it -// with two C strings, you are testing how their locations in memory -// are related, not how their content is related. To compare two C -// strings by content, use {ASSERT|EXPECT}_STR*(). -// -// 3. {ASSERT|EXPECT}_EQ(expected, actual) is preferred to -// {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you -// what the actual value is when it fails, and similarly for the -// other comparisons. -// -// 4. Do not depend on the order in which {ASSERT|EXPECT}_??() -// evaluate their arguments, which is undefined. -// -// 5. These macros evaluate their arguments exactly once. 
-// -// Examples: -// -// EXPECT_NE(5, Foo()); -// EXPECT_EQ(NULL, a_pointer); -// ASSERT_LT(i, array_size); -// ASSERT_GT(records.size(), 0) << "There is no record left."; - -#define EXPECT_EQ(expected, actual) \ - EXPECT_PRED_FORMAT2(::testing::internal:: \ - EqHelper::Compare, \ - expected, actual) -#define EXPECT_NE(expected, actual) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, expected, actual) -#define EXPECT_LE(val1, val2) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2) -#define EXPECT_LT(val1, val2) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2) -#define EXPECT_GE(val1, val2) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2) -#define EXPECT_GT(val1, val2) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2) - -#define GTEST_ASSERT_EQ(expected, actual) \ - ASSERT_PRED_FORMAT2(::testing::internal:: \ - EqHelper::Compare, \ - expected, actual) -#define GTEST_ASSERT_NE(val1, val2) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2) -#define GTEST_ASSERT_LE(val1, val2) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2) -#define GTEST_ASSERT_LT(val1, val2) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2) -#define GTEST_ASSERT_GE(val1, val2) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2) -#define GTEST_ASSERT_GT(val1, val2) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2) - -// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of -// ASSERT_XY(), which clashes with some users' own code. - -#if !GTEST_DONT_DEFINE_ASSERT_EQ -# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2) -#endif - -#if !GTEST_DONT_DEFINE_ASSERT_NE -# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2) -#endif - -#if !GTEST_DONT_DEFINE_ASSERT_LE -# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2) -#endif - -#if !GTEST_DONT_DEFINE_ASSERT_LT -# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2) -#endif - -#if !GTEST_DONT_DEFINE_ASSERT_GE -# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2) -#endif - -#if !GTEST_DONT_DEFINE_ASSERT_GT -# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2) -#endif - -// C String Comparisons. All tests treat NULL and any non-NULL string -// as different. Two NULLs are equal. -// -// * {ASSERT|EXPECT}_STREQ(s1, s2): Tests that s1 == s2 -// * {ASSERT|EXPECT}_STRNE(s1, s2): Tests that s1 != s2 -// * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case -// * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case -// -// For wide or narrow string objects, you can use the -// {ASSERT|EXPECT}_??() macros. -// -// Don't depend on the order in which the arguments are evaluated, -// which is undefined. -// -// These macros evaluate their arguments exactly once. 
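
A brief, hedged illustration of the content-versus-pointer distinction noted in the comment above; the test name and buffers are invented for this sketch and are not part of the header being removed:

    TEST(CStringAssertionTest, ComparesContentNotAddress) {
      char buffer[] = "hello";            // distinct storage from the literal below
      const char* literal = "hello";
      EXPECT_STREQ(buffer, literal);      // equal by content
      EXPECT_STRCASEEQ(buffer, "HELLO");  // equal when case is ignored
      EXPECT_STRNE(buffer, "world");      // different content
      // EXPECT_EQ(buffer, literal) would compare the two pointer values instead,
      // which is rarely what is intended for C strings.
    }
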
- -#define EXPECT_STREQ(expected, actual) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual) -#define EXPECT_STRNE(s1, s2) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) -#define EXPECT_STRCASEEQ(expected, actual) \ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual) -#define EXPECT_STRCASENE(s1, s2)\ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) - -#define ASSERT_STREQ(expected, actual) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual) -#define ASSERT_STRNE(s1, s2) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) -#define ASSERT_STRCASEEQ(expected, actual) \ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual) -#define ASSERT_STRCASENE(s1, s2)\ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) - -// Macros for comparing floating-point numbers. -// -// * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual): -// Tests that two float values are almost equal. -// * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual): -// Tests that two double values are almost equal. -// * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error): -// Tests that v1 and v2 are within the given distance to each other. -// -// Google Test uses ULP-based comparison to automatically pick a default -// error bound that is appropriate for the operands. See the -// FloatingPoint template class in gtest-internal.h if you are -// interested in the implementation details. - -#define EXPECT_FLOAT_EQ(expected, actual)\ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ - expected, actual) - -#define EXPECT_DOUBLE_EQ(expected, actual)\ - EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ - expected, actual) - -#define ASSERT_FLOAT_EQ(expected, actual)\ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ - expected, actual) - -#define ASSERT_DOUBLE_EQ(expected, actual)\ - ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ - expected, actual) - -#define EXPECT_NEAR(val1, val2, abs_error)\ - EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ - val1, val2, abs_error) - -#define ASSERT_NEAR(val1, val2, abs_error)\ - ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ - val1, val2, abs_error) - -// These predicate format functions work on floating-point values, and -// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g. -// -// EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0); - -// Asserts that val1 is less than, or almost equal to, val2. Fails -// otherwise. In particular, it fails if either val1 or val2 is NaN. -GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2, - float val1, float val2); -GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2, - double val1, double val2); - - -#if GTEST_OS_WINDOWS - -// Macros that test for HRESULT failure and success, these are only useful -// on Windows, and rely on Windows SDK macros and APIs to compile. -// -// * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr) -// -// When expr unexpectedly fails or succeeds, Google Test prints the -// expected result and the actual result with both a human-readable -// string representation of the error, if available, as well as the -// hex result code. 
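
Returning briefly to the ULP-based floating-point assertions declared just above, here is a minimal usage sketch; the test name and values are illustrative only, chosen to show the classic 0.1 + 0.2 rounding case:

    TEST(FloatingPointAssertionTest, ToleratesRounding) {
      const double sum = 0.1 + 0.2;   // not exactly 0.3 in binary floating point
      EXPECT_DOUBLE_EQ(0.3, sum);     // ULP-based comparison absorbs the rounding error
      EXPECT_NEAR(0.3, sum, 1e-12);   // explicit absolute error bound
      EXPECT_PRED_FORMAT2(::testing::DoubleLE, sum, 0.5);  // sum <= 0.5 (or nearly so)
    }
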
-# define EXPECT_HRESULT_SUCCEEDED(expr) \ - EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) - -# define ASSERT_HRESULT_SUCCEEDED(expr) \ - ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) - -# define EXPECT_HRESULT_FAILED(expr) \ - EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) - -# define ASSERT_HRESULT_FAILED(expr) \ - ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) - -#endif // GTEST_OS_WINDOWS - -// Macros that execute statement and check that it doesn't generate new fatal -// failures in the current thread. -// -// * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement); -// -// Examples: -// -// EXPECT_NO_FATAL_FAILURE(Process()); -// ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed"; -// -#define ASSERT_NO_FATAL_FAILURE(statement) \ - GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_) -#define EXPECT_NO_FATAL_FAILURE(statement) \ - GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_) - -// Causes a trace (including the source file path, the current line -// number, and the given message) to be included in every test failure -// message generated by code in the current scope. The effect is -// undone when the control leaves the current scope. -// -// The message argument can be anything streamable to std::ostream. -// -// In the implementation, we include the current line number as part -// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s -// to appear in the same block - as long as they are on different -// lines. -#define SCOPED_TRACE(message) \ - ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\ - __FILE__, __LINE__, ::testing::Message() << (message)) - -// Compile-time assertion for type equality. -// StaticAssertTypeEq() compiles iff type1 and type2 are -// the same type. The value it returns is not interesting. -// -// Instead of making StaticAssertTypeEq a class template, we make it a -// function template that invokes a helper class template. This -// prevents a user from misusing StaticAssertTypeEq by -// defining objects of that type. -// -// CAVEAT: -// -// When used inside a method of a class template, -// StaticAssertTypeEq() is effective ONLY IF the method is -// instantiated. For example, given: -// -// template class Foo { -// public: -// void Bar() { testing::StaticAssertTypeEq(); } -// }; -// -// the code: -// -// void Test1() { Foo foo; } -// -// will NOT generate a compiler error, as Foo::Bar() is never -// actually instantiated. Instead, you need: -// -// void Test2() { Foo foo; foo.Bar(); } -// -// to cause a compiler error. -template -bool StaticAssertTypeEq() { - (void)internal::StaticAssertTypeEqHelper(); - return true; -} - -// Defines a test. -// -// The first parameter is the name of the test case, and the second -// parameter is the name of the test within the test case. -// -// The convention is to end the test case name with "Test". For -// example, a test case for the Foo class can be named FooTest. -// -// The user should put his test code between braces after using this -// macro. Example: -// -// TEST(FooTest, InitializesCorrectly) { -// Foo foo; -// EXPECT_TRUE(foo.StatusIsOK()); -// } - -// Note that we call GetTestTypeId() instead of GetTypeId< -// ::testing::Test>() here to get the type ID of testing::Test. This -// is to work around a suspected linker bug when using Google Test as -// a framework on Mac OS X. 
The bug causes GetTypeId< -// ::testing::Test>() to return different values depending on whether -// the call is from the Google Test framework itself or from user test -// code. GetTestTypeId() is guaranteed to always return the same -// value, as it always calls GetTypeId<>() from the Google Test -// framework. -#define GTEST_TEST(test_case_name, test_name)\ - GTEST_TEST_(test_case_name, test_name, \ - ::testing::Test, ::testing::internal::GetTestTypeId()) - -// Define this macro to 1 to omit the definition of TEST(), which -// is a generic name and clashes with some other libraries. -#if !GTEST_DONT_DEFINE_TEST -# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name) -#endif - -// Defines a test that uses a test fixture. -// -// The first parameter is the name of the test fixture class, which -// also doubles as the test case name. The second parameter is the -// name of the test within the test case. -// -// A test fixture class must be declared earlier. The user should put -// his test code between braces after using this macro. Example: -// -// class FooTest : public testing::Test { -// protected: -// virtual void SetUp() { b_.AddElement(3); } -// -// Foo a_; -// Foo b_; -// }; -// -// TEST_F(FooTest, InitializesCorrectly) { -// EXPECT_TRUE(a_.StatusIsOK()); -// } -// -// TEST_F(FooTest, ReturnsElementCountCorrectly) { -// EXPECT_EQ(0, a_.size()); -// EXPECT_EQ(1, b_.size()); -// } - -#define TEST_F(test_fixture, test_name)\ - GTEST_TEST_(test_fixture, test_name, test_fixture, \ - ::testing::internal::GetTypeId()) - -// Use this macro in main() to run all tests. It returns 0 if all -// tests are successful, or 1 otherwise. -// -// RUN_ALL_TESTS() should be invoked after the command line has been -// parsed by InitGoogleTest(). - -#define RUN_ALL_TESTS()\ - (::testing::UnitTest::GetInstance()->Run()) - -} // namespace testing - -#endif // GTEST_INCLUDE_GTEST_GTEST_H_ diff --git a/kokkos/kokkos/containers/src/Kokkos_DualView.hpp b/kokkos/kokkos/containers/src/Kokkos_DualView.hpp deleted file mode 100644 index 80a30b7..0000000 --- a/kokkos/kokkos/containers/src/Kokkos_DualView.hpp +++ /dev/null @@ -1,241 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - - -/* DualView: container class to manage data structures which exist both on Host and Device - * member functions: - * DualView() - * DualView(label,dim0,dim1,dim2,...) - * view() - * sync() - * modify() - * resize(dim0,dim1,dim2,...) - */ -#ifndef KOKKOS_DUALVIEW_HPP -#define KOKKOS_DUALVIEW_HPP - -#include -namespace Kokkos { - -template< class T , class L , class D> -class DualView { -public: - - /* Define base types for Device and Host */ - - typedef Kokkos::View t_dev ; - typedef typename t_dev::HostMirror t_host ; - - /* Define typedefs for different usage scenarios */ - - // Define const view types - typedef Kokkos::View t_dev_const; - typedef typename t_dev_const::HostMirror t_host_const; - - // Define const randomread view types - typedef Kokkos::View t_dev_const_randomread ; - typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread; - - // Define unmanaged view types - typedef Kokkos::View t_dev_um; - typedef Kokkos::View t_host_um; - - // Define const unmanaged view types - typedef Kokkos::View t_dev_const_um; - typedef Kokkos::View t_host_const_um; - - /* provide the same typedefs as a view for scalar, data and value types */ - - typedef typename t_dev::value_type value_type; - typedef typename t_dev::const_value_type const_value_type; - typedef typename t_dev::scalar_type scalar_type; - typedef typename t_dev::const_scalar_type const_scalar_type; - typedef typename t_dev::non_const_scalar_type non_const_scalar_type; - - /* Instances of base types */ - - t_dev d_view; - t_host h_view; - - - /* Counters to keep track of changes (dirty-flags) */ - - unsigned int modified_device; - unsigned int modified_host; - - /* Return view on specific device via view() */ - - template< class Device > - const typename Kokkos::Impl::if_c< Kokkos::Impl::is_same< typename t_dev::memory_space , - typename Device::memory_space >::value , - t_dev , t_host >::type view() const - { - return Kokkos::Impl::if_c< Kokkos::Impl::is_same< typename t_dev::memory_space , - typename Device::memory_space >::value , - t_dev , t_host >::select( d_view , h_view ); - } - - - /* Construct views */ - - /* Empty Constructor */ - - DualView() { - modified_host = 0; - modified_device = 0; - } - - /* Create view with allocation on both host and device */ - - DualView( const std::string & label , - const size_t n0 = 0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) - : d_view( label, n0, n1, n2, n3, n4, n5, n6, n7 ) - , h_view( create_mirror_view( d_view ) ) - { - modified_host = 0; - modified_device = 0; - } - - /* Update data on device or host only if other space is polluted */ - - template - void sync() { - unsigned int dev = Kokkos::Impl::if_c< Kokkos::Impl::is_same< typename 
t_dev::memory_space , - typename Device::memory_space >::value , - unsigned int , unsigned int >::select( 1, 0 ); - - if(dev) { - if((modified_host > 0) && (modified_host >= modified_device)) { - Kokkos::deep_copy(d_view,h_view); - modified_host = modified_device = 0; - } - } else { - if((modified_device > 0) && (modified_device >= modified_host)) { - Kokkos::deep_copy(h_view,d_view); - modified_host = modified_device = 0; - } - } - } - - /* Mark data as dirty on a device */ - - template - void modify() { - unsigned int dev = Kokkos::Impl::if_c< Kokkos::Impl::is_same< typename t_dev::memory_space , - typename Device::memory_space >::value , - unsigned int , unsigned int >::select( 1, 0 ); - - if(dev) { - modified_device = (modified_device > modified_host ? modified_device : modified_host) + 1; - } else { - modified_host = (modified_device > modified_host ? modified_device : modified_host) + 1; - } - } - - /* Realloc both views, no deep copy */ - - void realloc( const size_t n0 = 0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) { - Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - h_view = create_mirror_view( d_view ); - - /* Reset dirty flags */ - modified_device = modified_host = 0; - } - - /* Resize both views, only do deep_copy in space which was last marked as dirty */ - - void resize( const size_t n0 = 0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) { - if(modified_device >= modified_host) { - /* Resize on Device */ - Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - h_view = create_mirror_view( d_view ); - - /* Mark Device copy as modified */ - modified_device++; - - } else { - /* Realloc on Device */ - - Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - t_host temp_view = create_mirror_view( d_view ); - - /* Remap on Host */ - Kokkos::Impl::ViewRemap< t_host , t_host >( temp_view , h_view ); - h_view = temp_view; - - /* Mark Host copy as modified */ - modified_host++; - } - } - - size_t capacity() const { - return d_view.capacity(); - } -}; -} -#endif diff --git a/kokkos/kokkos/containers/src/Kokkos_Functional.hpp b/kokkos/kokkos/containers/src/Kokkos_Functional.hpp deleted file mode 100644 index eb327af..0000000 --- a/kokkos/kokkos/containers/src/Kokkos_Functional.hpp +++ /dev/null @@ -1,196 +0,0 @@ -#ifndef KOKKOS_FUNCTIONAL_HPP -#define KOKKOS_FUNCTIONAL_HPP - -#include -#include - -namespace Kokkos { - -namespace Impl { - -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. 
-KOKKOS_FORCEINLINE_FUNCTION -uint32_t getblock32 ( const uint8_t * p, int i ) -{ -// used to avoid aliasing error which could cause errors with -// forced inlining - return ((uint32_t)p[i*4+0]) - | ((uint32_t)p[i*4+1] << 8) - | ((uint32_t)p[i*4+2] << 16) - | ((uint32_t)p[i*4+3] << 24); -} - -KOKKOS_FORCEINLINE_FUNCTION -uint32_t rotl32 ( uint32_t x, int8_t r ) -{ return (x << r) | (x >> (32 - r)); } - -KOKKOS_FORCEINLINE_FUNCTION -uint32_t fmix32 ( uint32_t h ) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -KOKKOS_INLINE_FUNCTION -uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - //---------- - // body - - for(int i=0; i -struct hash -{ - typedef T argument_type; - typedef T first_argument_type; - typedef uint32_t second_argument_type; - typedef uint32_t result_type; - - KOKKOS_FORCEINLINE_FUNCTION - uint32_t operator()(T const & t) const - { return Impl::MurmurHash3_x86_32( &t, sizeof(T), 0); } - - KOKKOS_FORCEINLINE_FUNCTION - uint32_t operator()(T const & t, uint32_t seed) const - { return Impl::MurmurHash3_x86_32( &t, sizeof(T), seed); } -}; - - - -template -struct equal_to -{ - typedef T first_argument_type; - typedef T second_argument_type; - typedef bool result_type; - - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(T const & a, T const & b) const - { return a == b; } -}; - -template -struct not_equal_to -{ - typedef T first_argument_type; - typedef T second_argument_type; - typedef bool result_type; - - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(T const & a, T const & b) const - { return a != b; } -}; - - -template -struct greater -{ - typedef T first_argument_type; - typedef T second_argument_type; - typedef bool result_type; - - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(T const & a, T const & b) const - { return a > b; } -}; - - -template -struct less -{ - typedef T first_argument_type; - typedef T second_argument_type; - typedef bool result_type; - - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(T const & a, T const & b) const - { return a < b; } -}; - -template -struct greater_equal -{ - typedef T first_argument_type; - typedef T second_argument_type; - typedef bool result_type; - - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(T const & a, T const & b) const - { return a >= b; } -}; - - -template -struct less_equal -{ - typedef T first_argument_type; - typedef T second_argument_type; - typedef bool result_type; - - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(T const & a, T const & b) const - { return a <= b; } -}; - -} // namespace Kokkos - - -#endif //KOKKOS_FUNCTIONAL_HPP - - diff --git a/kokkos/kokkos/containers/src/Kokkos_Pair.hpp b/kokkos/kokkos/containers/src/Kokkos_Pair.hpp deleted file mode 100644 index 8fc39aa..0000000 --- a/kokkos/kokkos/containers/src/Kokkos_Pair.hpp +++ /dev/null @@ -1,287 +0,0 @@ -/// \file Kokkos_Pair.hpp -/// \brief Declaration and definition of Kokkos::pair. -/// -/// This header file declares and defines Kokkos::pair and its related -/// nonmember functions. - -#ifndef KOKKOS_CONTAINERS_PAIR_HPP -#define KOKKOS_CONTAINERS_PAIR_HPP - -#include -#include -#include - -namespace Kokkos { -/// \struct pair -/// \brief Replacement for std::pair that works on CUDA devices. 
-/// -/// The instance methods of std::pair, including its constructors, are -/// not marked as __device__ functions. Thus, they cannot be -/// called on a CUDA device, such as an NVIDIA GPU. This struct -/// implements the same interface as std::pair, but can be used on a -/// CUDA device as well as on the host. -template -struct pair -{ - //! The first template parameter of this class. - typedef T1 first_type; - //! The second template parameter of this class. - typedef T2 second_type; - - //! The first element of the pair. - first_type first; - //! The second element of the pair. - second_type second; - - /// \brief Default constructor. - /// - /// This calls the default constructors of T1 and T2. It won't - /// compile if those default constructors are not defined and - /// public. - KOKKOS_FORCEINLINE_FUNCTION - pair() - : first(), second() - {} - - /// \brief Constructor that takes both elements of the pair. - /// - /// This calls the copy constructors of T1 and T2. It won't compile - /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION - pair(const first_type & f, const second_type & s) - : first(f), second(s) - {} - - /// \brief Copy constructor. - /// - /// This calls the copy constructors of T1 and T2. It won't compile - /// if those copy constructors are not defined and public. - template - KOKKOS_FORCEINLINE_FUNCTION - pair( const pair &p) - : first(p.first), second(p.second) - {} - - /// \brief Assignment operator. - /// - /// This calls the assignment operators of T1 and T2. It won't - /// compile if the assignment operators are not defined and public. - template - KOKKOS_FORCEINLINE_FUNCTION - pair & operator=(const pair &p) - { - first = p.first; - second = p.second; - return *this; - } - - // from std::pair - template - pair( const std::pair &p) - : first(p.first), second(p.second) - {} - - /// \brief Return the std::pair version of this object. - /// - /// This is not a device function; you may not call it on a - /// CUDA device. It is meant to be called on the host, if the user - /// wants an std::pair instead of a Kokkos::pair. - /// - /// \note This is not a conversion operator, since defining a - /// conversion operator made the relational operators have - /// ambiguous definitions. - std::pair to_std_pair() const - { return std::make_pair(first,second); } -}; - -//! Equality operator for Kokkos::pair. -template -KOKKOS_FORCEINLINE_FUNCTION -bool operator== (const pair& lhs, const pair& rhs) -{ return lhs.first==rhs.first && lhs.second==rhs.second; } - -//! Inequality operator for Kokkos::pair. -template -KOKKOS_FORCEINLINE_FUNCTION -bool operator!= (const pair& lhs, const pair& rhs) -{ return !(lhs==rhs); } - -//! Less-than operator for Kokkos::pair. -template -KOKKOS_FORCEINLINE_FUNCTION -bool operator< (const pair& lhs, const pair& rhs) -{ return lhs.first -KOKKOS_FORCEINLINE_FUNCTION -bool operator<= (const pair& lhs, const pair& rhs) -{ return !(rhs -KOKKOS_FORCEINLINE_FUNCTION -bool operator> (const pair& lhs, const pair& rhs) -{ return rhs -KOKKOS_FORCEINLINE_FUNCTION -bool operator>= (const pair& lhs, const pair& rhs) -{ return !(lhs -KOKKOS_FORCEINLINE_FUNCTION -pair make_pair (T1 x, T2 y) -{ return ( pair(x,y) ); } - -/// \brief Return a pair of references to the input arguments. -/// -/// This compares to std::tie (new in C++11). You can use it to -/// assign to two variables at once, from the result of a function -/// that returns a pair. 
For example (__device__ and -/// __host__ attributes omitted for brevity): -/// \code -/// // Declaration of the function to call. -/// // First return value: operation count. -/// // Second return value: whether all operations succeeded. -/// Kokkos::pair someFunction (); -/// -/// // Code that uses Kokkos::tie. -/// int myFunction () { -/// int count = 0; -/// bool success = false; -/// -/// // This assigns to both count and success. -/// Kokkos::tie (count, success) = someFunction (); -/// -/// if (! success) { -/// // ... Some operation failed; -/// // take corrective action ... -/// } -/// return count; -/// } -/// \endcode -/// -/// The line that uses tie() could have been written like this: -/// \code -/// Kokkos::pair result = someFunction (); -/// count = result.first; -/// success = result.second; -/// \endcode -/// -/// Using tie() saves two lines of code and avoids a copy of each -/// element of the pair. The latter could be significant if one or -/// both elements of the pair are more substantial objects than \c int -/// or \c bool. -template -KOKKOS_FORCEINLINE_FUNCTION -pair tie (T1 & x, T2 & y) -{ return ( pair(x,y) ); } - -// -// Specialization of Kokkos::pair for a \c void second argument. This -// is not actually a "pair"; it only contains one element, the first. -// -template -struct pair -{ - typedef T1 first_type; - typedef void second_type; - - first_type first; - enum { second = 0 }; - - KOKKOS_FORCEINLINE_FUNCTION - pair() - : first() - {} - - KOKKOS_FORCEINLINE_FUNCTION - pair(const first_type & f) - : first(f) - {} - - KOKKOS_FORCEINLINE_FUNCTION - pair(const first_type & f, int) - : first(f) - {} - - template - KOKKOS_FORCEINLINE_FUNCTION - pair( const pair &p) - : first(p.first) - {} - - template - KOKKOS_FORCEINLINE_FUNCTION - pair & operator=(const pair &p) - { - first = p.first; - return *this; - } -}; - -// -// Specialization of relational operators for Kokkos::pair. 
-// - -template -KOKKOS_FORCEINLINE_FUNCTION -bool operator== (const pair& lhs, const pair& rhs) -{ return lhs.first==rhs.first; } - -template -KOKKOS_FORCEINLINE_FUNCTION -bool operator!= (const pair& lhs, const pair& rhs) -{ return !(lhs==rhs); } - -template -KOKKOS_FORCEINLINE_FUNCTION -bool operator< (const pair& lhs, const pair& rhs) -{ return lhs.first -KOKKOS_FORCEINLINE_FUNCTION -bool operator<= (const pair& lhs, const pair& rhs) -{ return !(rhs -KOKKOS_FORCEINLINE_FUNCTION -bool operator> (const pair& lhs, const pair& rhs) -{ return rhs -KOKKOS_FORCEINLINE_FUNCTION -bool operator>= (const pair& lhs, const pair& rhs) -{ return !(lhs -struct hash< pair > -{ - typedef pair argument_type; - typedef pair first_argument_type; - typedef uint32_t second_argument_type; - typedef uint32_t result_type; - - KOKKOS_FORCEINLINE_FUNCTION - uint32_t operator()( const pair & p, uint32_t seed = 0u) const - { - typedef hash hash1; - typedef hash hash2; - return hash1(p.first, hash2(p.second,seed)); - } -}; - - -} // namespace Kokkos - - -#endif //KOKKOS_CONTAINERS_PAIR_HPP diff --git a/kokkos/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/kokkos/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp deleted file mode 100644 index 4c66068..0000000 --- a/kokkos/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp +++ /dev/null @@ -1,175 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STATICCRSGRAPH_HPP -#define KOKKOS_STATICCRSGRAPH_HPP - -#include -#include - -#include - -namespace Kokkos { - -/// \class StaticCrsGraph -/// \brief Compressed row storage array. -/// -/// \tparam DataType The type of stored entries. If a StaticCrsGraph is -/// used as the graph of a sparse matrix, then this is usually an -/// integer type, the type of the column indices in the sparse -/// matrix. -/// -/// \tparam Arg1Type The second template parameter, corresponding -/// either to the Device type (if there are no more template -/// parameters) or to the Layout type (if there is at least one more -/// template parameter). -/// -/// \tparam Arg2Type The third template parameter, which if provided -/// corresponds to the Device type. -/// -/// \tparam SizeType The type of row offsets. Usually the default -/// parameter suffices. However, setting a nondefault value is -/// necessary in some cases, for example, if you want to have a -/// sparse matrices with dimensions (and therefore column indices) -/// that fit in \c int, but want to store more than INT_MAX -/// entries in the sparse matrix. -/// -/// A row has a range of entries: -///
-/// - row_map[i0] <= entry < row_map[i0+1]
-/// - 0 <= i1 < row_map[i0+1] - row_map[i0]
-/// - entries( entry , i2 , i3 , ... );
-/// - entries( row_map[i0] + i1 , i2 , i3 , ... );
-///
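
To make the row_map/entries indexing above concrete, here is a small hedged sketch of visiting one row. The visit_row helper and its GraphType parameter are hypothetical; it assumes only the row_map and entries members declared in the class below:

    // Hypothetical helper: touch every entry of row i0 of a compressed-row graph.
    template <class GraphType>
    KOKKOS_INLINE_FUNCTION
    void visit_row(const GraphType& graph, typename GraphType::size_type i0)
    {
      const typename GraphType::size_type begin = graph.row_map(i0);     // row_map[i0]
      const typename GraphType::size_type end   = graph.row_map(i0 + 1); // row_map[i0+1]
      for (typename GraphType::size_type entry = begin; entry < end; ++entry) {
        // i1 = entry - begin is the local index within the row;
        // 'entry' indexes the entries view directly.
        (void) graph.entries(entry);
      }
    }
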
-template< class DataType, - class Arg1Type, - class Arg2Type = void, - typename SizeType = typename ViewTraits::size_type> -class StaticCrsGraph { -private: - typedef ViewTraits traits; - -public: - typedef DataType data_type; - typedef typename traits::array_layout array_layout; - typedef typename traits::device_type device_type; - typedef SizeType size_type; - - typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type; - typedef StaticCrsGraph< DataType , array_layout , typename device_type::host_mirror_device_type , SizeType > HostMirror; - //typedef StaticCrsGraph< DataType , array_layout , Kokkos::Threads , SizeType > HostMirror; - typedef View< const size_type* , array_layout, device_type > row_map_type; - typedef View< DataType* , array_layout, device_type > entries_type; - - entries_type entries; - row_map_type row_map; - - //! Construct an empty view. - StaticCrsGraph () : entries(), row_map() {} - - //! Copy constructor (shallow copy). - StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map) - {} - - template - StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_) - {} - - /** \brief Assign to a view of the rhs array. - * If the old view is the last view - * then allocated memory is deallocated. - */ - StaticCrsGraph& operator= (const StaticCrsGraph& rhs) { - entries = rhs.entries; - row_map = rhs.row_map; - return *this; - } - - /** \brief Destroy this view of the array. - * If the last view then allocated memory is deallocated. - */ - ~StaticCrsGraph() {} -}; - -//---------------------------------------------------------------------------- - -template< class StaticCrsGraphType , class InputSizeType > -typename StaticCrsGraphType::staticcrsgraph_type -create_staticcrsgraph( const std::string & label , - const std::vector< InputSizeType > & input ); - -template< class StaticCrsGraphType , class InputSizeType > -typename StaticCrsGraphType::staticcrsgraph_type -create_staticcrsgraph( const std::string & label , - const std::vector< std::vector< InputSizeType > > & input ); - -//---------------------------------------------------------------------------- - -template< class DataType , - class Arg1Type , - class Arg2Type , - typename SizeType > -typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror_view( const StaticCrsGraph & input ); - -template< class DataType , - class Arg1Type , - class Arg2Type , - typename SizeType > -typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror( const StaticCrsGraph & input ); - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_CRSARRAY_HPP */ - diff --git a/kokkos/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/kokkos/kokkos/containers/src/Kokkos_UnorderedMap.hpp deleted file mode 100644 index 5671734..0000000 --- a/kokkos/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ /dev/null @@ -1,1044 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under 
the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -/// \file Kokkos_UnorderedMap.hpp -/// \brief Declaration and definition of Kokkos::UnorderedMap. -/// -/// This header file declares and defines Kokkos::UnorderedMap and its -/// related nonmember functions. - -#ifndef KOKKOS_UNORDERED_MAP_HPP -#define KOKKOS_UNORDERED_MAP_HPP - -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include - -namespace Kokkos { - -/// \class UnorderedMap -/// \brief Thread-safe, performance-portable lookup table. -/// -/// This class provides a lookup table. In terms of functionality, -/// this class compares to std::unordered_map (new in C++11). -/// "Unordered" means that keys are not stored in any particular -/// order, unlike (for example) std::map. "Thread-safe" means that -/// lookups, insertion, and deletion are safe to call by multiple -/// threads in parallel. "Performance-portable" means that parallel -/// performance of these operations is reasonable, on multiple -/// hardware platforms. Platforms on which performance has been -/// tested include conventional Intel x86 multicore processors, Intel -/// Xeon Phi ("MIC"), and NVIDIA GPUs. -/// -/// Parallel performance portability entails design decisions that -/// might differ from one's expectation for a sequential interface. -/// This particularly affects insertion of single elements. In an -/// interface intended for sequential use, insertion might reallocate -/// memory if the original allocation did not suffice to hold the new -/// element. In this class, insertion does not reallocate -/// memory. This means that it might fail. insert() returns an enum -/// which indicates whether the insert failed. There are three -/// possible conditions: -///
-///   1. INSERT_FAILED: The insert failed.  This usually
-///      means that the UnorderedMap ran out of space.
-///   2. INSERT_SUCCESS: The insert succeeded, and the key
-///      did not exist in the table before.
-///   3. INSERT_EXISTING: The insert succeeded, and the key
-///      did exist in the table before.  The new value was
-///      ignored and the old value was left in place.
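To make the three outcomes concrete, here is a minimal device-side sketch of one way to react to them inside a parallel kernel, assuming the insert_result pair, the insert() signature, and the enum values declared later in this header. The functor name FillMap and the MapType/KeyView parameters are illustrative placeholders, not part of this file.

    // Hypothetical fill functor: one insert attempt per index.
    template <class MapType, class KeyView>
    struct FillMap {
      MapType map;    // e.g. Kokkos::UnorderedMap<int, int, Device>
      KeyView keys;   // e.g. Kokkos::View<const int*, Device>

      FillMap(const MapType& m, const KeyView& k) : map(m), keys(k) {}

      KOKKOS_INLINE_FUNCTION
      void operator()(const unsigned i) const {
        // insert() never reallocates, so it may fail when the table is full.
        typename MapType::insert_result r = map.insert(keys(i), static_cast<int>(i));
        if (r.first == Kokkos::INSERT_EXISTING) {
          // The key was already present and its old value was kept; r.second
          // points at the stored (key, value) pair, so the value can still be
          // overwritten in place if that is the desired behavior.
          r.second->second = static_cast<int>(i);
        }
        // INSERT_FAILED cannot be handled here; it is detected on the host
        // afterwards via failed_inserts().
      }
    };

A host-side sketch of recovering from failed inserts follows the insert-state enum further below.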
-/// -/// Users can access the number of failed insertions thus far by -/// calling failed_inserts(). This requires computation, and thus is -/// a computational kernel, not a device function. Once users -/// have the number of failed inserts, they may reserve() as much -/// space as they need and add the remaining elements (in a second -/// CUDA kernel launch, if applicable). We reiterate: users may -/// not call these methods in a parallel computational kernel. -/// They must run their parallel operation to completion, then call -/// failed_inserts(), reserve() if necessary, and run another parallel -/// kernel to add any remaining elements. -/// -/// \tparam Key Type of keys of the lookup table. If \c const, users -/// are not allowed to add or remove keys, though they are allowed -/// to change values. In that case, the implementation may make -/// optimizations specific to the Device. For example, if -/// Device is \c Cuda, it may use texture fetches to access -/// keys. -/// -/// \tparam T Type of values stored in the lookup table. You may use -/// \c void here, in which case the table will be a set of keys. If -/// \c const, users are not allowed to add, remove, or change -/// entries. In that case, the implementation may make -/// optimizations specific to the \c Device, such as using texture -/// fetches to access values. -/// -/// \tparam Device The Kokkos Device type. -/// -/// \tparam Compare Definition of the less-than comparison function -/// for instances of Key. If you rely on the default -/// template parameter for \c Hash, then there must be a -/// specialization of Kokkos::less for \c Key (without the \c const, -/// if \c Key is const). -/// -/// \tparam Hash Definition of the hash function for instances of -/// Key. If you rely on the default template parameter for -/// \c Hash, then there must be a specialization of Kokkos::hash for -/// \c Key (without the \c const, if \c Key is const). -template < typename Key - , typename T - , typename Device - , typename Compare = less::type> - , typename Hash = hash::type> - > -class UnorderedMap; - - -// Specialization of deep_copy for two UnorderedMap objects. -template < typename DKey, typename DT, typename DDevice - , typename SKey, typename ST, typename SDevice - , typename Compare, typename Hash > -inline void deep_copy( UnorderedMap & dst - , const UnorderedMap & src ) -{ - Impl::UnorderedMap::deep_copy_impl(dst, src); -} - - -/// \brief First element of the return value of UnorderedMap::insert(). -/// -/// Inserting an element into an UnorderedMap is not guaranteed to -/// succeed. There are three possible conditions: -///
-///   1. INSERT_FAILED: The insert failed.  This usually
-///      means that the UnorderedMap ran out of space.
-///   2. INSERT_SUCCESS: The insert succeeded, and the key
-///      did not exist in the table before.
-///   3. INSERT_EXISTING: The insert succeeded, and the key
-///      did exist in the table before.  The new value was
-///      ignored and the old value was left in place.
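The run-then-check-then-reserve workflow described in the class documentation above might look roughly like the following host-side driver. The map_type alias, the FillMap functor from the earlier sketch, the keys view, num_keys, Device, and the capacity-doubling heuristic are all assumed names, not part of this header.

    // Host-side driver sketch.  Device stands for any Kokkos device type.
    typedef Kokkos::UnorderedMap<int, int, Device> map_type;

    map_type map(10000);  // initial capacity guess; insert() never grows the table
    Kokkos::parallel_for(num_keys, FillMap<map_type, KeyView>(map, keys));

    // failed_inserts() runs a reduction on the host; it must not be called
    // from inside a parallel kernel.
    if (map.failed_inserts() > 0u) {
      // In this implementation reserve() only copies the existing entries when
      // there were no failed inserts, so the simplest recovery is to grow the
      // table and re-run the whole fill; keys inserted twice simply come back
      // as INSERT_EXISTING.
      map.reserve(2 * map.capacity());
      Kokkos::parallel_for(num_keys, FillMap<map_type, KeyView>(map, keys));
    }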
-enum UnorderedMap_insert_state -{ - INSERT_FAILED - , INSERT_SUCCESS - , INSERT_EXISTING -}; - - -// Specialization of UnorderedMap for nonconst Key and value (T). -template < typename Key - , typename T - , typename Device - , typename Compare - , typename Hash - > -class UnorderedMap -{ -public: - //! \name Public types and constants - //@{ - - typedef Impl::UnorderedMap::map_data map_data; - typedef Impl::UnorderedMap::node_atomic node_atomic; - - typedef typename map_data::device_type device_type; - typedef typename map_data::compare_type compare_type; - typedef typename map_data::hash_type hash_type; - typedef typename map_data::key_type key_type; - typedef typename map_data::mapped_type mapped_type; - typedef typename map_data::value_type value_type; - typedef typename map_data::pointer pointer; - typedef typename map_data::const_pointer const_pointer; - typedef typename map_data::node_type node_type; - typedef typename map_data::node_block_type node_block_type; - typedef typename map_data::size_type size_type; - - typedef pair insert_result; - - typedef UnorderedMap HostMirror; - - //@} -private: - - typedef typename Impl::if_c< map_data::has_void_mapped_type - , int - , mapped_type - >::type insert_mapped_type; - -public: - //! \name Public member functions - //@{ - - /// \brief Constructor - /// - /// \param arg_num_nodes [in] Initial requested maximum number of - /// entries in the hash table. - /// \param compare [in] Less-than comparison function for \c Key - /// instances. The default value usually suffices. - /// \param hash [in] Hash function for \c Key instances. The - /// default value usually suffices. - UnorderedMap( uint32_t arg_num_nodes = 0 - , compare_type compare = compare_type() - , hash_type hash = hash_type() - ) - : m_data( arg_num_nodes - , compare - , hash - ) - {} - - //! Clear all entries in the table. - void clear() - { - m_data = map_data(0, m_data.key_compare, m_data.key_hash); - } - - //! If the table is larger than necessary, shrink it to fit. - void shrink_to_fit() - { reserve(0); } - - /// \brief Reserve space for \c new_capacity entries. - /// - /// This is not a device function; it may not be - /// called in a parallel kernel. - void reserve(unsigned new_capacity) - { - const uint32_t curr_size = size(); - new_capacity = new_capacity < curr_size ? curr_size : new_capacity; - - UnorderedMap - tmp(new_capacity, m_data.key_compare, m_data.key_hash); - - if (new_capacity > 0u && failed_inserts() == 0u ) { - Impl::UnorderedMap::copy_map(tmp,*this); - } - *this = tmp; - } - - /// \brief Check sanity of the hash table. - /// - /// "Sanity" means integrity of data structures. Checking this is - /// useful for debugging. - void check_sanity() const - { m_data.check_sanity(); } - - /// \brief The number of entries in the table. - /// - /// Note that this is not a device function; it cannot be called in - /// a parallel kernel. The value is not stored as a variable; it - /// must be computed. - uint32_t size() const - { return m_data.size(); } - - /// \brief The number of unused entries in the table. - /// - /// This is not a device function; it may not be - /// called in a parallel kernel. The value is not stored as a - /// variable; it must be computed. - uint32_t unused() const - { return m_data.unused(); } - - /// \brief The number of entries pending deletion in the table. - /// - /// This is not a device function; it may not be - /// called in a parallel kernel. The value is not stored as a - /// variable; it must be computed. 
- uint32_t pending_delete() const - { return m_data.pending_delete(); } - - /// \brief The current number of failed insert() calls. - /// - /// This is not a device function; it may not be - /// called in a parallel kernel. The value is not stored as a - /// variable; it must be computed. - uint32_t failed_inserts() const - { return m_data.failed_inserts(); } - - /// \brief The maximum number of entries that the table can hold. - /// - /// This is a device function; it may be called in a parallel - /// kernel. - KOKKOS_INLINE_FUNCTION - uint32_t capacity() const - { return m_data.capacity(); } - - /// \brief The number of hash table "buckets." - /// - /// This is different than the number of entries that the table can - /// hold. Each key hashes to an index in [0, hash_capacity() - 1]. - /// That index can hold zero or more entries. This class decides - /// what hash_capacity() should be, given the user's upper bound on - /// the number of entries the table must be able to hold. - /// - /// This is a device function; it may be called in a parallel - /// kernel. - KOKKOS_INLINE_FUNCTION - uint32_t hash_capacity() const - { return m_data.hash_capacity(); } - - /// \brief Remove entries that are pending deletion. - /// - /// The mark_pending_delete() method marks an entry as "pending - /// deletion." This method actually removes such entries from the - /// table. - /// - /// This is not a device function; it may not be - /// called in a parallel kernel. - void remove_pending_delete() const - { return m_data.remove_pending_delete_keys(); } - - //--------------------------------------------------------------------------- - //--------------------------------------------------------------------------- - - /// \brief Attempt to insert the given (key, value) pair. - /// - /// This is a device function; it may be called in a parallel - /// kernel. As discussed in the class documentation, it need not - /// succeed. The return value tells you if it did. - /// - /// \param k [in] The key to attempt to insert. - /// \param v [in] The corresponding value to attempt to insert. If - /// using this class as a set (with T = void), then you need not - /// provide this value. 
- KOKKOS_INLINE_FUNCTION - insert_result insert(const key_type & k, const insert_mapped_type & v = insert_mapped_type()) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - - m_data.set_modified(); - - insert_result result(INSERT_FAILED,NULL); - - const uint32_t hash_value = m_data.key_hash(k); - const uint32_t hash_index = hash_value % m_data.hashes.size(); - - uint32_t node_index = node_atomic::invalid_next; - - bool curr_equal = false; - uint32_t curr_index = node_atomic::invalid_next; - volatile uint64_t * prev_atomic = & m_data.hashes[hash_index].value; - uint64_t prev = 0u; - - find_previous(k,prev_atomic,prev,curr_equal,curr_index); - - do { - if (curr_equal) { - if (node_index != node_atomic::invalid_next) { - // release any node that was claimed by this thread - m_data.get_node(node_index).atomic = node_atomic::make_atomic(node_atomic::invalid_next, Impl::UnorderedMap::UNUSED); -#if defined( __CUDA_ARCH__ ) - __threadfence(); -#endif - volatile int * used_count = &m_data.node_blocks[node_index>>node_block_type::shift].used_count; - atomic_fetch_add(used_count, -1); - } - // Node already exist - result = insert_result(INSERT_EXISTING, &m_data.get_node(curr_index).value); - break; - } - else { - // try to insert here - if (node_index == node_atomic::invalid_next) { - node_index = find_unused_node(hash_value); - if (node_index == node_atomic::invalid_next) { - // unable to obtain an unused node - break; - } - } - // this thread has unique control of the node - // so can construct the value and set up the state and next index - node_type & n = m_data.get_node(node_index); - n.destruct_value(); - n.construct_value(value_type(k,v)); - n.atomic = node_atomic::make_atomic( curr_index, Impl::UnorderedMap::USED); - - uint64_t new_atomic = node_atomic::make_atomic( node_index, node_atomic::state(prev)); - -#if defined( __CUDA_ARCH__ ) - __threadfence(); -#endif - const bool ok = atomic_compare_exchange_strong( prev_atomic, prev, new_atomic); - if ( ok ) { - // successfully inserted the node - result = insert_result(INSERT_SUCCESS, &n.value); - break; - } - } - // insert failed -- find correct insertion point again - find_previous(k,prev_atomic,prev,curr_equal,curr_index); - } while (true); - return result; - } - - /// \brief Mark the given key for deletion. - /// - /// This does not actually free memory; it just marks the entry of - /// the table with the given key \c k as deleted. - /// - /// This is a device function; it may be called in a parallel - /// kernel. 
- KOKKOS_INLINE_FUNCTION - void mark_pending_delete(const key_type & k) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - - m_data.set_modified(); - - const uint32_t hash_value = m_data.key_hash(k); - const uint32_t hash_index = hash_value % m_data.hashes.size(); - - uint32_t node_index = node_atomic::invalid_next; - - bool curr_equal = false; - uint32_t curr_index = node_atomic::invalid_next; - volatile uint64_t * prev_atomic = & m_data.hashes[hash_index].value; - uint64_t prev = 0u; - - find_previous(k,prev_atomic,prev,curr_equal,curr_index); - - do { - if (curr_equal) { - if (node_index != node_atomic::invalid_next) { - // release any node that was claimed by this thread - m_data.get_node(node_index).atomic = node_atomic::make_atomic(node_atomic::invalid_next, Impl::UnorderedMap::UNUSED); -#if defined( __CUDA_ARCH__ ) - __threadfence(); -#endif - volatile int * used_count = &m_data.node_blocks[node_index>>node_block_type::shift].used_count; - atomic_fetch_add(used_count, -1); - } - // mark the current node as deleted - volatile uint64_t * curr_atomic_ptr = &m_data.get_node(curr_index).atomic.value; - uint64_t curr_atomic = *curr_atomic_ptr; - while ( node_atomic::state(curr_atomic) == Impl::UnorderedMap::USED) { - uint64_t new_atomic = node_atomic::make_atomic( node_atomic::next(curr_atomic), Impl::UnorderedMap::PENDING_DELETE); - curr_atomic = atomic_compare_exchange(curr_atomic_ptr,curr_atomic,new_atomic); - } - return; - } - else { - // key does not exist - // insert a node with the given key marked as deleted - if (node_index == node_atomic::invalid_next) { - node_index = find_unused_node(hash_value); - if (node_index == node_atomic::invalid_next) { - return; - } - } - - // this thread has unique control of the node - // so can construct the value and set up the state and next index - node_type & n = m_data.get_node(node_index); - n.destruct_value(); - n.construct_value(value_type(k,insert_mapped_type())); - n.atomic = node_atomic::make_atomic( curr_index, Impl::UnorderedMap::PENDING_DELETE); - - uint64_t new_atomic = node_atomic::make_atomic( node_index, node_atomic::state(prev)); - -#if defined( __CUDA_ARCH__ ) - __threadfence(); -#endif - - const bool ok = atomic_compare_exchange_strong( prev_atomic, prev, new_atomic); - if ( ok ) { - return; - } - } - // insert failed -- find correct insertion point again - find_previous(k,prev_atomic,prev,curr_equal,curr_index); - } while (true); - } - - // TODO protect with enable_if - KOKKOS_INLINE_FUNCTION - void mark_pending_delete( const_pointer p ) const - { - if (p) mark_pending_delete(p->first); - } - - - /// \brief Find the given key \c k, if it exists in the table. - /// - /// \return If the key exists in the table, a (raw) pointer to the - /// value corresponding to that key; otherwise, \c NULL. - /// - /// This is a device function; it may be called in a parallel - /// kernel. - KOKKOS_INLINE_FUNCTION - pointer find( const key_type & k) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - - const uint32_t node_index = m_data.find_node_index(k); - return (node_index != node_atomic::invalid_next) ? &m_data.get_node(node_index).value : NULL; - } - - /// \brief Get a pointer to the value with \c i as its direct index. - /// - /// \warning This method is only for expert users. - /// - /// \param i [in] Index directly into the array of entries. - /// - /// \return If the entry exists in the table, a (raw) pointer to the - /// value; otherwise, \c NULL. 
- /// - /// This is a device function; it may be called in a parallel - /// kernel. - KOKKOS_INLINE_FUNCTION - pointer get_value(uint64_t i) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - - // add one to pass 0th node - const bool valid_range = i < m_data.capacity(); - const bool used_node = node_atomic::state(m_data.get_node(i).atomic) == Impl::UnorderedMap::USED; - - return valid_range && used_node ? &m_data.get_node(i).value : NULL; - } - -private: // private member functions - - KOKKOS_INLINE_FUNCTION - uint32_t find_unused_node(uint32_t hash_value) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - - const uint32_t num_blocks = m_data.node_blocks.size(); - const uint32_t start_block = hash_value % num_blocks; - const uint32_t end_block = start_block + num_blocks; - - if (m_data.no_failed_inserts()) { - - for (uint32_t i = start_block; i < end_block; ++i) { - if (!m_data.no_failed_inserts()) break; - - const uint32_t block = i % num_blocks; - volatile int * used_count = &m_data.node_blocks[block].used_count; - int count = * used_count; - if (static_cast(count) < node_block_type::size) { - //stores the old value into count - const int old_count = atomic_fetch_add(used_count, 1); - if (static_cast(old_count) < node_block_type::size) { - //claimed a node in this block keep looping block utill successful at claming a node - for (uint32_t start_node = (hash_value & node_block_type::mask); true; ++start_node) { - if (!m_data.no_failed_inserts()) break; - const uint32_t n = (block*node_block_type::size) + (start_node & node_block_type::mask); - volatile uint64_t * atomic = &m_data.get_node(n).atomic.value; - uint64_t value = *atomic; - if ( (node_atomic::state(value) == Impl::UnorderedMap::UNUSED) - && atomic_compare_exchange_strong(atomic, value, node_atomic::make_atomic(node_atomic::invalid_next,Impl::UnorderedMap::PENDING_INSERT)) ) - { - return n; - } - } - } - else { - //unable to claim a node from this block - atomic_fetch_add(used_count, -1); - } - } - } - // unable to get a free node -- insert failed - m_data.set_failed_insert(); - } - // count the failed insert - volatile int * failed_inserts = &m_data.node_blocks[start_block].failed_inserts; - atomic_fetch_add(failed_inserts, 1); - return node_atomic::invalid_next; - } - - KOKKOS_INLINE_FUNCTION - void find_previous(const key_type & k, volatile uint64_t *& prev_atomic, uint64_t & prev, bool &curr_equal, uint32_t & curr_index) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - - curr_equal = false; - do { - prev = *prev_atomic; - curr_index = node_atomic::next(prev); - const bool curr_invalid = curr_index == node_atomic::invalid_next; - - if (curr_invalid) break; - - // global read of the key - volatile const key_type * const key_ptr = &m_data.get_node(curr_index).value.first; - const key_type curr_key = *key_ptr; - - const bool curr_less = m_data.key_compare( curr_key, k); - const bool curr_greater = m_data.key_compare( k, curr_key); - curr_equal = !curr_less && !curr_greater; - - if (!curr_less) break; - - prev_atomic = & m_data.get_node(node_atomic::next(prev)).atomic.value; - } while (true); - } - -private: // private members - map_data m_data; - - template - friend class UnorderedMap; - - template < class MapDst, class MapSrc > - friend void Impl::UnorderedMap::deep_copy_impl( MapDst & dst, const MapSrc & src); -}; - - -//! Specialization of UnorderedMap for const Key and nonconst value (T). 
-template < typename Key - , typename T - , typename Device - , typename Compare - , typename Hash - > -class UnorderedMap< const Key, T, Device, Compare, Hash> -{ -public: // public types and constants - typedef Impl::UnorderedMap::map_data map_data; - typedef Impl::UnorderedMap::node_atomic node_atomic; - - typedef typename map_data::device_type device_type; - typedef typename map_data::compare_type compare_type; - typedef typename map_data::hash_type hash_type; - typedef typename map_data::key_type key_type; - typedef typename map_data::mapped_type mapped_type; - typedef typename map_data::value_type value_type; - typedef typename map_data::pointer pointer; - typedef typename map_data::const_pointer const_pointer; - typedef typename map_data::node_type node_type; - typedef typename map_data::size_type size_type; - - typedef UnorderedMap HostMirror; - -public: //public member functions - - UnorderedMap() - : m_data() - {} - - template - UnorderedMap( const UMap & umap ) - : m_data( umap.m_data ) - {} - - void clear() - { - m_data = map_data(0, m_data.key_compare, m_data.key_hash); - } - - void shrink_to_fit() - { reserve(0); } - - void reserve(unsigned new_capacity) - { - const uint32_t curr_size = size(); - new_capacity = new_capacity < curr_size ? curr_size : new_capacity; - - UnorderedMap - tmp(new_capacity, m_data.key_compare, m_data.key_hash); - - if (new_capacity > 0u && failed_inserts() == 0u ) { - Impl::UnorderedMap::copy_map(tmp,*this); - } - *this = tmp; - } - - void check_sanity() const - { m_data.check_sanity(); } - - uint32_t size() const - { return m_data.size(); } - - uint32_t unused() const - { return m_data.unused(); } - - uint32_t pending_delete() const - { return m_data.pending_delete(); } - - uint32_t failed_inserts() const - { return m_data.failed_inserts(); } - - KOKKOS_INLINE_FUNCTION - uint32_t capacity() const - { return m_data.capacity(); } - - KOKKOS_INLINE_FUNCTION - uint32_t hash_capacity() const - { return m_data.hash_capacity(); } - - void remove_pending_delete() const - { return m_data.remove_pending_delete_keys(); } - - KOKKOS_INLINE_FUNCTION - pointer find( const key_type & k) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - const uint32_t node_index = m_data.find_node_index(k); - return (node_index != node_atomic::invalid_next) ? &m_data.get_node(node_index).value : NULL; - } - - KOKKOS_INLINE_FUNCTION - pointer get_value(uint64_t i) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - // add one to pass 0th node - const bool valid_range = i < m_data.capacity(); - const bool used_node = node_atomic::state(m_data.get_node(i).atomic) == Impl::UnorderedMap::USED; - - return valid_range && used_node ? 
&m_data.get_node(i).value : NULL; - } - - - KOKKOS_INLINE_FUNCTION - void mark_pending_delete(const key_type & k) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - m_data.set_modified(); - - const uint32_t hash_value = m_data.key_hash(k); - const uint32_t hash_index = hash_value % m_data.hashes.size(); - - bool curr_equal = false; - uint32_t curr_index = node_atomic::invalid_next; - const volatile uint64_t * prev_atomic = & m_data.hashes[hash_index].value; - uint64_t prev = 0u; - - find_previous(k,prev_atomic,prev,curr_equal,curr_index); - - do { - if (curr_equal) { - // mark the current node as deleted - volatile uint64_t * curr_atomic_ptr = &m_data.get_node(curr_index).atomic.value; - uint64_t curr_atomic = *curr_atomic_ptr; - while ( node_atomic::state(curr_atomic) == Impl::UnorderedMap::USED) { - uint64_t new_atomic = node_atomic::make_atomic( node_atomic::next(curr_atomic), Impl::UnorderedMap::PENDING_DELETE); - curr_atomic = atomic_compare_exchange(curr_atomic_ptr,curr_atomic,new_atomic); - } - return; - } - } while (true); - } - - -private: - KOKKOS_INLINE_FUNCTION - void find_previous(const key_type & k, const volatile uint64_t *& prev_atomic, uint64_t & prev, bool &curr_equal, uint32_t & curr_index) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - curr_equal = false; - do { - prev = *prev_atomic; - curr_index = node_atomic::next(prev); - const bool curr_invalid = curr_index == node_atomic::invalid_next; - - if (curr_invalid) break; - - // global read of the key - volatile const key_type * const key_ptr = &m_data.get_node(curr_index).value.first; - const key_type curr_key = *key_ptr; - - const bool curr_less = m_data.key_compare( curr_key, k); - const bool curr_greater = m_data.key_compare( k, curr_key); - curr_equal = !curr_less && !curr_greater; - - if (!curr_less) break; - - prev_atomic = & m_data.get_node(node_atomic::next(prev)).atomic.value; - } while (true); - } - -private: // private members - map_data m_data; - - template - friend class UnorderedMap; - - template < class MapDst, class MapSrc > - friend void Impl::UnorderedMap::deep_copy_impl( MapDst & dst, const MapSrc & src); -}; - - -//! Specialization of UnorderedMap for const Key and const value (T). -template < typename Key - , typename T - , typename Device - , typename Compare - , typename Hash - > -class UnorderedMap< const Key, const T, Device, Compare, Hash> -{ -public: // public types and constants - typedef Impl::UnorderedMap::map_data map_data; - typedef Impl::UnorderedMap::node_atomic node_atomic; - - typedef typename map_data::device_type device_type; - typedef typename map_data::compare_type compare_type; - typedef typename map_data::hash_type hash_type; - typedef typename map_data::key_type key_type; - typedef typename map_data::mapped_type mapped_type; - typedef typename map_data::value_type value_type; - typedef typename map_data::pointer pointer; - typedef typename map_data::const_pointer const_pointer; - typedef typename map_data::node_type node_type; - typedef typename map_data::size_type size_type; - - typedef UnorderedMap HostMirror; - -public: //public member functions - - UnorderedMap() - : m_data() - {} - - template - UnorderedMap( const UMap & umap ) - : m_data( umap.m_data ) - {} - - void clear() - { - m_data = map_data(0, m_data.key_compare, m_data.key_hash); - } - - void shrink_to_fit() - { reserve(0); } - - void reserve(unsigned new_capacity) - { - const uint32_t curr_size = size(); - new_capacity = new_capacity < curr_size ? 
curr_size : new_capacity; - - UnorderedMap - tmp(new_capacity, m_data.key_compare, m_data.key_hash); - - if (new_capacity > 0u && failed_inserts() == 0u ) { - Impl::UnorderedMap::copy_map(tmp,*this); - } - *this = tmp; - } - - void check_sanity() const - { m_data.check_sanity(); } - - uint32_t size() const - { return m_data.size(); } - - uint32_t unused() const - { return m_data.unused(); } - - uint32_t pending_delete() const - { return m_data.pending_delete(); } - - uint32_t failed_inserts() const - { return m_data.failed_inserts(); } - - KOKKOS_INLINE_FUNCTION - uint32_t capacity() const - { return m_data.capacity(); } - - KOKKOS_INLINE_FUNCTION - uint32_t hash_capacity() const - { return m_data.hash_capacity(); } - - void remove_pending_delete() const - { return m_data.remove_pending_delete_keys(); } - - KOKKOS_INLINE_FUNCTION - const_pointer find( const key_type & k) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - const uint32_t node_index = m_data.find_node_index(k); - return (node_index != node_atomic::invalid_next) ? &m_data.get_node(node_index).value : NULL; - } - - KOKKOS_INLINE_FUNCTION - const_pointer get_value(uint64_t i) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - // add one to pass 0th node - const bool valid_range = i < m_data.capacity(); - const bool used_node = node_atomic::state(m_data.get_node(i).atomic) == Impl::UnorderedMap::USED; - - return valid_range && used_node ? &m_data.get_node(i).value : NULL; - } - -private: // private members - map_data m_data; - - template - friend class UnorderedMap; - - template < class MapDst, class MapSrc > - friend void Impl::UnorderedMap::deep_copy_impl( MapDst & dst, const MapSrc & src); -}; - - -//! Specialization of UnorderedMap for const Key and T=void ("set"). -template < typename Key - , typename Device - , typename Compare - , typename Hash - > -class UnorderedMap< const Key, void, Device, Compare, Hash> -{ -public: // public types and constants - typedef Impl::UnorderedMap::map_data map_data; - typedef Impl::UnorderedMap::node_atomic node_atomic; - - typedef typename map_data::device_type device_type; - typedef typename map_data::compare_type compare_type; - typedef typename map_data::hash_type hash_type; - typedef typename map_data::key_type key_type; - typedef typename map_data::mapped_type mapped_type; - typedef typename map_data::value_type value_type; - typedef typename map_data::pointer pointer; - typedef typename map_data::const_pointer const_pointer; - typedef typename map_data::node_type node_type; - typedef typename map_data::size_type size_type; - - typedef UnorderedMap HostMirror; - -public: //public member functions - - UnorderedMap() - : m_data() - {} - - template - UnorderedMap( const UMap & umap ) - : m_data( umap.m_data ) - {} - - void clear() - { - m_data = map_data(0, m_data.key_compare, m_data.key_hash); - } - - void shrink_to_fit() - { reserve(0); } - - void reserve(unsigned new_capacity) - { - const uint32_t curr_size = size(); - new_capacity = new_capacity < curr_size ? 
curr_size : new_capacity; - - UnorderedMap - tmp(new_capacity, m_data.key_compare, m_data.key_hash); - - if (new_capacity > 0u && failed_inserts() == 0u ) { - Impl::UnorderedMap::copy_map(tmp,*this); - } - *this = tmp; - } - - void check_sanity() const - { m_data.check_sanity(); } - - uint32_t size() const - { return m_data.size(); } - - uint32_t unused() const - { return m_data.unused(); } - - uint32_t pending_delete() const - { return m_data.pending_delete(); } - - uint32_t failed_inserts() const - { return m_data.failed_inserts(); } - - KOKKOS_INLINE_FUNCTION - uint32_t capacity() const - { return m_data.capacity(); } - - KOKKOS_INLINE_FUNCTION - uint32_t hash_capacity() const - { return m_data.hash_capacity(); } - - void remove_pending_delete() const - { return m_data.remove_pending_delete_keys(); } - - KOKKOS_INLINE_FUNCTION - const_pointer find( const key_type & k) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - const uint32_t node_index = m_data.find_node_index(k); - return (node_index != node_atomic::invalid_next) ? &m_data.get_node(node_index).value : NULL; - } - - KOKKOS_INLINE_FUNCTION - const_pointer get_value(uint64_t i) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - // add one to pass 0th node - const bool valid_range = i < m_data.capacity(); - const bool used_node = node_atomic::state(m_data.get_node(i).atomic) == Impl::UnorderedMap::USED; - - return valid_range && used_node ? &m_data.get_node(i).value : NULL; - } - -private: // private members - map_data m_data; - - template - friend class UnorderedMap; - - template < class MapDst, class MapSrc > - friend void Impl::UnorderedMap::deep_copy_impl( MapDst & dst, const MapSrc & src); -}; - - -} // namespace Kokkos - -#endif //KOKKOS_UNORDERED_MAP_HPP diff --git a/kokkos/kokkos/containers/src/Kokkos_Vector.hpp b/kokkos/kokkos/containers/src/Kokkos_Vector.hpp deleted file mode 100644 index cdcad2d..0000000 --- a/kokkos/kokkos/containers/src/Kokkos_Vector.hpp +++ /dev/null @@ -1,313 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VECTOR_HPP -#define KOKKOS_VECTOR_HPP - -#include -#include - -/* Drop in replacement for std::vector based on Kokkos::DualView - * Most functions only work on the host (it will not compile if called from device kernel) - * - */ -#ifndef KOKKOS_HAVE_CUDA - #ifdef KOKKOS_HAVE_PTHREAD - #include - namespace Kokkos { - namespace Impl { - typedef Threads DefaultDeviceType; - } - } - #else - #ifdef KOKKOS_HAVE_OPENMP - #include - namespace Kokkos { - namespace Impl { - typedef OpenMP DefaultDeviceType; - } - } - #else - #ifdef KOKKOS_HAVE_SERIAL - #include - namespace Kokkos { - namespace Impl { - typedef Serial DefaultDeviceType; - } - } - #else - #error "No Kokkos Host Device defined" - #endif - #endif - #endif -#else - #include - namespace Kokkos { - namespace Impl { - typedef Cuda DefaultDeviceType; - } - } -#endif - namespace Kokkos { - -template -class vector : public DualView { -public: - typedef Device device_type; - typedef Scalar value_type; - typedef Scalar* pointer; - typedef const Scalar* const_pointer; - typedef Scalar* reference; - typedef const Scalar* const_reference; - typedef Scalar* iterator; - typedef const Scalar* const_iterator; - -private: - size_t _size; - typedef size_t size_type; - float _extra_storage; - typedef DualView DV; - - -public: - inline Scalar& operator() (int i) const {return DV::h_view(i);}; - inline Scalar& operator[] (int i) const {return DV::h_view(i);}; - - - /* Member functions which behave like std::vector functions */ - - vector():DV() { - _size = 0; - _extra_storage = 1.1; - DV::modified_host = 1; - }; - - - vector(int n, Scalar val=Scalar()):DualView("Vector",size_t(n*(1.1))) { - _size = n; - _extra_storage = 1.1; - DV::modified_host = 1; - - assign(n,val); - } - - - void resize(size_t n) { - if(n>=capacity()) - DV::resize(size_t (n*_extra_storage)); - _size = n; - } - - void resize(size_t n, const Scalar& val) { - assign(n,val); - } - - void assign (size_t n, const Scalar& val) { - - /* Resize if necessary (behavour of std:vector) */ - - if(n>capacity()) - DV::resize(size_t (n*_extra_storage)); - _size = n; - - /* Assign value either on host or on device */ - - if( DV::modified_host >= DV::modified_device ) { - set_functor_host f(DV::h_view,val); - parallel_for(n,f); - DV::t_host::device_type::fence(); - DV::modified_host++; - } else { - set_functor f(DV::d_view,val); - parallel_for(n,f); - DV::t_dev::device_type::fence(); - DV::modified_device++; - } - } - - void reserve(size_t n) { - DV::resize(size_t (n*_extra_storage)); - } - - void push_back(Scalar val) { - DV::modified_host++; - if(_size == capacity()) { - size_t new_size = _size*_extra_storage; - if(new_size == _size) new_size++; - DV::resize(new_size); - } - - DV::h_view(_size) = val; - _size++; - - }; - - void pop_back() { - _size--; - }; - - void clear() { - _size = 0; - } - 
- size_type size() const {return _size;}; - size_type max_size() const {return 2000000000;} - size_type capacity() const {return DV::capacity();}; - bool empty() const {return _size==0;}; - - iterator begin() const {return &DV::h_view(0);}; - - iterator end() const {return &DV::h_view(_size);}; - - - /* std::algorithms wich work originally with iterators, here they are implemented as member functions */ - - size_t lower_bound(const size_t &start, const size_t &end, const Scalar &comp_val) const { - - int lower = 0 > start ? 0 : start; - int upper = _size > end? end : _size-1; - if(upper<=lower) return end; - - - Scalar lower_val = DV::h_view(lower); - Scalar upper_val = DV::h_view(upper); - size_t idx = (upper+lower)/2; - Scalar val = DV::h_view(idx); - if(val>upper_val) return upper; - if(vallower) { - if(comp_val>val) { - lower = ++idx; - } else { - upper = idx; - } - idx = (upper+lower)/2; - val = DV::h_view(idx); - } - return idx; - } - - bool is_sorted() { - for(int i=0;i<_size-1;i++) { - if(DV::h_view(i)>DV::h_view(i+1)) return false; - } - return true; - } - - iterator find(Scalar val) const { - if(_size == 0) return end(); - - int upper,lower,current; - current = _size/2; - upper = _size-1; - lower = 0; - - if((valDV::h_view(_size-1)) ) return end(); - - while(upper>lower) - { - if(val>DV::h_view(current)) lower = current+1; - else upper = current; - current = (upper+lower)/2; - } - - if(val==DV::h_view(current)) return &DV::h_view(current); - else return end(); - } - - /* Additional functions for data management */ - - void device_to_host(){ - deep_copy(DV::h_view,DV::d_view); - } - void host_to_device() const { - deep_copy(DV::d_view,DV::h_view); - } - - void on_host() { - DV::modified_host = DV::modified_device + 1; - } - void on_device() { - DV::modified_device = DV::modified_host + 1; - } - - void set_overallocation(float extra) { - _extra_storage = 1.0 + extra; - } - - -public: - struct set_functor { - typedef typename DV::t_dev::device_type device_type; - typename DV::t_dev _data; - Scalar _val; - - set_functor(typename DV::t_dev data, Scalar val) : - _data(data),_val(val) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int &i) const { - _data(i) = _val; - } - }; - - struct set_functor_host { - typedef typename DV::t_host::device_type device_type; - typename DV::t_host _data; - Scalar _val; - - set_functor_host(typename DV::t_host data, Scalar val) : - _data(data),_val(val) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int &i) const { - _data(i) = _val; - } - }; - -}; -} -#endif diff --git a/kokkos/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/kokkos/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp deleted file mode 100644 index 15f2f0e..0000000 --- a/kokkos/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP -#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< class DataType , class Arg1Type , class Arg2Type , typename SizeType > -inline -typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror_view( const StaticCrsGraph & view , - typename Impl::enable_if< ViewTraits::is_hostspace >::type * = 0 ) -{ - return view ; -} - -template< class DataType , class Arg1Type , class Arg2Type , typename SizeType > -inline -typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror( const StaticCrsGraph & view ) -{ - // Force copy: - typedef Impl::ViewAssignment< Impl::LayoutDefault > alloc ; - typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type ; - - typename staticcrsgraph_type::HostMirror tmp ; - typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map); - - // Allocation to match: - tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const' - tmp.entries = create_mirror( view.entries ); - - - // Deep copy: - deep_copy( tmp_row_map , view.row_map ); - deep_copy( tmp.entries , view.entries ); - - return tmp ; -} - -template< class DataType , class Arg1Type , class Arg2Type , typename SizeType > -inline -typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror_view( const StaticCrsGraph & view , - typename Impl::enable_if< ! 
ViewTraits::is_hostspace >::type * = 0 ) -{ - return create_mirror( view ); -} -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< class StaticCrsGraphType , class InputSizeType > -inline -typename StaticCrsGraphType::staticcrsgraph_type -create_staticcrsgraph( const std::string & label , - const std::vector< InputSizeType > & input ) -{ - typedef StaticCrsGraphType output_type ; - typedef std::vector< InputSizeType > input_type ; - - typedef typename output_type::entries_type entries_type ; - - typedef View< typename output_type::size_type [] , - typename output_type::array_layout , - typename output_type::device_type > work_type ; - - output_type output ; - - // Create the row map: - - const size_t length = input.size(); - - { - work_type row_work( "tmp" , length + 1 ); - - typename work_type::HostMirror row_work_host = - create_mirror_view( row_work ); - - size_t sum = 0 ; - row_work_host[0] = 0 ; - for ( size_t i = 0 ; i < length ; ++i ) { - row_work_host[i+1] = sum += input[i]; - } - - deep_copy( row_work , row_work_host ); - - output.entries = entries_type( label , sum ); - output.row_map = row_work ; - } - - return output ; -} - -//---------------------------------------------------------------------------- - -template< class StaticCrsGraphType , class InputSizeType > -inline -typename StaticCrsGraphType::staticcrsgraph_type -create_staticcrsgraph( const std::string & label , - const std::vector< std::vector< InputSizeType > > & input ) -{ - typedef StaticCrsGraphType output_type ; - typedef std::vector< std::vector< InputSizeType > > input_type ; - typedef typename output_type::entries_type entries_type ; - typedef typename output_type::size_type size_type ; - - typedef typename - Impl::assert_shape_is_rank_one< typename entries_type::shape_type >::type - ok_rank ; - - typedef View< typename output_type::size_type [] , - typename output_type::array_layout , - typename output_type::device_type > work_type ; - - output_type output ; - - // Create the row map: - - const size_t length = input.size(); - - { - work_type row_work( "tmp" , length + 1 ); - - typename work_type::HostMirror row_work_host = - create_mirror_view( row_work ); - - size_t sum = 0 ; - row_work_host[0] = 0 ; - for ( size_t i = 0 ; i < length ; ++i ) { - row_work_host[i+1] = sum += input[i].size(); - } - - deep_copy( row_work , row_work_host ); - - output.entries = entries_type( label , sum ); - output.row_map = row_work ; - } - - // Fill in the entries: - { - typename entries_type::HostMirror host_entries = - create_mirror_view( output.entries ); - - size_t sum = 0 ; - for ( size_t i = 0 ; i < length ; ++i ) { - for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) { - host_entries( sum ) = input[i][j] ; - } - } - - deep_copy( output.entries , host_entries ); - } - - return output ; -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP */ - diff --git a/kokkos/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp b/kokkos/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp deleted file mode 100644 index 651f66c..0000000 --- a/kokkos/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* -//@HEADER -// 
************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Kokkos { namespace Impl { namespace UnorderedMap { - -uint32_t find_hash_size(uint32_t size) -{ - if (size == 0u) return 0u; - - // these primes try to preserve randomness of hash - static const uint32_t primes [] = { - 3, 7, 13, 23, 53, 97, 193, 389, 769, 1543 - , 2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539 - , 3727, 3911, 4441 , 4787 , 5119 , 5471 , 5801 , 6143 , 6521 , 6827 - , 7177 , 7517 , 7853 , 8887 , 9587 , 10243 , 10937 , 11617 , 12289 - , 12967 , 13649 , 14341 , 15013 , 15727 - , 17749 , 19121 , 20479 , 21859 , 23209 , 24593 , 25939 , 27329 - , 28669 , 30047 , 31469 , 35507 , 38231 , 40961 , 43711 , 46439 - , 49157 , 51893 , 54617 , 57347 , 60077 , 62801 , 70583 , 75619 - , 80669 , 85703 , 90749 , 95783 , 100823 , 105871 , 110909 , 115963 - , 120997 , 126031 , 141157 , 151237 , 161323 , 171401 , 181499 , 191579 - , 201653 , 211741 , 221813 , 231893 , 241979 , 252079 - , 282311 , 302483 , 322649 , 342803 , 362969 , 383143 , 403301 , 423457 - , 443629 , 463787 , 483953 , 504121 , 564617 , 604949 , 645313 , 685609 - , 725939 , 766273 , 806609 , 846931 , 887261 , 927587 , 967919 , 1008239 - , 1123477 , 1198397 , 1273289 , 1348177 , 1423067 , 1497983 , 1572869 - , 1647761 , 1722667 , 1797581 , 1872461 , 1947359 , 2022253 - , 2246953 , 2396759 , 2546543 , 2696363 , 2846161 , 2995973 , 3145739 - , 3295541 , 3445357 , 3595117 , 3744941 , 3894707 , 4044503 - , 4493921 , 4793501 , 5093089 , 5392679 , 5692279 , 5991883 , 6291469 - , 6591059 , 6890641 , 7190243 , 7489829 , 7789447 , 8089033 - , 8987807 , 9586981 , 10186177 , 10785371 , 11384539 , 11983729 - , 12582917 , 13182109 , 13781291 , 14380469 , 14979667 , 15578861 - , 16178053 , 17895707 , 19014187 , 20132683 , 21251141 , 22369661 - , 23488103 , 24606583 , 25725083 , 26843549 , 27962027 , 29080529 - , 30198989 , 31317469 , 32435981 , 35791397 , 38028379 , 40265327 - , 42502283 , 44739259 , 46976221 , 49213237 , 51450131 , 53687099 - , 55924061 , 58161041 , 60397993 , 62634959 , 64871921 - , 71582857 , 76056727 , 80530643 , 85004567 , 89478503 , 93952427 - , 98426347 , 102900263 , 107374217 , 111848111 , 116322053 , 120795971 - , 125269877 , 129743807 , 143165587 , 152113427 , 161061283 , 170009141 - , 178956983 , 187904819 , 196852693 , 205800547 , 214748383 , 223696237 - , 232644089 , 241591943 , 250539763 , 259487603 , 268435399 - }; - - - const size_t num_primes = sizeof(primes)/sizeof(uint32_t); - - uint32_t hsize = primes[num_primes-1] ; - for (size_t i = 0; i < num_primes; ++i) { - if (size <= primes[i]) { - hsize = primes[i]; - break; - } - } - return hsize; -} - -}}} // namespace Kokkos::Impl::UnorderedMap - diff --git a/kokkos/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/kokkos/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp deleted file mode 100644 index c079a1d..0000000 --- a/kokkos/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp +++ /dev/null @@ -1,818 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP -#define KOKKOS_UNORDERED_MAP_IMPL_HPP - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace Kokkos { namespace Impl { namespace UnorderedMap { - -uint32_t find_hash_size( uint32_t size ); - -enum node_state -{ - UNUSED // not used in a list - , USED // used in a list - , PENDING_INSERT // not used in a list, but reserved by a thread for inserting - , PENDING_DELETE // node in the list is marked deleted - , INVALID // the 0th node in the node view is set to invalid -}; - -struct node_atomic -{ - static const uint64_t word_mask = 0x00000000FFFFFFFFu; - static const uint64_t word_shift = 32u; - static const uint32_t invalid_next = 0xFFFFFFFFu; - - KOKKOS_FORCEINLINE_FUNCTION - static uint32_t next(uint64_t v) - { return static_cast(v & word_mask); } - - KOKKOS_FORCEINLINE_FUNCTION - static node_state state(uint64_t v) - { return static_cast((v >> word_shift)); } - - KOKKOS_FORCEINLINE_FUNCTION - static uint64_t make_atomic( uint32_t n, node_state s) - { return (static_cast(s) << word_shift) | static_cast(n); } - - KOKKOS_FORCEINLINE_FUNCTION - node_atomic(uint64_t v = make_atomic(invalid_next, UNUSED) ) - : value(v) - {} - - KOKKOS_FORCEINLINE_FUNCTION - operator uint64_t() const - { return value; } - - uint64_t value; -}; - -template -struct Align16 -{ - static const size_t value = (Size & 15ull); -}; - -template ::value > -struct node -{ - typedef ValueType value_type; - - // contruct a new value at the current node - KOKKOS_FORCEINLINE_FUNCTION - void construct_value( const value_type & v ) - { new (&value) value_type(v); } - - // destruct the value at the current node - KOKKOS_FORCEINLINE_FUNCTION - void destruct_value() - { value.~value_type(); } - - value_type value; - uint8_t pad[AlignPad]; - node_atomic atomic; -}; - -template -struct node -{ 
- typedef ValueType value_type; - - // contruct a new value at the current node - KOKKOS_FORCEINLINE_FUNCTION - void construct_value( const value_type & v ) - { new (&value) value_type(v); } - - // destruct the value at the current node - KOKKOS_FORCEINLINE_FUNCTION - void destruct_value() - { value.~value_type(); } - - value_type value; - node_atomic atomic; -}; - -template -struct node_block -{ - typedef Node node_type; - typedef typename StaticAssert<(sizeof(node_type) % 16u == 0u)>::type node_okay; - - static const uint32_t shift = 5; - static const uint32_t size = 1u << shift; - static const uint32_t mask = size - 1u; - - KOKKOS_FORCEINLINE_FUNCTION - node_block() - : used_count(0) - , failed_inserts(0) - , pad(0) - , nodes() - {} - - int32_t used_count; - int32_t failed_inserts; - uint64_t pad; - node_type nodes[size]; -}; - -struct hash_list_sanity_type -{ - KOKKOS_INLINE_FUNCTION - hash_list_sanity_type() - : duplicate_keys_errors(0) - , unordered_list_errors(0) - , incorrect_hash_index_errors(0) - {} - - uint32_t duplicate_keys_errors; - uint32_t unordered_list_errors; - uint32_t incorrect_hash_index_errors; -}; - -struct node_state_counts -{ - KOKKOS_INLINE_FUNCTION - node_state_counts() - : in_sync(true) - , no_failed_inserts(true) - , unused(0) - , used_count(0) - , used(0) - , pending_insert(0) - , pending_delete(0) - , invalid(0) - , failed_inserts(0) - {} - - bool in_sync; - bool no_failed_inserts; - uint32_t unused; - uint32_t used_count; - uint32_t used; - uint32_t pending_insert; - uint32_t pending_delete; - uint32_t invalid; - uint32_t failed_inserts; -}; - - -template -struct sync_node_states_functor -{ - typedef typename MapData::device_type device_type; - typedef typename device_type::size_type size_type; - typedef typename MapData::node_block_type node_block_type; - typedef typename MapData::node_type node_type; - - typedef node_state_counts value_type; - - MapData map; - - sync_node_states_functor(MapData arg_map) - : map(arg_map) - { - parallel_reduce( map.capacity(), *this); - } - - KOKKOS_INLINE_FUNCTION - static void init( value_type & dst) - { - dst = value_type(); - } - - KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & dst, const volatile value_type & src) - { - dst.unused += src.unused; - dst.used_count += src.used_count; - dst.used += src.used; - dst.pending_insert += src.pending_insert; - dst.pending_delete += src.pending_delete; - dst.invalid += src.invalid; - dst.failed_inserts += src.failed_inserts; - } - - KOKKOS_INLINE_FUNCTION - void final( value_type & result ) const - { - result.in_sync = true; - result.no_failed_inserts = map.counts().no_failed_inserts; - - map.counts = result; - } - - KOKKOS_INLINE_FUNCTION - void operator()( size_type i, value_type & dst) const - { - // count block properties - if ((i%node_block_type::size) == 0u) { - dst.used_count += map.node_blocks[i>>node_block_type::shift].used_count; - dst.failed_inserts += map.node_blocks[i>>node_block_type::shift].failed_inserts; - } - - const node_state state = node_atomic::state(map.get_node(i).atomic); - - if (state == UNUSED) - ++dst.unused; - else if (state == USED) - ++dst.used; - else if (state == PENDING_INSERT) - ++dst.pending_insert; - else if (state == PENDING_DELETE) - ++dst.pending_delete; - else - ++dst.invalid; - } -}; - - -template -struct check_hash_list_functor -{ - typedef typename MapData::device_type device_type; - typedef typename device_type::size_type size_type; - typedef typename MapData::node_type node_type; - typedef hash_list_sanity_type 
value_type; - - MapData map; - - check_hash_list_functor(MapData arg_map, value_type & value) - : map(arg_map) - { - parallel_reduce( map.hashes.size(), *this, value); - } - - KOKKOS_INLINE_FUNCTION - static void init( value_type & dst) - { - dst.duplicate_keys_errors = 0; - dst.unordered_list_errors = 0; - dst.incorrect_hash_index_errors = 0; - } - - KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & dst, const volatile value_type & src) - { - dst.duplicate_keys_errors += src.duplicate_keys_errors; - dst.unordered_list_errors += src.unordered_list_errors; - dst.incorrect_hash_index_errors += src.incorrect_hash_index_errors; - } - - KOKKOS_INLINE_FUNCTION - void operator()( size_type i, value_type & errors) const - { - const uint64_t * prev_atomic = &map.hashes[i].value; - - uint32_t incorrect_hash_index_errors = 0; - uint32_t duplicate_keys_errors = 0; - uint32_t unordered_list_errors = 0; - - //traverse the list - while ( node_atomic::next(*prev_atomic) != node_atomic::invalid_next) { - const uint64_t * curr_atomic = &map.get_node(node_atomic::next(*prev_atomic)).atomic.value; - - const uint32_t curr_index = node_atomic::next(*prev_atomic); - const uint32_t next_index = node_atomic::next(*curr_atomic); - - //check that the key hashes to this index - const uint32_t hash_value = map.key_hash(map.get_node(curr_index).value.first); - const uint32_t hash_index = hash_value%map.hashes.size(); - - if ( static_cast(i) != hash_index) { - ++incorrect_hash_index_errors; - } - - if (next_index != node_atomic::invalid_next) { - //check that the list is ordered and has no duplicates - const bool key_less = map.key_compare( map.get_node(curr_index).value.first, map.get_node(next_index).value.first ); - const bool key_greater = map.key_compare( map.get_node(next_index).value.first, map.get_node(curr_index).value.first ); - const bool key_equal = !key_less && !key_greater; - - if (key_equal) { - ++duplicate_keys_errors; - } - else if (key_greater) { - ++unordered_list_errors; - } - } - - prev_atomic = curr_atomic; - } - - errors.incorrect_hash_index_errors += incorrect_hash_index_errors; - errors.duplicate_keys_errors += duplicate_keys_errors; - errors.unordered_list_errors += unordered_list_errors; - } -}; - -template -struct remove_pending_delete_keys_functor -{ - typedef typename MapData::device_type device_type; - typedef typename device_type::size_type size_type; - typedef typename MapData::node_type node_type; - typedef typename MapData::node_block_type node_block_type; - - node_block_type * node_blocks; - node_atomic * hashes; - node_state_counts * counts; - - remove_pending_delete_keys_functor( MapData arg_map ) - : node_blocks( const_cast(arg_map.node_blocks.ptr_on_device()) ) - , hashes( const_cast(arg_map.hashes.ptr_on_device()) ) - , counts( const_cast(arg_map.counts.ptr_on_device()) ) - { - parallel_for( arg_map.hashes.size(), *this); - device_type::fence(); - } - - KOKKOS_FORCEINLINE_FUNCTION - node_type & get_node(uint32_t i) const - { - return node_blocks[i>>node_block_type::shift].nodes[i&node_block_type::mask]; - } - - KOKKOS_INLINE_FUNCTION - void operator()( size_type i) const - { - if (i == static_cast(0)) { - counts->in_sync = false; - } - - uint64_t * prev_atomic = &hashes[i].value; - - while (node_atomic::next(*prev_atomic) != node_atomic::invalid_next) { - uint64_t * curr_atomic = &get_node( node_atomic::next(*prev_atomic)).atomic.value; - uint64_t prev = *prev_atomic; - uint64_t curr = *curr_atomic; - if (node_atomic::state(curr) == PENDING_DELETE) { - const 
uint32_t curr_index = node_atomic::next(prev); - const uint32_t curr_block = curr_index >> node_block_type::shift; - - //remove the node - *prev_atomic = node_atomic::make_atomic( node_atomic::next(curr), node_atomic::state(prev) ); - *curr_atomic = node_atomic::make_atomic( node_atomic::invalid_next, UNUSED ); - volatile int * used_count = &node_blocks[curr_block].used_count; - atomic_fetch_add(used_count, -1); - } - else { - prev_atomic = curr_atomic; - } - } - } -}; - -template -struct map_data -{ - typedef map_data self_type; - - typedef typename remove_const::type key_type; - typedef typename add_const::type const_key_type; - - typedef typename remove_const::type mapped_type; - typedef typename add_const::type const_mapped_type; - - typedef Device device_type; - typedef Compare compare_type; - typedef Hash hash_type; - - typedef map_data< key_type, mapped_type, Device, Compare, Hash> insertable_map_type; - typedef map_data< const_key_type, mapped_type, Device, Compare, Hash> modifiable_map_type; - typedef map_data< const_key_type, const_mapped_type, Device, Compare, Hash> const_map_type; - - static const bool has_const_key_type = is_const::value; - static const bool has_void_mapped_type = is_same::value; - static const bool has_const_mapped_type = has_void_mapped_type || is_const::value; - static const bool is_const_map = has_const_key_type && has_const_mapped_type; - - - typedef pair value_type; - - typedef typename if_c< is_const_map, value_type const *, value_type *>::type pointer; - typedef value_type const * const_pointer; - - typedef node node_type; - typedef node_block node_block_type; - - - typedef uint32_t size_type; - - typedef typename if_c< has_const_key_type - , View< const node_atomic *, device_type, MemoryTraits > - , View< node_atomic *, device_type > - >::type hash_view; - - typedef typename if_c< is_const_map - , View< const node_block_type *, device_type, MemoryTraits > - , View< node_block_type *, device_type > - >::type node_block_view; - - - typedef View< node_state_counts, device_type > counts_view; - - map_data() - : node_blocks() - , hashes() - , counts() - , key_compare() - , key_hash() - {} - - map_data( uint32_t num_nodes - , compare_type compare - , hash_type hash - ) - : node_blocks("UnorderedMap_nodes", (static_cast((num_nodes+node_block_type::size-1u)/node_block_type::size))) - , hashes("UnorderedMap_hashes", find_hash_size(capacity()) ) - , counts("UnorderedMap_counts") - , key_compare(compare) - , key_hash(hash) - {} - - template - KOKKOS_INLINE_FUNCTION - map_data( const MMapType & m) - : node_blocks(m.node_blocks) - , hashes(m.hashes) - , counts(m.counts) - , key_compare(m.key_compare) - , key_hash(m.key_hash) - {} - - template - KOKKOS_INLINE_FUNCTION - map_data & operator=( const MMapType & m) - { - node_blocks = m.node_blocks; - hashes = m.hashes; - counts = m.counts; - key_compare = m.key_compare; - key_hash = m.key_hash; - - return *this; - } - - KOKKOS_INLINE_FUNCTION - uint32_t capacity() const - { - return node_blocks.size() * node_block_type::size; - } - - KOKKOS_INLINE_FUNCTION - uint32_t hash_capacity() const - { - return static_cast(hashes.size()); - } - - bool in_sync() const - { - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - bool result = false; - deep_copy(&result, &counts.ptr_on_device()->in_sync, sizeof(bool) ); - return result; - } - - void sync_node_states() const - { - if (!in_sync()) { - sync_node_states_functor(*this); - device_type::fence(); - } - } - - uint32_t size() 
const - { - sync_node_states(); - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - uint32_t result = 0; - deep_copy(&result, &counts.ptr_on_device()->used, sizeof(uint32_t) ); - return result; - } - - uint32_t unused() const - { - sync_node_states(); - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - uint32_t result = 0; - deep_copy(&result, &counts.ptr_on_device()->unused, sizeof(uint32_t) ); - return result; - } - - uint32_t pending_insert() const - { - sync_node_states(); - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - uint32_t result = 0; - deep_copy(&result, &counts.ptr_on_device()->pending_insert, sizeof(uint32_t) ); - return result; - } - - uint32_t pending_delete() const - { - sync_node_states(); - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - uint32_t result = 0; - deep_copy(&result, &counts.ptr_on_device()->pending_delete, sizeof(uint32_t) ); - return result; - } - - uint32_t failed_inserts() const - { - sync_node_states(); - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - uint32_t result = 0; - deep_copy(&result, &counts.ptr_on_device()->failed_inserts, sizeof(uint32_t) ); - return result; - } - - uint32_t used_count() const - { - sync_node_states(); - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - uint32_t result = 0; - deep_copy(&result, &counts.ptr_on_device()->used_count, sizeof(uint32_t) ); - return result; - } - - uint32_t invalid_count() const - { - sync_node_states(); - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > deep_copy; - uint32_t result = 0; - deep_copy(&result, &counts.ptr_on_device()->invalid, sizeof(uint32_t) ); - return result; - } - - hash_list_sanity_type check_hash_sanity() const - { - hash_list_sanity_type result; - check_hash_list_functor(*this, result); - device_type::fence(); - return result; - } - - void check_sanity() const - { - sync_node_states(); - - hash_list_sanity_type list_check; - - check_hash_list_functor(*this, list_check); - - device_type::fence(); - - std::ostringstream out; - - int total_errors = 0; - - if (failed_inserts() > 0u) { - out << "Error: " << failed_inserts() << " failed insertions\n"; - total_errors += failed_inserts(); - } - - if (list_check.duplicate_keys_errors > 0u) { - out << "Error: found " << list_check.duplicate_keys_errors << " duplicate keys found in lists\n"; - ++total_errors; - } - - if (list_check.unordered_list_errors > 0u) { - out << "Error: found " << list_check.unordered_list_errors << " unsorted lists\n"; - ++total_errors; - } - - if (list_check.incorrect_hash_index_errors > 0u) { - out << "Error: found " << list_check.incorrect_hash_index_errors << " keys incorrectly hashed\n"; - ++total_errors; - } - - if (invalid_count() > 0u) { - out << "Error: found " << invalid_count() << " invalid nodes \n"; - ++total_errors; - } - - if (pending_insert() > 0u) { - out << "Error: found " << pending_insert() << " pending insert nodes (should always be 0)\n"; - ++total_errors; - } - - if (used_count() != size() + pending_delete()) { - out << "Error: used_count(" << used_count() << ") != size(" << size() << ") + pending_delete(" - << pending_delete() << ") = " << size() + pending_delete() << "\n"; - ++total_errors; - } - - if (total_errors > 0) { - out << "Total 
Errors: " << total_errors << std::endl; - throw std::runtime_error( out.str() ); - } - } - - void remove_pending_delete_keys() const - { - remove_pending_delete_keys_functor remove_keys(*this); - } - - KOKKOS_INLINE_FUNCTION - uint32_t find_node_index( const key_type & k) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - - const uint32_t hash_value = key_hash(k); - const uint32_t hash_index = hash_value % hashes.size(); - - uint64_t prev = hashes[hash_index]; - - uint32_t index = node_atomic::invalid_next; - do { - const uint32_t curr_index = node_atomic::next(prev); - - if ( curr_index != node_atomic::invalid_next ) { - const node_type & curr_node = get_node(curr_index); - const uint64_t curr = get_node(curr_index).atomic; - - const bool curr_greater = key_compare( k, curr_node.value.first); - const bool curr_less = key_compare( curr_node.value.first, k); - const bool curr_equal = !curr_less && !curr_greater; - - if (curr_greater) { - index = node_atomic::invalid_next; - break; - } else if (curr_equal) { - // return existing node - index = curr_index; - break; - } - else { - // Current is less -- advance to next node - prev = curr; - } - } - else { - break; - } - } while (true); - - return index; - } - - KOKKOS_FORCEINLINE_FUNCTION - typename if_c< is_const_map, const node_type, node_type>::type & get_node(uint32_t i) const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - return node_blocks[i>>node_block_type::shift].nodes[i&node_block_type::mask]; - } - - KOKKOS_FORCEINLINE_FUNCTION - void set_modified() const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - if (counts().in_sync) { - counts().in_sync = false; -#if defined( __CUDA_ARCH__ ) - __threadfence(); -#endif - } - } - - KOKKOS_FORCEINLINE_FUNCTION - bool no_failed_inserts() const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - return counts().no_failed_inserts; - } - - KOKKOS_FORCEINLINE_FUNCTION - void set_failed_insert() const - { - //KOKKOS_RESTRICT_EXECUTION_TO( typename Device::memory_space ); - if (counts().no_failed_inserts) { - counts().no_failed_inserts = false; -#if defined( __CUDA_ARCH__ ) - __threadfence(); -#endif - } - } - - // Data members - node_block_view node_blocks; - hash_view hashes; - counts_view counts; - compare_type key_compare; - hash_type key_hash; -}; - - -template < class MapDst, class MapSrc > -inline void deep_copy_impl( MapDst & dst, const MapSrc & src ) -{ - deep_copy_data_impl(dst.m_data, src.m_data); -} - -template < class MapDst, class MapSrc > -struct copy_map_functor -{ - typedef typename MapDst::device_type device_type; - typedef typename device_type::size_type size_type; - typedef typename MapDst::const_pointer const_pointer; - - - MapDst dst; - MapSrc src; - - copy_map_functor( const MapDst & arg_dst, const MapSrc & arg_src ) - : dst(arg_dst), src(arg_src) - { - parallel_for(src.capacity(), *this); - } - - KOKKOS_INLINE_FUNCTION - void operator()(size_type i) const - { - const_pointer ptr = src.get_value(i); - - if (ptr != NULL) { - dst.insert(ptr->first,ptr->second); - } - } -}; - -template < class MapDst, class MapSrc > -void copy_map(MapDst & dst, const MapSrc & src) -{ - copy_map_functor func(dst,src); -} - -template < class MapDst, class MapSrc > -inline void deep_copy_data_impl( MapDst & dst, const MapSrc & src ) -{ - typedef typename MapDst::node_block_type node_block_type; - typedef Kokkos::Impl::DeepCopy< typename MapDst::device_type::memory_space, typename 
MapSrc::device_type::memory_space > raw_deep_copy; - dst.node_blocks = typename MapDst::node_block_view("UnorderedMap_nodes", src.node_blocks.size()); - dst.hashes = typename MapDst::hash_view("UnorderedMap_hashes", src.hashes.size()); - - raw_deep_copy(const_cast(dst.node_blocks.ptr_on_device()), src.node_blocks.ptr_on_device(), sizeof(node_block_type) * src.node_blocks.size()); - raw_deep_copy(const_cast(dst.hashes.ptr_on_device()), src.hashes.ptr_on_device(), sizeof(node_atomic) * src.hashes.size()); - raw_deep_copy(const_cast(dst.counts.ptr_on_device()), src.counts.ptr_on_device(), sizeof(node_state_counts)); - - dst.key_compare = src.key_compare; - dst.key_hash = src.key_hash; -} - -}}} // namespace Kokkos::Impl::UnorderedMap - -#endif //KOKKOS_UNORDERED_MAP_IMPL_HPP - diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/kokkos/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp deleted file mode 100644 index 569f000..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ /dev/null @@ -1,282 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDAEXEC_HPP -#define KOKKOS_CUDAEXEC_HPP - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -#if defined( __CUDACC__ ) - -namespace Kokkos { -namespace Impl { - -class CudaExec { -public: - - __device__ inline - CudaExec( const int shmem_begin , const int shmem_end ) - : m_shmem_end( shmem_end ) - , m_shmem_iter( shmem_begin ) - {} - - __device__ inline - void * get_shmem( const int size ) - { - extern __shared__ int sh[]; - - // m_shmem_iter is in bytes, convert to integer offsets - const int offset = m_shmem_iter >> power_of_two::value ; - - m_shmem_iter += size ; - - if ( m_shmem_end < m_shmem_iter ) { - cuda_abort("Cuda::get_shmem out of memory"); - } - - return sh + offset ; - } - -private: - - const int m_shmem_end ; - int m_shmem_iter ; -}; - -} // namespace Impl -} // namespace Kokkos - -#if defined( __CUDA_ARCH__ ) - -namespace Kokkos { - -inline __device__ -void * Cuda::get_shmem( const int size ) { return m_exec.get_shmem( size ); } - -} // namespace Kokkos - -#endif /* defined( __CUDA_ARCH__ ) */ -#endif /* defined( __CUDACC__ ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -struct CudaTraits { - enum { WarpSize = 32 /* 0x0020 */ }; - enum { WarpIndexMask = 0x001f /* Mask for warpindex */ }; - enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ }; - - enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ }; - enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ }; - enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ }; - - enum { UpperBoundGridCount = 65535 /* Hard upper bound */ }; - enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ }; - enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ }; - enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ }; - - typedef unsigned long - ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ]; - - enum { ConstantMemoryUseThreshold = 0x000100 /* 256 bytes */ }; - - KOKKOS_INLINE_FUNCTION static - CudaSpace::size_type warp_count( CudaSpace::size_type i ) - { return ( i + WarpIndexMask ) >> WarpIndexShift ; } - - KOKKOS_INLINE_FUNCTION static - CudaSpace::size_type warp_align( CudaSpace::size_type i ) - { - enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) }; - return ( i + WarpIndexMask ) & Mask ; - } -}; - -//---------------------------------------------------------------------------- - -CudaSpace::size_type cuda_internal_maximum_warp_count(); -CudaSpace::size_type cuda_internal_maximum_grid_count(); -CudaSpace::size_type cuda_internal_maximum_shared_words(); - -CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size ); -CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size ); -CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size ); - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) - -/** \brief Access to constant memory on the device */ -__device__ __constant__ -Kokkos::Impl::CudaTraits::ConstantGlobalBufferType 
-kokkos_impl_cuda_constant_memory_buffer ; - -template< typename T > -inline -__device__ -T * kokkos_impl_cuda_shared_memory() -{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; } - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -// See section B.17 of Cuda C Programming Guide Version 3.2 -// for discussion of -// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) -// function qualifier which could be used to improve performance. -//---------------------------------------------------------------------------- -// Maximize L1 cache and minimize shared memory: -// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 ); -// For 2.0 capability: 48 KB L1 and 16 KB shared -//---------------------------------------------------------------------------- - -template< class DriverType > -__global__ -static void cuda_parallel_launch_constant_memory() -{ - const DriverType & driver = - *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); - - driver(); -} - -template< class DriverType > -__global__ -static void cuda_parallel_launch_local_memory( const DriverType driver ) -{ - driver(); -} - -template < class DriverType , - bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) > -struct CudaParallelLaunch ; - -template < class DriverType > -struct CudaParallelLaunch< DriverType , true > { - - inline - CudaParallelLaunch( const DriverType & driver , - const dim3 & grid , - const dim3 & block , - const int shmem ) - { - if ( grid.x && block.x ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - else if ( shmem ) { - cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared ); - } else { - cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 ); - } - - // Copy functor to constant memory on the device - cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) ); - - // Invoke the driver function on the device - cuda_parallel_launch_constant_memory< DriverType ><<< grid , block , shmem >>>(); - -#if defined( KOKKOS_EXPRESSION_CHECK ) - Kokkos::Cuda::fence(); -#endif - } - } -}; - -template < class DriverType > -struct CudaParallelLaunch< DriverType , false > { - - inline - CudaParallelLaunch( const DriverType & driver , - const dim3 & grid , - const dim3 & block , - const int shmem ) - { - if ( grid.x && block.x ) { - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - else if ( shmem ) { - cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared ); - } else { - cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 ); - } - - cuda_parallel_launch_local_memory< DriverType ><<< grid , block , shmem >>>( driver ); - -#if defined( KOKKOS_EXPRESSION_CHECK ) - Kokkos::Cuda::fence(); -#endif - } - } -}; - -//---------------------------------------------------------------------------- - -} // 
namespace Impl -} // namespace Kokkos - -#endif /* defined( __CUDACC__ ) */ - -#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */ diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_CudaSpace.cu b/kokkos/kokkos/core/src/Cuda/Kokkos_CudaSpace.cu deleted file mode 100644 index 908ad0f..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_CudaSpace.cu +++ /dev/null @@ -1,300 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -DeepCopy - ::DeepCopy( void * dst , const void * src , size_t n ) -{ - CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); -} - -DeepCopy - ::DeepCopy( void * dst , const void * src , size_t n ) -{ - CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); -} - -DeepCopy - ::DeepCopy( void * dst , const void * src , size_t n ) -{ - CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); -} - -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace { - -class CudaMemoryTrackingEntry : public Impl::MemoryTrackingEntry -{ -public: - - void * const ptr_alloc ; - const unsigned size ; - const unsigned count ; - Impl::cuda_texture_object_type tex_obj ; - - CudaMemoryTrackingEntry( const std::string & arg_label , - const std::type_info & arg_info , - void * const arg_ptr , - const unsigned arg_size , - const unsigned arg_count ) - : Impl::MemoryTrackingEntry( arg_label , arg_info , arg_ptr , arg_size * arg_count ) - , ptr_alloc( arg_ptr ) - , size( arg_size ) - , count( arg_count ) - , tex_obj( 0 ) - {} - - ~CudaMemoryTrackingEntry(); -}; - -CudaMemoryTrackingEntry::~CudaMemoryTrackingEntry() -{ - cudaError_t sync_err = cudaDeviceSynchronize(); - - if ( tex_obj ) { - - } - - cudaError_t free_err = cudaFree( ptr_alloc ); - - if ( cudaSuccess != sync_err || cudaSuccess != free_err ) { - std::cerr << "cudaFree( " << ptr_alloc << " ) FAILED for " ; - Impl::MemoryTrackingEntry::print( std::cerr ); - } -} - -Impl::MemoryTracking & cuda_space_singleton() -{ - static Impl::MemoryTracking self("Kokkos::CudaSpace"); - return self ; -} - -} - -/*--------------------------------------------------------------------------*/ - -/*--------------------------------------------------------------------------*/ - -void * CudaSpace::allocate( - const std::string & label , - const std::type_info & scalar_type , - const size_t scalar_size , - const size_t scalar_count ) -{ - if ( HostSpace::in_parallel() ) { - Kokkos::Impl::throw_runtime_exception( "Kokkos::CudaSpace::allocate ERROR : Called with HostSpace::in_parallel" ); - } - - const size_t size = scalar_size * scalar_count ; - - void * ptr = 0 ; - - if ( 0 < scalar_size * scalar_count ) { - - try { - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - CUDA_SAFE_CALL( cudaMalloc( (void**) &ptr, size) ); - CUDA_SAFE_CALL( cudaThreadSynchronize() ); - } - catch( std::runtime_error & err) { - std::ostringstream msg ; - msg << "Kokkos::Impl::CudaSpace::allocate( " - << label - << " , " << scalar_type.name() - << " , " << scalar_size - << " , " << scalar_count - << " ) FAILED memory allocation\n" - << err.what(); - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - cuda_space_singleton().insert( - new CudaMemoryTrackingEntry( label , scalar_type , ptr , scalar_size , scalar_count ) ); - } - - return ptr ; -} - -void CudaSpace::increment( const void * ptr ) -{ - if ( HostSpace::in_parallel() ) { - Kokkos::Impl::throw_runtime_exception( "Kokkos::CudaSpace::increment ERROR : Called with HostSpace::in_parallel" ); 
- } - - cuda_space_singleton().increment( ptr ); -} - -void CudaSpace::decrement( const void * ptr ) -{ - if ( HostSpace::in_parallel() ) { - Kokkos::Impl::throw_runtime_exception( "Kokkos::CudaSpace::decrement ERROR : Called with HostSpace::in_parallel" ); - } - - cuda_space_singleton().decrement( ptr ); -} - -void CudaSpace::print_memory_view( std::ostream & o ) -{ - cuda_space_singleton().print( o , std::string(" ") ); -} - -//---------------------------------------------------------------------------- - -std::string CudaSpace::query_label( const void * p ) -{ - const Impl::MemoryTrackingEntry * entry = - cuda_space_singleton().query( p ); - - return entry ? entry->label : std::string("ERROR NOT FOUND"); -} - -void CudaSpace::access_error() -{ - const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" ); - - Kokkos::Impl::throw_runtime_exception( msg ); -} - -void CudaSpace::access_error( const void * const ptr ) -{ - std::ostringstream msg ; - msg << "Kokkos::CudaSpace::access_error:" ; - msg << " attempt to access Cuda-data labeled(" ; - msg << query_label( ptr ) ; - msg << ") from non-Cuda execution" ; - Kokkos::Impl::throw_runtime_exception( msg.str() ); -} - -/*--------------------------------------------------------------------------*/ - -} // namespace Kokkos - -#if defined( CUDA_VERSION ) && ( 500 <= CUDA_VERSION ) - -namespace Kokkos { -namespace Impl { - -::cudaTextureObject_t -cuda_texture_object_attach( - const cudaChannelFormatDesc & desc , - const void * const ptr ) -{ - if ( 0 == ptr ) return 0 ; - - const unsigned max_count = 1 << 28 ; - - CudaMemoryTrackingEntry * entry = - dynamic_cast( cuda_space_singleton().query( ptr ) ); - - const bool ok_found = 0 != entry ; - const bool ok_ptr = ok_found && ptr == entry->ptr_alloc ; - const bool ok_count = ok_found && entry->count < max_count ; - - if ( ok_found && ok_ptr && ok_count ) { - - // Can only create texture object on device architure 3.0 or better - - if ( 0 == entry->tex_obj && 300 <= Cuda::device_arch() ) { - - struct cudaResourceDesc resDesc ; - struct cudaTextureDesc texDesc ; - - memset( & resDesc , 0 , sizeof(resDesc) ); - memset( & texDesc , 0 , sizeof(texDesc) ); - - resDesc.resType = cudaResourceTypeLinear ; - resDesc.res.linear.desc = desc ; - resDesc.res.linear.sizeInBytes = entry->size * entry->count ; - resDesc.res.linear.devPtr = entry->ptr_alloc ; - - cudaCreateTextureObject( & entry->tex_obj, & resDesc, & texDesc, NULL); - } - } - else { - std::ostringstream msg ; - msg << "CudaSpace::texture_object_attach( " << ptr << " ) FAILED: " ; - - if ( ! ok_found ) { - msg << "Not View allocated" ; - } - else if ( ! ok_ptr ) { - msg << "Not the originally allocated View \"" << entry->label << "\"" ; - } - else if ( ! 
ok_count ) { - msg << "Cuda texture object limit exceeded " - << max_count << " <= " << entry->count ; - } - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - return entry->tex_obj ; -} - -} // namespace Impl -} // namespace Kokkos - -#endif - - diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cu b/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cu deleted file mode 100644 index 6a7d0cd..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cu +++ /dev/null @@ -1,587 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -/*--------------------------------------------------------------------------*/ -/* Kokkos interfaces */ - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ -/* Standard 'C' libraries */ -#include - -/* Standard 'C++' libraries */ -#include -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - - -void cuda_internal_error_throw( cudaError e , const char * name, const char * file, const int line ) -{ - std::ostringstream out ; - out << name << " error: " << cudaGetErrorString(e); - if (file) { - out << " " << file << ":" << line; - } - throw_runtime_exception( out.str() ); -} - -//---------------------------------------------------------------------------- -// Some significant cuda device properties: -// -// cudaDeviceProp::name : Text label for device -// cudaDeviceProp::major : Device major number -// cudaDeviceProp::minor : Device minor number -// cudaDeviceProp::warpSize : number of threads per warp -// cudaDeviceProp::multiProcessorCount : number of multiprocessors -// cudaDeviceProp::sharedMemPerBlock : capacity of shared memory per block -// cudaDeviceProp::totalConstMem : capacity of constant memory -// cudaDeviceProp::totalGlobalMem : capacity of global memory -// cudaDeviceProp::maxGridSize[3] : maximum grid size - -// -// Section 4.4.2.4 of the CUDA Toolkit Reference Manual -// -// struct cudaDeviceProp { -// char name[256]; -// size_t totalGlobalMem; -// size_t sharedMemPerBlock; -// int regsPerBlock; -// int warpSize; -// size_t memPitch; -// int maxThreadsPerBlock; -// int maxThreadsDim[3]; -// int maxGridSize[3]; -// size_t totalConstMem; -// int major; -// int minor; -// int clockRate; -// size_t textureAlignment; -// int deviceOverlap; -// int multiProcessorCount; -// int kernelExecTimeoutEnabled; -// int integrated; -// int canMapHostMemory; -// int computeMode; -// int concurrentKernels; -// int ECCEnabled; -// int pciBusID; -// int pciDeviceID; -// int tccDriver; -// int asyncEngineCount; -// int unifiedAddressing; -// int memoryClockRate; -// int memoryBusWidth; -// int l2CacheSize; -// int maxThreadsPerMultiProcessor; -// }; - - -namespace { - - - -class CudaInternalDevices { -public: - enum { MAXIMUM_DEVICE_COUNT = 8 }; - struct cudaDeviceProp m_cudaProp[ MAXIMUM_DEVICE_COUNT ] ; - int m_cudaDevCount ; - - CudaInternalDevices(); - - static const CudaInternalDevices & singleton(); -}; - -CudaInternalDevices::CudaInternalDevices() -{ - // See 'cudaSetDeviceFlags' for host-device thread interaction - // Section 4.4.2.6 of the CUDA Toolkit Reference Manual - - CUDA_SAFE_CALL (cudaGetDeviceCount( & m_cudaDevCount ) ); - - for ( int i = 0 ; i < m_cudaDevCount ; ++i ) { - CUDA_SAFE_CALL( cudaGetDeviceProperties( m_cudaProp + i , i ) ); - } -} - -const CudaInternalDevices & CudaInternalDevices::singleton() -{ - static CudaInternalDevices self ; return self ; -} - -} - -//---------------------------------------------------------------------------- - -class CudaInternal { -private: - - CudaInternal( const CudaInternal & ); - CudaInternal & operator = ( const CudaInternal & ); - -public: - - typedef Cuda::size_type size_type ; - - int m_cudaDev ; - unsigned m_maxWarpCount ; - unsigned m_maxBlock ; - unsigned m_maxSharedWords ; - size_type m_scratchSpaceCount ; - size_type 
m_scratchFlagsCount ; - size_type m_scratchUnifiedCount ; - size_type m_scratchUnifiedSupported ; - size_type * m_scratchSpace ; - size_type * m_scratchFlags ; - size_type * m_scratchUnified ; - - static CudaInternal & raw_singleton(); - static CudaInternal & singleton(); - - const CudaInternal & assert_initialized() const ; - - int is_initialized() const - { return 0 != m_scratchSpace && 0 != m_scratchFlags ; } - - void initialize( int cuda_device_id ); - void finalize(); - - void print_configuration( std::ostream & ) const ; - - ~CudaInternal(); - - CudaInternal() - : m_cudaDev( -1 ) - , m_maxWarpCount( 0 ) - , m_maxBlock( 0 ) - , m_maxSharedWords( 0 ) - , m_scratchSpaceCount( 0 ) - , m_scratchFlagsCount( 0 ) - , m_scratchUnifiedCount( 0 ) - , m_scratchUnifiedSupported( 0 ) - , m_scratchSpace( 0 ) - , m_scratchFlags( 0 ) - , m_scratchUnified( 0 ) - {} - - size_type * scratch_space( const size_type size ); - size_type * scratch_flags( const size_type size ); - size_type * scratch_unified( const size_type size ); -}; - -//---------------------------------------------------------------------------- - - -void CudaInternal::print_configuration( std::ostream & s ) const -{ - const CudaInternalDevices & dev_info = CudaInternalDevices::singleton(); - -#if defined( KOKKOS_HAVE_CUDA ) - s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ; -#endif -#if defined( KOKKOS_HAVE_CUDA_ARCH ) - s << "macro KOKKOS_HAVE_CUDA_ARCH = " << KOKKOS_HAVE_CUDA_ARCH - << " = capability " << KOKKOS_HAVE_CUDA_ARCH / 100 - << "." << ( KOKKOS_HAVE_CUDA_ARCH % 100 ) / 10 - << std::endl ; -#endif -#if defined( CUDA_VERSION ) - s << "macro CUDA_VERSION = " << CUDA_VERSION - << " = version " << CUDA_VERSION / 1000 - << "." << ( CUDA_VERSION % 1000 ) / 10 - << std::endl ; -#endif - - for ( int i = 0 ; i < dev_info.m_cudaDevCount ; ++i ) { - s << "Kokkos::Cuda[ " << i << " ] " - << dev_info.m_cudaProp[i].name - << " capability " << dev_info.m_cudaProp[i].major << "." << dev_info.m_cudaProp[i].minor - << ", Total Global Memory: " << human_memory_size(dev_info.m_cudaProp[i].totalGlobalMem) - << ", Shared Memory per Block: " << human_memory_size(dev_info.m_cudaProp[i].sharedMemPerBlock); - if ( m_cudaDev == i ) s << " : Selected" ; - s << std::endl ; - } -} - -//---------------------------------------------------------------------------- - -CudaInternal::~CudaInternal() -{ - if ( m_scratchSpace || - m_scratchFlags || - m_scratchUnified ) { - std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()" - << std::endl ; - std::cerr.flush(); - } -} - -CudaInternal & CudaInternal::raw_singleton() -{ static CudaInternal self ; return self ; } - -const CudaInternal & CudaInternal::assert_initialized() const -{ - if ( m_cudaDev == -1 ) { - const std::string msg("CATASTROPHIC FAILURE: Using Kokkos::Cuda before calling Kokkos::Cuda::initialize(...)"); - throw_runtime_exception( msg ); - } - return *this ; -} - -CudaInternal & CudaInternal::singleton() -{ - CudaInternal & s = raw_singleton(); - s.assert_initialized(); - return s ; -} - -void CudaInternal::initialize( int cuda_device_id ) -{ - enum { WordSize = sizeof(size_type) }; - - if ( ! 
Cuda::host_mirror_device_type::is_initialized() ) { - const std::string msg("Cuda::initialize ERROR : Cuda::host_mirror_device_type is not initialized"); - throw_runtime_exception( msg ); - } - - const CudaInternalDevices & dev_info = CudaInternalDevices::singleton(); - - const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ; - - const bool ok_id = 0 <= cuda_device_id && - cuda_device_id < dev_info.m_cudaDevCount ; - - // Need device capability 2.0 or better - - const bool ok_dev = ok_id && - ( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major && - 0 <= dev_info.m_cudaProp[ cuda_device_id ].minor ); - - if ( ok_init && ok_dev ) { - - const struct cudaDeviceProp & cudaProp = - dev_info.m_cudaProp[ cuda_device_id ]; - - m_cudaDev = cuda_device_id ; - - CUDA_SAFE_CALL( cudaSetDevice( m_cudaDev ) ); - CUDA_SAFE_CALL( cudaDeviceReset() ); - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - - //---------------------------------- - // Maximum number of warps, - // at most one warp per thread in a warp for reduction. - - // HCE 2012-February : - // Found bug in CUDA 4.1 that sometimes a kernel launch would fail - // if the thread count == 1024 and a functor is passed to the kernel. - // Copying the kernel to constant memory and then launching with - // thread count == 1024 would work fine. - // - // HCE 2012-October : - // All compute capabilities support at least 16 warps (512 threads). - // However, we have found that 8 warps typically gives better performance. - - m_maxWarpCount = 8 ; - - // m_maxWarpCount = cudaProp.maxThreadsPerBlock / Impl::CudaTraits::WarpSize ; - - if ( Impl::CudaTraits::WarpSize < m_maxWarpCount ) { - m_maxWarpCount = Impl::CudaTraits::WarpSize ; - } - - m_maxSharedWords = cudaProp.sharedMemPerBlock / WordSize ; - - //---------------------------------- - - m_maxBlock = cudaProp.maxGridSize[0] ; - - //---------------------------------- - - m_scratchUnifiedSupported = cudaProp.unifiedAddressing ; - - if ( ! m_scratchUnifiedSupported ) { - std::cout << "Kokkos::Cuda device " - << cudaProp.name << " capability " - << cudaProp.major << "." << cudaProp.minor - << " does not support unified virtual address space" - << std::endl ; - } - - //---------------------------------- - // Multiblock reduction uses scratch flags for counters - // and scratch space for partial reduction values. - // Allocate some initial space. This will grow as needed. - - { - const unsigned reduce_block_count = m_maxWarpCount * Impl::CudaTraits::WarpSize ; - - (void) scratch_unified( 16 * sizeof(size_type) ); - (void) scratch_flags( reduce_block_count * 2 * sizeof(size_type) ); - (void) scratch_space( reduce_block_count * 16 * sizeof(size_type) ); - } - } - else { - - std::ostringstream msg ; - msg << "Kokkos::Cuda::initialize(" << cuda_device_id << ") FAILED" ; - - if ( ! ok_init ) { - msg << " : Already initialized" ; - } - if ( ! ok_id ) { - msg << " : Device identifier out of range " - << "[0.." << dev_info.m_cudaDevCount << "]" ; - } - else if ( ! ok_dev ) { - msg << " : Device " ; - msg << dev_info.m_cudaProp[ cuda_device_id ].major ; - msg << "." 
; - msg << dev_info.m_cudaProp[ cuda_device_id ].minor ; - msg << " has insufficient capability, required 2.0 or better" ; - } - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } -} - -//---------------------------------------------------------------------------- - -typedef Cuda::size_type ScratchGrain[ Impl::CudaTraits::WarpSize ] ; -enum { sizeScratchGrain = sizeof(ScratchGrain) }; - - -Cuda::size_type * -CudaInternal::scratch_flags( const Cuda::size_type size ) -{ - assert_initialized(); - - if ( m_scratchFlagsCount * sizeScratchGrain < size ) { - - Cuda::memory_space::decrement( m_scratchFlags ); - - m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; - - m_scratchFlags = (size_type *) - Cuda::memory_space::allocate( - std::string("InternalScratchFlags") , - typeid( ScratchGrain ), - sizeof( ScratchGrain ), - m_scratchFlagsCount ); - - CUDA_SAFE_CALL( cudaMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain ) ); - } - - return m_scratchFlags ; -} - -Cuda::size_type * -CudaInternal::scratch_space( const Cuda::size_type size ) -{ - assert_initialized(); - - if ( m_scratchSpaceCount * sizeScratchGrain < size ) { - - Cuda::memory_space::decrement( m_scratchSpace ); - - m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; - - m_scratchSpace = (size_type *) - Cuda::memory_space::allocate( - std::string("InternalScratchSpace") , - typeid( ScratchGrain ), - sizeof( ScratchGrain ), - m_scratchSpaceCount ); - } - - return m_scratchSpace ; -} - -Cuda::size_type * -CudaInternal::scratch_unified( const Cuda::size_type size ) -{ - assert_initialized(); - - if ( m_scratchUnifiedSupported ) { - - const bool allocate = m_scratchUnifiedCount * sizeScratchGrain < size ; - const bool deallocate = m_scratchUnified && ( 0 == size || allocate ); - - if ( allocate || deallocate ) { - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - } - - if ( deallocate ) { - - CUDA_SAFE_CALL( cudaFreeHost( m_scratchUnified ) ); - - m_scratchUnified = 0 ; - m_scratchUnifiedCount = 0 ; - } - - if ( allocate ) { - - m_scratchUnifiedCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; - - CUDA_SAFE_CALL( cudaHostAlloc( (void **)( & m_scratchUnified ) , - m_scratchUnifiedCount * sizeScratchGrain , - cudaHostAllocDefault ) ); - } - } - - return m_scratchUnified ; -} - -//---------------------------------------------------------------------------- - -void CudaInternal::finalize() -{ - if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) { - - Cuda::memory_space::decrement( m_scratchSpace ); - Cuda::memory_space::decrement( m_scratchFlags ); - (void) scratch_unified( 0 ); - - m_cudaDev = -1 ; - m_maxWarpCount = 0 ; - m_maxBlock = 0 ; - m_maxSharedWords = 0 ; - m_scratchSpaceCount = 0 ; - m_scratchFlagsCount = 0 ; - m_scratchSpace = 0 ; - m_scratchFlags = 0 ; - } -} - -//---------------------------------------------------------------------------- - -Cuda::size_type cuda_internal_maximum_warp_count() -{ return CudaInternal::singleton().m_maxWarpCount ; } - -Cuda::size_type cuda_internal_maximum_grid_count() -{ return CudaInternal::singleton().m_maxBlock ; } - -Cuda::size_type cuda_internal_maximum_shared_words() -{ return CudaInternal::singleton().m_maxSharedWords ; } - -Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_space( size ); } - -Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_flags( size ); } - -Cuda::size_type * 
cuda_internal_scratch_unified( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_unified( size ); } - - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -Cuda::size_type Cuda::detect_device_count() -{ return Impl::CudaInternalDevices::singleton().m_cudaDevCount ; } - -int Cuda::is_initialized() -{ return Impl::CudaInternal::raw_singleton().is_initialized(); } - -void Cuda::initialize( const Cuda::SelectDevice config ) -{ Impl::CudaInternal::raw_singleton().initialize( config.cuda_device_id ); } - -std::vector -Cuda::detect_device_arch() -{ - const Impl::CudaInternalDevices & s = Impl::CudaInternalDevices::singleton(); - - std::vector output( s.m_cudaDevCount ); - - for ( int i = 0 ; i < s.m_cudaDevCount ; ++i ) { - output[i] = s.m_cudaProp[i].major * 100 + s.m_cudaProp[i].minor ; - } - - return output ; -} - -Cuda::size_type Cuda::device_arch() -{ - const int dev_id = Impl::CudaInternal::singleton().m_cudaDev ; - - const struct cudaDeviceProp & cudaProp = - Impl::CudaInternalDevices::singleton().m_cudaProp[ dev_id ] ; - - return cudaProp.major * 100 + cudaProp.minor ; -} - -void Cuda::finalize() -{ Impl::CudaInternal::raw_singleton().finalize(); } - -void Cuda::print_configuration( std::ostream & s , const bool ) -{ Impl::CudaInternal::raw_singleton().print_configuration( s ); } - -bool Cuda::sleep() { return false ; } - -bool Cuda::wake() { return true ; } - -void Cuda::fence() -{ - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); -} - -unsigned Cuda::team_max() -{ - return Impl::CudaInternal::singleton().m_maxWarpCount << Impl::CudaTraits::WarpIndexShift ; -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp deleted file mode 100644 index f386075..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDA_INTERNAL_HPP -#define KOKKOS_CUDA_INTERNAL_HPP - -namespace Kokkos { -namespace Impl { - -void cuda_internal_error_throw( cudaError e , const char * name, const char * file = NULL, const int line = 0 ); - -inline -void cuda_internal_safe_call( cudaError e , const char * name, const char * file = NULL, const int line = 0) -{ - if ( cudaSuccess != e ) { cuda_internal_error_throw( e , name, file, line ); } -} - -} -} - -#define CUDA_SAFE_CALL( call ) \ - Kokkos::Impl::cuda_internal_safe_call( call , #call, __FILE__, __LINE__ ) - -#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */ - diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp deleted file mode 100644 index cf4cfb1..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ /dev/null @@ -1,829 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDA_PARALLEL_HPP -#define KOKKOS_CUDA_PARALLEL_HPP - -#include -#include - -#if defined( __CUDACC__ ) - -#include -#include - -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class WorkSpec > -class ParallelFor< FunctorType , WorkSpec /* size_t */ , Cuda > { -private: - - const FunctorType m_functor ; - const Cuda::size_type m_work ; - - ParallelFor(); - ParallelFor & operator = ( const ParallelFor & ); - -public: - - inline - __device__ - void operator()(void) const - { - const Cuda::size_type work_stride = blockDim.x * gridDim.x ; - - for ( Cuda::size_type - iwork = threadIdx.x + blockDim.x * blockIdx.x ; - iwork < m_work ; - iwork += work_stride ) { - m_functor( iwork ); - } - } - - ParallelFor( const FunctorType & functor , - const size_t work ) - : m_functor( functor ) - , m_work( work ) - { - const dim3 block( CudaTraits::WarpSize * cuda_internal_maximum_warp_count(), 1, 1); - const dim3 grid( std::min( ( m_work + block.x - 1 ) / block.x , cuda_internal_maximum_grid_count() ) , 1 , 1 ); - - CudaParallelLaunch< ParallelFor >( *this , grid , block , 0 ); - } -}; - -template< class FunctorType > -class ParallelFor< FunctorType , ParallelWorkRequest , Cuda > { -private: - - const FunctorType m_functor ; - const ParallelWorkRequest m_work ; - const int m_shmem ; - - ParallelFor(); - ParallelFor & operator = ( const ParallelFor & ); - -public: - - inline - __device__ - void operator()(void) const - { - CudaExec exec( 0 , m_shmem ); - m_functor( Cuda( exec ) ); - } - - ParallelFor( const FunctorType & functor , - const ParallelWorkRequest & work ) - : m_functor( functor ) - , m_work( std::min( work.league_size , size_t(cuda_internal_maximum_grid_count()) ) , - std::min( work.team_size , size_t(CudaTraits::WarpSize * cuda_internal_maximum_warp_count()) ) ) - , m_shmem( FunctorShmemSize< FunctorType >::value( functor ) ) - { - const dim3 grid( m_work.league_size , 1 , 1 ); - const dim3 block( m_work.team_size , 1, 1 ); - - CudaParallelLaunch< ParallelFor >( *this , grid , block , m_shmem ); - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType > -class ParallelFor< FunctorType , CudaWorkConfig , Cuda > { -public: - - const FunctorType m_work_functor ; - - inline - __device__ - void operator()(void) const - { - Cuda::size_type iwork = threadIdx.x + blockDim.x * ( - threadIdx.y + blockDim.y * ( - threadIdx.z + blockDim.z * ( - blockIdx.x + gridDim.x * ( - blockIdx.y + gridDim.y * ( - blockIdx.z ))))); - - m_work_functor( iwork ); - } - - ParallelFor( const FunctorType & functor , - const CudaWorkConfig & work_config ) - : m_work_functor( functor ) - { - const dim3 grid( work_config.grid[0] , - work_config.grid[1] , - work_config.grid[2] ); - - const dim3 block( work_config.block[0] , - work_config.block[1] , - work_config.block[2] ); - - CudaParallelLaunch< ParallelFor >( *this , grid , block , work_config.shared ); - } -}; - -} // namespace Impl -} // namespace Kokkos - 
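For reference, the range-based ParallelFor specialization above relies on a grid-stride loop: each thread starts at its global index and advances by the total thread count of the grid, so a bounded grid can cover an arbitrary work count. Below is a minimal standalone sketch of that pattern, not part of the deleted header; the kernel, functor, and launch-helper names are illustrative, the 256-thread block is an assumed choice, and the 65535-block cap mirrors CudaTraits::UpperBoundGridCount from the deleted Kokkos_CudaExec.hpp.

// Minimal sketch (illustrative only) of the grid-stride loop used by
// ParallelFor< FunctorType , size_t , Cuda >: the functor is passed by
// value to the kernel, as in cuda_parallel_launch_local_memory above.
#include <cuda_runtime.h>
#include <algorithm>

template< class Functor >
__global__ void grid_stride_for( const Functor functor , const unsigned work_count )
{
  // Total number of threads in the grid is the stride between iterations.
  const unsigned stride = blockDim.x * gridDim.x ;

  for ( unsigned iwork = threadIdx.x + blockDim.x * blockIdx.x ;
        iwork < work_count ; iwork += stride ) {
    functor( iwork );
  }
}

// Example functor with a __device__ call operator, analogous to what a
// user would hand to Kokkos::parallel_for.
struct FillFunctor {
  double * data ;
  __device__ void operator()( unsigned i ) const { data[i] = 2.0 * i ; }
};

// Host-side launch mirroring the sizing logic of the deleted code: enough
// blocks to cover the work, capped by a hard upper bound on the grid size
// (65535 here, matching CudaTraits::UpperBoundGridCount).
inline void launch_fill( double * data , const unsigned work_count )
{
  const unsigned block = 256 ;
  const unsigned grid  = std::min( ( work_count + block - 1 ) / block , 65535u );

  FillFunctor f = { data };
  grid_stride_for<<< grid , block >>>( f , work_count );
  cudaDeviceSynchronize();
}

Because the loop strides by the full grid, the same kernel handles work counts far larger than the launched thread count; the deleted ParallelReduce below reuses the same idea per block, with each block assigned an approximately equal slice of the iteration range.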
-//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class WorkSpec > -class ParallelReduce< FunctorType , WorkSpec , Cuda > -{ -public: - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - typedef typename Reduce::reference_type reference_type ; - typedef Cuda::size_type size_type ; - - // Algorithmic constraints: - // (a) blockSize is a power of two - // (b) blockDim.x == BlockSize == 1 << BlockSizeShift - // (c) blockDim.y == blockDim.z == 1 - - enum { WarpCount = 8 }; - enum { BlockSize = CudaTraits::WarpSize << power_of_two< WarpCount >::value }; - enum { BlockSizeShift = power_of_two< BlockSize >::value }; - enum { BlockSizeMask = BlockSize - 1 }; - - enum { GridMaxComputeCapability_2x = 0x0ffff }; - enum { GridMax = BlockSize }; - - const FunctorType m_functor ; - size_type * m_scratch_space ; - size_type * m_scratch_flags ; - size_type * m_unified_space ; - pointer_type m_host_pointer ; - size_type m_work ; - size_type m_work_per_block ; - size_type m_local_block_count ; - size_type m_global_block_begin ; - size_type m_global_block_count ; - - - __device__ inline - void operator()(void) const - { - extern __shared__ size_type shared_data[]; - - const integral_nonzero_constant< size_type , Reduce::StaticValueSize / sizeof(size_type) > - word_count( Reduce::value_size( m_functor ) / sizeof(size_type) ); - - { - reference_type value = Reduce::reference( shared_data + threadIdx.x * word_count.value ); - - m_functor.init( value ); - - // Number of blocks is bounded so that the reduction can be limited to two passes. - // Each thread block is given an approximately equal amount of work to perform. - // Accumulate the values for this block. - // The accumulation ordering does not match the final pass, but is arithmatically equivalent. - - const size_type iwork_beg = blockIdx.x * m_work_per_block ; - const size_type iwork_end = iwork_beg + m_work_per_block < m_work - ? iwork_beg + m_work_per_block : m_work ; - - for ( size_type iwork = threadIdx.x + iwork_beg ; iwork < iwork_end ; iwork += BlockSize ) { - m_functor( iwork , value ); - } - } - - // Reduce with final value at BlockSize - 1 location. - if ( cuda_single_inter_block_reduce_scan( - m_functor , m_global_block_begin + blockIdx.x , m_global_block_count , - shared_data , m_scratch_space , m_scratch_flags ) ) { - - // This is the final block with the final result at the final threads' location - - size_type * const shared = shared_data + BlockSizeMask * word_count.value ; - size_type * const global = m_unified_space ? 
m_unified_space : m_scratch_space ; - - if ( threadIdx.x == 0 ) { Reduce::final( m_functor , shared ); } - - if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } - - for ( unsigned i = threadIdx.x ; i < word_count.value ; i += BlockSize ) { global[i] = shared[i]; } - } - } - - ParallelReduce( const FunctorType & functor , - const size_t nwork , - const pointer_type result = 0 , - const bool execute_immediately = true ) - : m_functor( functor ) - , m_scratch_space( 0 ) - , m_scratch_flags( 0 ) - , m_unified_space( 0 ) - , m_host_pointer( result ) - , m_work( nwork ) - , m_work_per_block( 0 ) - , m_local_block_count( 0 ) - , m_global_block_begin( 0 ) - , m_global_block_count( 0 ) - { - // At most 'max_grid' blocks: - const int max_grid = std::min( int(GridMax) , int(( nwork + BlockSizeMask ) / BlockSize )); - - // How much work per block: - m_work_per_block = ( nwork + max_grid - 1 ) / max_grid ; - - // How many block are really needed for this much work: - m_local_block_count = ( nwork + m_work_per_block - 1 ) / m_work_per_block ; - m_global_block_count = m_local_block_count ; - - m_scratch_space = cuda_internal_scratch_space( Reduce::value_size( functor ) * m_local_block_count ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( Reduce::value_size( functor ) ); - - if ( execute_immediately ) { execute(); } - } - - inline - void execute() const - { - const dim3 grid( m_local_block_count , 1 , 1 ); - const dim3 block( BlockSize , 1 , 1 ); - const int shmem = cuda_single_inter_block_reduce_scan_shmem( m_functor ); - - CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem ); // copy to device and execute - } - - void wait() const - { - Cuda::fence(); - - if ( m_host_pointer ) { - if ( m_unified_space ) { - const int count = Reduce::value_count( m_functor ); - for ( int i = 0 ; i < count ; ++i ) { m_host_pointer[i] = pointer_type(m_unified_space)[i] ; } - } - else { - const int size = Reduce::value_size( m_functor ); - DeepCopy( m_host_pointer , m_scratch_space , size ); - } - } - } -}; - - -template< class FunctorType > -class ParallelReduce< FunctorType , ParallelWorkRequest , Cuda > -{ -public: - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - typedef typename Reduce::reference_type reference_type ; - typedef Cuda::size_type size_type ; - - // Algorithmic constraints: - // (a) blockSize is a power of two - // (b) blockDim.x == BlockSize == 1 << BlockSizeShift - // (b) blockDim.y == blockDim.z == 1 - - enum { WarpCount = 8 }; - enum { BlockSize = CudaTraits::WarpSize << power_of_two< WarpCount >::value }; - enum { BlockSizeShift = power_of_two< BlockSize >::value }; - enum { BlockSizeMask = BlockSize - 1 }; - - enum { GridMaxComputeCapability_2x = 0x0ffff }; - enum { GridMax = BlockSize }; - - const FunctorType m_functor ; - size_type * m_scratch_space ; - size_type * m_scratch_flags ; - size_type * m_unified_space ; - pointer_type m_host_pointer ; - size_type m_shmem_begin ; - size_type m_shmem_end ; - size_type m_local_block_count ; - size_type m_global_block_begin ; - size_type m_global_block_count ; - - __device__ inline - void operator()(void) const - { - extern __shared__ size_type shared_data[]; - - const integral_nonzero_constant< size_type , Reduce::StaticValueSize / sizeof(size_type) > - word_count( Reduce::value_size( m_functor ) / sizeof(size_type) ); - - { - reference_type value = Reduce::reference( shared_data + threadIdx.x * 
word_count.value ); - - m_functor.init( value ); - - CudaExec exec( m_shmem_begin , m_shmem_end ); - - m_functor( Cuda( exec ) , value ); - } - - // Reduce with final value at BlockSize - 1 location. - if ( cuda_single_inter_block_reduce_scan( - m_functor , m_global_block_begin + blockIdx.x , m_global_block_count , - shared_data , m_scratch_space , m_scratch_flags ) ) { - - // This is the final block with the final result at the final threads' location - - size_type * const shared = shared_data + BlockSizeMask * word_count.value ; - size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ; - - if ( threadIdx.x == 0 ) { Reduce::final( m_functor , shared ); } - - if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } - - for ( unsigned i = threadIdx.x ; i < word_count.value ; i += BlockSize ) { global[i] = shared[i]; } - } - } - - - ParallelReduce( const FunctorType & functor , - const ParallelWorkRequest & work , - const pointer_type result = 0 , - const bool execute_immediately = true ) - : m_functor( functor ) - , m_scratch_space( 0 ) - , m_scratch_flags( 0 ) - , m_unified_space( 0 ) - , m_host_pointer( result ) - , m_shmem_begin( cuda_single_inter_block_reduce_scan_shmem( functor ) ) - , m_shmem_end( cuda_single_inter_block_reduce_scan_shmem( functor ) - + FunctorShmemSize< FunctorType >::value( functor ) ) - , m_local_block_count( 0 ) - , m_global_block_begin( 0 ) - , m_global_block_count( 0 ) - { - m_local_block_count = std::min( int(GridMax) , int(work.league_size) ); - m_global_block_count = std::min( int(GridMax) , int(work.league_size) ); - m_scratch_space = cuda_internal_scratch_space( Reduce::value_size( functor ) * m_local_block_count ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( Reduce::value_size( functor ) ); - - if ( execute_immediately ) { execute(); } - } - - inline - void execute() const - { - const dim3 grid( m_local_block_count , 1 , 1 ); - const dim3 block( BlockSize , 1 , 1 ); - - CudaParallelLaunch< ParallelReduce >( *this, grid, block, m_shmem_end ); // copy to device and execute - } - - void wait() const - { - Cuda::fence(); - - if ( m_host_pointer ) { - if ( m_unified_space ) { - const int count = Reduce::value_count( m_functor ); - for ( int i = 0 ; i < count ; ++i ) { m_host_pointer[i] = pointer_type(m_unified_space)[i] ; } - } - else { - const int size = Reduce::value_size( m_functor ); - DeepCopy( m_host_pointer , m_scratch_space , size ); - } - } - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class Functor > -class MultiFunctorParallelReduceMember ; - -template<> -class MultiFunctorParallelReduceMember -{ -private: - - MultiFunctorParallelReduceMember( const MultiFunctorParallelReduceMember & ); - MultiFunctorParallelReduceMember & operator = ( const MultiFunctorParallelReduceMember & ); - -protected: - - MultiFunctorParallelReduceMember() {} - -public: - - virtual unsigned block_count() const = 0 ; - - virtual ~MultiFunctorParallelReduceMember() {} - - virtual void execute( void * const host_pointer , - const unsigned global_block_begin , - const unsigned global_block_count ) = 0 ; - - virtual void wait() const = 0 ; -}; - -template< class Functor > -class MultiFunctorParallelReduceMember : public 
MultiFunctorParallelReduceMember { -public: - ParallelReduce< Functor , size_t , Cuda > m_functor ; - - MultiFunctorParallelReduceMember( const Functor & f , size_t nwork ) - : MultiFunctorParallelReduceMember() - , m_functor( f , nwork , 0 , false ) - {} - - virtual unsigned block_count() const { return m_functor.m_local_block_count ; } - - virtual void execute( void * const host_pointer , - const unsigned global_block_begin , - const unsigned global_block_count ) - { - m_functor.m_host_pointer = typename ReduceAdapter< Functor >::pointer_type(host_pointer); - m_functor.m_global_block_begin = global_block_begin ; - m_functor.m_global_block_count = global_block_count ; - m_functor.execute(); - } - - virtual void wait() const { m_functor.wait(); } -}; - -} // namespace Impl -} // namespace Kokkos - -namespace Kokkos { - -template<> -class MultiFunctorParallelReduce< Cuda > -{ -private: - - typedef std::vector< Impl::MultiFunctorParallelReduceMember * > MemberVector ; - - MemberVector m_functors ; - -public: - - MultiFunctorParallelReduce() - : m_functors() - {} - - ~MultiFunctorParallelReduce() - { - while ( ! m_functors.empty() ) { - delete m_functors.back(); - m_functors.pop_back(); - } - } - - template< class FunctorType > - void push_back( const size_t work_count , const FunctorType & f ) - { - m_functors.push_back( new Impl::MultiFunctorParallelReduceMember( f , work_count ) ); - } - - void execute( void * host_pointer ) - { - typename MemberVector::iterator m ; - - Cuda::size_type block_count = 0 ; - - for ( m = m_functors.begin() ; m != m_functors.end() ; ++m ) { - block_count += (*m)->block_count(); - } - - Cuda::size_type block_offset = 0 ; - - for ( m = m_functors.begin() ; m != m_functors.end() ; ++m ) { - (*m)->execute( host_pointer , block_offset , block_count ); - block_offset += (*m)->block_count(); - } - } - - void wait() const - { - if ( ! m_functors.empty() ) { (m_functors.back())->wait(); } - } -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class WorkSpec > -class ParallelScan< FunctorType , WorkSpec , Cuda > -{ -public: - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - typedef typename Reduce::reference_type reference_type ; - typedef Cuda::size_type size_type ; - - // Algorithmic constraints: - // (a) blockSize is a power of two - // (b) blockDim.x == BlockSize == 1 << BlockSizeShift - // (b) blockDim.y == blockDim.z == 1 - // (c) gridDim.x <= blockDim.x * blockDim.x - // (d) gridDim.y == gridDim.z == 1 - - // blockDim.x must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps) - // gridDim.x <= blockDim.x * blockDim.x - // - // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing - - enum { WarpCount = 4 }; - enum { BlockSize = CudaTraits::WarpSize << power_of_two< WarpCount >::value }; - enum { BlockSizeShift = power_of_two< BlockSize >::value }; - enum { BlockSizeMask = BlockSize - 1 }; - - enum { GridMaxComputeCapability_2x = 0x0ffff }; - enum { GridMax = ( BlockSize * BlockSize ) < GridMaxComputeCapability_2x - ? 
( BlockSize * BlockSize ) : GridMaxComputeCapability_2x }; - - const FunctorType m_functor ; - size_type * m_scratch_space ; - size_type * m_scratch_flags ; - const size_type m_work ; - size_type m_work_per_block ; - size_type m_final ; - - //---------------------------------------- - - __device__ inline - void initial(void) const - { - extern __shared__ size_type shared_data[]; - - const integral_nonzero_constant< size_type , Reduce::StaticValueSize / sizeof(size_type) > - word_count( Reduce::value_size( m_functor ) / sizeof(size_type) ); - - size_type * const shared_value = shared_data + word_count.value * threadIdx.x ; - - m_functor.init( Reduce::reference( shared_value ) ); - - // Number of blocks is bounded so that the reduction can be limited to two passes. - // Each thread block is given an approximately equal amount of work to perform. - // Accumulate the values for this block. - // The accumulation ordering does not match the final pass, but is arithmatically equivalent. - - const size_type iwork_beg = blockIdx.x * m_work_per_block ; - const size_type iwork_end = iwork_beg + m_work_per_block < m_work - ? iwork_beg + m_work_per_block : m_work ; - - for ( size_type iwork = threadIdx.x + iwork_beg ; iwork < iwork_end ; iwork += BlockSize ) { - m_functor( iwork , Reduce::reference( shared_value ) , false ); - } - - // Reduce and scan, writing out scan of blocks' totals and block-groups' totals. - // Blocks' scan values are written to 'blockIdx.x' location. - // Block-groups' scan values are at: i = ( j * BlockSize - 1 ) for i < gridDim.x - cuda_single_inter_block_reduce_scan( m_functor , blockIdx.x , gridDim.x , shared_data , m_scratch_space , m_scratch_flags ); - } - - //---------------------------------------- - - __device__ inline - void final(void) const - { - extern __shared__ size_type shared_data[]; - - const integral_nonzero_constant< size_type , Reduce::StaticValueSize / sizeof(size_type) > - word_count( Reduce::value_size( m_functor ) / sizeof(size_type) ); - - // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , value[2] , ... } - size_type * const shared_prefix = shared_data + word_count.value * threadIdx.x ; - size_type * const shared_accum = shared_data + word_count.value * ( BlockSize + 1 ); - - // Starting value for this thread block is the previous block's total. - if ( blockIdx.x ) { - size_type * const block_total = m_scratch_space + word_count.value * ( blockIdx.x - 1 ); - for ( unsigned i = threadIdx.x ; i < word_count.value ; ++i ) { shared_accum[i] = block_total[i] ; } - } - else if ( 0 == threadIdx.x ) { - m_functor.init( Reduce::reference( shared_accum ) ); - } - - unsigned iwork_beg = blockIdx.x * m_work_per_block ; - const unsigned iwork_end = iwork_beg + m_work_per_block ; - - for ( ; iwork_beg < iwork_end ; iwork_beg += BlockSize ) { - - const unsigned iwork = threadIdx.x + iwork_beg ; - - __syncthreads(); // Don't overwrite previous iteration values until they are used - - m_functor.init( Reduce::reference( shared_prefix + word_count.value ) ); - - // Copy previous block's accumulation total into thread[0] prefix and inclusive scan value of this block - for ( unsigned i = threadIdx.x ; i < word_count.value ; ++i ) { - shared_data[i + word_count.value] = shared_data[i] = shared_accum[i] ; - } - - if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values. 
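// Illustrative host-side sketch, not from the Kokkos sources above: it models the
// two-pass structure of ParallelScan, where the "initial" pass produces per-block
// totals, those totals are scanned, and the "final" pass (the code around this point)
// seeds each block with the sum of all preceding blocks and emits an exclusive scan.
// The block size and input values are arbitrary.
#include <cstdio>
#include <vector>

int main()
{
  const int block = 4;                                   // stand-in for the work per block
  const std::vector<int> in = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3};
  const int n = int(in.size());
  const int nblocks = (n + block - 1) / block;

  // Pass 1: per-block totals (what the "initial" kernel writes to scratch space).
  std::vector<int> block_total(nblocks, 0);
  for (int b = 0; b < nblocks; ++b)
    for (int i = b * block; i < n && i < (b + 1) * block; ++i)
      block_total[b] += in[i];

  // Exclusive scan of the block totals (the inter-block portion of the algorithm).
  std::vector<int> block_offset(nblocks, 0);
  for (int b = 1; b < nblocks; ++b)
    block_offset[b] = block_offset[b - 1] + block_total[b - 1];

  // Pass 2: each block emits an exclusive scan starting from its offset, the role
  // played above by seeding shared_accum with the previous block's total.
  std::vector<int> out(n, 0);
  for (int b = 0; b < nblocks; ++b) {
    int running = block_offset[b];
    for (int i = b * block; i < n && i < (b + 1) * block; ++i) {
      out[i] = running;                                  // exclusive: value before adding in[i]
      running += in[i];
    }
  }

  for (int v : out) std::printf("%d ", v);               // 0 3 4 8 9 14 23 25 31 36
  std::printf("\n");
  return 0;
}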
- - // Call functor to accumulate inclusive scan value for this work item - if ( iwork < m_work ) { m_functor( iwork , Reduce::reference( shared_prefix + word_count.value ) , false ); } - - // Scan block values into locations shared_data[1..BlockSize] - cuda_intra_block_reduce_scan( m_functor , Reduce::pointer_type(shared_data+word_count.value) ); - - { - size_type * const block_total = shared_data + word_count.value * blockDim.x ; - for ( unsigned i = threadIdx.x ; i < word_count.value ; ++i ) { shared_accum[i] = block_total[i]; } - } - - // Call functor with exclusive scan value - if ( iwork < m_work ) { m_functor( iwork , Reduce::reference( shared_prefix ) , true ); } - } - } - - //---------------------------------------- - - __device__ inline - void operator()(void) const - { - if ( ! m_final ) { - initial(); - } - else { - final(); - } - } - - ParallelScan( const FunctorType & functor , - const size_t nwork ) - : m_functor( functor ) - , m_scratch_space( 0 ) - , m_scratch_flags( 0 ) - , m_work( nwork ) - , m_work_per_block( 0 ) - , m_final( false ) - { - // At most 'max_grid' blocks: - const int max_grid = std::min( int(GridMax) , int(( nwork + BlockSizeMask ) / BlockSize )); - - // How much work per block: - m_work_per_block = ( nwork + max_grid - 1 ) / max_grid ; - - // How many block are really needed for this much work: - const dim3 grid( ( nwork + m_work_per_block - 1 ) / m_work_per_block , 1 , 1 ); - const dim3 block( BlockSize , 1 , 1 ); - const int shmem = Reduce::value_size( functor ) * ( BlockSize + 2 ); - - m_scratch_space = cuda_internal_scratch_space( Reduce::value_size( functor ) * grid.x ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 ); - - m_final = false ; - CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute - - m_final = true ; - CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute - } - - void wait() const { Cuda::fence(); } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDA_ARCH__ ) - -namespace Kokkos { -namespace Impl { - -template< typename Type > -struct CudaJoinFunctor { - typedef Type value_type ; - - KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , - volatile const value_type & input ) - { update += input ; } -}; - -} // namespace Impl -} // namespace Kokkos - -namespace Kokkos { - -template< typename TypeLocal , typename TypeGlobal > -__device__ inline TypeGlobal Cuda::team_scan( const TypeLocal & value , TypeGlobal * const global_accum ) -{ - enum { BlockSizeMax = 512 }; - - __shared__ TypeGlobal base_data[ BlockSizeMax + 1 ]; - - __syncthreads(); // Don't write in to shared data until all threads have entered this function - - if ( 0 == threadIdx.x ) { base_data[0] = 0 ; } - - base_data[ threadIdx.x + 1 ] = value ; - - Impl::cuda_intra_block_reduce_scan( Impl::CudaJoinFunctor() , base_data + 1 ); - - if ( global_accum ) { - if ( blockDim.x == threadIdx.x + 1 ) { - base_data[ blockDim.x ] = atomic_fetch_add( global_accum , base_data[ blockDim.x ] ); - } - __syncthreads(); // Wait for atomic - base_data[ threadIdx.x ] += base_data[ blockDim.x ] ; - } - - return base_data[ threadIdx.x ]; -} - -template< typename Type > -__device__ inline Type Cuda::team_scan( const Type & value ) -{ return team_scan( value , (Type*) 0 ); } - -} // namespace 
Kokkos - -#else /* ! defined( __CUDA_ARCH__ ) */ - -namespace Kokkos { - -template< typename Type > inline Type Cuda::team_scan( const Type & ) { return 0 ; } - -template< typename TypeLocal , typename TypeGlobal > -inline TypeGlobal Cuda::team_scan( const TypeLocal & , TypeGlobal * const ) { return 0 ; } - -} // namespace Kokkos - -#endif /* ! defined( __CUDA_ARCH__ ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* defined( __CUDACC__ ) */ - -#endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */ - diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp deleted file mode 100644 index d9f2d8f..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ /dev/null @@ -1,267 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDA_REDUCESCAN_HPP -#define KOKKOS_CUDA_REDUCESCAN_HPP - -#if defined( __CUDACC__ ) - -#include - -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -// See section B.17 of Cuda C Programming Guide Version 3.2 -// for discussion of -// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) -// function qualifier which could be used to improve performance. 
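// Illustrative host-side sketch, not from the Kokkos sources: a power-of-two pairwise
// reduction tree in the spirit of cuda_intra_block_reduce_scan defined below. At step s,
// entries whose index has its low s+1 bits set absorb the partner 2^s positions earlier,
// so the block total lands in the last slot (the real kernel's indexing and multi-word
// values differ in detail; this only shows the shape of the tree). Values are arbitrary.
#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> data = {1, 2, 3, 4, 5, 6, 7, 8};        // size must be a power of two

  for (std::size_t step = 1; step < data.size(); step <<= 1) {
    for (std::size_t i = 0; i < data.size(); ++i) {
      if ((i & (2 * step - 1)) == (2 * step - 1)) {        // analogue of the BLOCK_REDUCE_STEP test
        data[i] += data[i - step];
      }
    }
  }

  std::printf("block total in the last slot: %d\n", data.back());   // 36
  return 0;
}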
-//---------------------------------------------------------------------------- -// Maximize shared memory and minimize L1 cache: -// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferShared ); -// For 2.0 capability: 48 KB shared and 16 KB L1 -//---------------------------------------------------------------------------- -// Must have consistent '__shared__' statement across all device kernels. -// Since there may be more than one kernel in a file then have to make this -// a simple array of words. -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -/* - * Algorithmic constraints: - * (a) blockDim.x is a power of two - * (b) blockDim.x <= 512 - * (c) blockDim.y == blockDim.z == 1 - */ -template< bool DoScan , class FunctorType > -__device__ -void cuda_intra_block_reduce_scan( const FunctorType & functor , - const typename ReduceAdapter< FunctorType >::pointer_type base_data ) -{ - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - const unsigned value_count = Reduce::value_count( functor ); - const unsigned BlockSizeMask = blockDim.x - 1 ; - - // Must have power of two thread count - - if ( BlockSizeMask & blockDim.x ) { cuda_abort("Cuda::cuda_intra_block_scan requires power-of-two blockDim"); } - -#define BLOCK_REDUCE_STEP( R , TD , S ) \ - if ( ! ( R & ((1<<(S+1))-1) ) ) \ - { functor.join( Reduce::reference(TD) , Reduce::reference(TD - (value_count< -__device__ -bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , - const Cuda::size_type block_id , - const Cuda::size_type block_count , - Cuda::size_type * const shared_data , - Cuda::size_type * const global_data , - Cuda::size_type * const global_flags ) -{ - typedef Cuda::size_type size_type ; - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - typedef typename Reduce::reference_type reference_type ; - - enum { BlockSize = ArgBlockSize }; - enum { BlockSizeMask = BlockSize - 1 }; - enum { BlockSizeShift = power_of_two< BlockSize >::value }; - - const integral_nonzero_constant< size_type , Reduce::StaticValueSize / sizeof(size_type) > - word_count( Reduce::value_size( functor ) / sizeof(size_type) ); - - // Must have power of two thread count - if ( BlockSize != blockDim.x ) { cuda_abort("Cuda::cuda_inter_block_scan wrong blockDim.x"); } - - // Reduce the accumulation for the entire block. - cuda_intra_block_reduce_scan( functor , pointer_type(shared_data) ); - - { - // Write accumulation total to global scratch space. - // Accumulation total is the last thread's data. - size_type * const shared = shared_data + word_count.value * BlockSizeMask ; - size_type * const global = global_data + word_count.value * block_id ; - - for ( size_type i = threadIdx.x ; i < word_count.value ; i += BlockSize ) { global[i] = shared[i] ; } - } - - // Contributing blocks note that their contribution has been completed via an atomic-increment flag - // If this block is not the last block to contribute to this group then the block is done. - const bool is_last_block = - ! __syncthreads_or( threadIdx.x ? 
0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) ); - - if ( is_last_block ) { - - const size_type b = ( long(block_count) * long(threadIdx.x) ) >> BlockSizeShift ; - const size_type e = ( long(block_count) * long( threadIdx.x + 1 ) ) >> BlockSizeShift ; - - { - reference_type shared_value = Reduce::reference( shared_data + word_count.value * threadIdx.x ); - - functor.init( shared_value ); - - for ( size_type i = b ; i < e ; ++i ) { - functor.join( shared_value , Reduce::reference( global_data + word_count.value * i ) ); - } - } - - cuda_intra_block_reduce_scan( functor , pointer_type(shared_data) ); - - if ( DoScan ) { - - size_type * const shared_value = shared_data + word_count.value * ( threadIdx.x ? threadIdx.x - 1 : BlockSize ); - - if ( ! threadIdx.x ) { functor.init( Reduce::reference( shared_value ) ); } - - // Join previous inclusive scan value to each member - for ( size_type i = b ; i < e ; ++i ) { - size_type * const global_value = global_data + word_count.value * i ; - functor.join( Reduce::reference( shared_value ) , Reduce::reference( global_value ) ); - Reduce::copy( functor , global_value , shared_value ); - } - } - } - - return is_last_block ; -} - -template< bool DoScan , unsigned ArgBlockSize , class FunctorType > -inline -unsigned cuda_single_inter_block_reduce_scan_shmem( const FunctorType & functor ) -{ - return ( ArgBlockSize + 2 ) * ReduceAdapter< FunctorType >::value_size( functor ); -} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #if defined( __CUDACC__ ) */ -#endif /* KOKKOS_CUDA_REDUCESCAN_HPP */ - diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp deleted file mode 100644 index 2e7ada6..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ /dev/null @@ -1,928 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
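// Illustrative host-side sketch, not from the Kokkos sources above: the "last block"
// handshake used by cuda_single_inter_block_reduce_scan, where every block publishes its
// partial result, atomically bumps a shared counter, and the block that sees the counter
// reach the block count performs the final combine. std::thread and std::atomic stand in
// for CUDA blocks and atomicInc (ignoring atomicInc's wrap-around); the counts are made up.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main()
{
  const unsigned block_count = 8;
  std::atomic<unsigned> flag{0};                 // plays the role of global_flags
  std::vector<int> partial(block_count, 0);      // plays the role of the global scratch space
  std::atomic<int> final_total{0};

  std::vector<std::thread> blocks;
  for (unsigned b = 0; b < block_count; ++b) {
    blocks.emplace_back([&, b] {
      partial[b] = int(b) + 1;                   // publish this block's partial reduction
      const unsigned prior = flag.fetch_add(1);  // atomic arrival counter
      if (prior + 1 == block_count) {            // only the last arriver combines
        int total = 0;
        for (int p : partial) { total += p; }
        final_total.store(total);
      }
    });
  }
  for (auto& t : blocks) { t.join(); }

  std::printf("total combined by the last block: %d\n", final_total.load());   // 36
  return 0;
}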
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDA_VIEW_HPP -#define KOKKOS_CUDA_VIEW_HPP - -#include - -#if defined( __CUDACC__ ) -#include -#endif - -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template<> -struct AssertShapeBoundsAbort< CudaSpace > -{ - KOKKOS_INLINE_FUNCTION - static void apply( const size_t /* rank */ , - const size_t /* n0 */ , const size_t /* n1 */ , - const size_t /* n2 */ , const size_t /* n3 */ , - const size_t /* n4 */ , const size_t /* n5 */ , - const size_t /* n6 */ , const size_t /* n7 */ , - - const size_t /* arg_rank */ , - const size_t /* i0 */ , const size_t /* i1 */ , - const size_t /* i2 */ , const size_t /* i3 */ , - const size_t /* i4 */ , const size_t /* i5 */ , - const size_t /* i6 */ , const size_t /* i7 */ ) - { - Kokkos::cuda_abort("Kokkos::View array bounds violation"); - } -}; - -} -} - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -// Cuda 5.0 defines 'cudaTextureObject_t' -// to be an 'unsigned long long'. This chould change with -// future version of Cuda and this typedef would have to -// change accordingly. - -#if defined( CUDA_VERSION ) && ( 500 <= CUDA_VERSION ) - -typedef enable_if< - sizeof(::cudaTextureObject_t) == sizeof(const void *) , - ::cudaTextureObject_t >::type cuda_texture_object_type ; - -cuda_texture_object_type -cuda_texture_object_attach( - const cudaChannelFormatDesc & , - const void * const ); - -template< typename TextureType > -inline -cuda_texture_object_type -cuda_texture_object_attach( const void * const base_view_ptr ) -{ - return cuda_texture_object_attach( cudaCreateChannelDesc() , base_view_ptr ); -} - -#else - -typedef const void * cuda_texture_object_type ; - -template< typename TextureType > -inline -cuda_texture_object_type -cuda_texture_object_attach( const void * const ) -{ return 0 ; } - -#endif - -//---------------------------------------------------------------------------- - -template< typename ValueType > -struct CudaTextureFetch ; - -/** \brief Cuda texture fetch is limited to a subset of Cuda types. - * Map commonly used types to the required subset of Cuda types. 
- */ - -template< typename ValueType > -struct CudaTextureFetch< const ValueType > { -private: - - cuda_texture_object_type obj ; - -public: - - const ValueType * ptr ; - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch() : obj( 0 ) , ptr( 0 ) {} - - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( const CudaTextureFetch & rhs ) - : obj( rhs.obj ) , ptr( rhs.ptr ) {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) - { obj = rhs.obj ; ptr = rhs.ptr ; return *this ; } - - explicit - CudaTextureFetch( ValueType * const base_view_ptr ) - : obj( cuda_texture_object_attach( base_view_ptr ) ) - , ptr( base_view_ptr ) {} - - template< typename iType > - KOKKOS_INLINE_FUNCTION - ValueType operator[]( const iType & i ) const - { - return ptr[ i ]; - } -}; - -template<> -struct CudaTextureFetch< const int > { -private: - - cuda_texture_object_type obj ; - -public: - - const int * ptr ; - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch() : obj( 0 ) , ptr( 0 ) {} - - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( const CudaTextureFetch & rhs ) - : obj( rhs.obj ) , ptr( rhs.ptr ) {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) - { obj = rhs.obj ; ptr = rhs.ptr ; return *this ; } - - explicit - CudaTextureFetch( const int * const base_view_ptr ) - : obj( cuda_texture_object_attach( base_view_ptr ) ) - , ptr( base_view_ptr ) {} - - template< typename iType > - KOKKOS_INLINE_FUNCTION - int operator[]( const iType & i ) const - { -#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) -#ifdef KOKKOS_USE_LDG_INTRINSIC - return _ldg(&ptr[i]); -#else - return tex1Dfetch( obj , i ); -#endif -#else - return ptr[ i ]; -#endif - } -}; - -template<> -struct CudaTextureFetch< const unsigned int > { -private: - - cuda_texture_object_type obj ; - -public: - - const unsigned int * ptr ; - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch() : obj( 0 ) , ptr( 0 ) {} - - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( const CudaTextureFetch & rhs ) - : obj( rhs.obj ) , ptr( rhs.ptr ) {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) - { obj = rhs.obj ; ptr = rhs.ptr ; return *this ; } - - explicit - CudaTextureFetch( const unsigned int * const base_view_ptr ) - : obj( cuda_texture_object_attach( base_view_ptr ) ) - , ptr( base_view_ptr ) {} - - template< typename iType > - KOKKOS_INLINE_FUNCTION - unsigned int operator[]( const iType & i ) const - { -#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) -#ifdef KOKKOS_USE_LDG_INTRINSIC - return _ldg(&ptr[i]); -#else - return tex1Dfetch( obj , i ); -#endif -#else - return ptr[ i ]; -#endif - } -}; - -template<> -struct CudaTextureFetch< const float > { -private: - - cuda_texture_object_type obj ; - -public: - - const float * ptr ; - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch() : obj( 0 ) , ptr( 0 ) {} - - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( const CudaTextureFetch & rhs ) - : obj( rhs.obj ) , ptr( rhs.ptr ) {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) - { obj = rhs.obj ; ptr = rhs.ptr ; return *this ; } - - explicit - CudaTextureFetch( const float * const base_view_ptr ) - : obj( cuda_texture_object_attach( base_view_ptr ) ) - , ptr( base_view_ptr ) {} - - template< typename iType > - 
KOKKOS_INLINE_FUNCTION - float operator[]( const iType & i ) const - { -#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) -#ifdef KOKKOS_USE_LDG_INTRINSIC - return _ldg(&ptr[i]); -#else - return tex1Dfetch( obj , i ); -#endif -#else - return ptr[ i ]; -#endif - } -}; - -template<> -struct CudaTextureFetch< const double > { -private: - - cuda_texture_object_type obj ; - -public: - - const double * ptr ; - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch() : obj( 0 ) , ptr( 0 ) {} - - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( const CudaTextureFetch & rhs ) - : obj( rhs.obj ) , ptr( rhs.ptr ) {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) - { obj = rhs.obj ; ptr = rhs.ptr ; return *this ; } - - explicit - CudaTextureFetch( const double * const base_view_ptr ) - : obj( cuda_texture_object_attach( base_view_ptr ) ) - , ptr( base_view_ptr ) {} - - template< typename iType > - KOKKOS_INLINE_FUNCTION - double operator[]( const iType & i ) const - { -#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) -#ifdef KOKKOS_USE_LDG_INTRINSIC - return _ldg(&ptr[i]); -#else - int2 v = tex1Dfetch( obj , i ); - return __hiloint2double(v.y, v.x); -#endif -#else - return ptr[ i ]; -#endif - } -}; - -template<> -struct CudaTextureFetch< const double2 > { -private: - - cuda_texture_object_type obj ; - -public: - - const double2 * ptr ; - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch() : obj( 0 ) , ptr( 0 ) {} - - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch( const CudaTextureFetch & rhs ) - : obj( rhs.obj ) , ptr( rhs.ptr ) {} - - KOKKOS_INLINE_FUNCTION - CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) - { obj = rhs.obj ; ptr = rhs.ptr ; return *this ; } - - explicit - CudaTextureFetch( const double2 * const base_view_ptr ) - : obj( cuda_texture_object_attach( base_view_ptr ) ) - , ptr( base_view_ptr ) {} - - template< typename iType > - KOKKOS_INLINE_FUNCTION - double2 operator[]( const iType & i ) const - { -#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ ) -#ifdef KOKKOS_USE_LDG_INTRINSIC - return _ldg(&ptr[i]); -#else - int4 v = tex1Dfetch(tex_obj , idx); - double2 retval = { __hiloint2double(v.y, v.x) , __hiloint2double(v.w, v.z) }; - return retval ; -#endif -#else - return ptr[ i ]; -#endif - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -struct CudaTexture {}; - -#if defined( CUDA_VERSION ) && ( 500 <= CUDA_VERSION ) - -/** \brief Replace LayoutDefault specialization */ -template< typename ScalarType , class Rank , class RankDynamic > -struct ViewSpecialize< const ScalarType , const ScalarType , - LayoutLeft , Rank , RankDynamic , - CudaSpace , MemoryRandomRead > -{ typedef CudaTexture type ; }; - -template< typename ScalarType , class Rank , class RankDynamic > -struct ViewSpecialize< const ScalarType , const ScalarType , - LayoutRight , Rank , RankDynamic , - CudaSpace , MemoryRandomRead > -{ typedef CudaTexture type ; }; - -/** \brief Scalar View matching **/ -template< typename ScalarType > -struct ViewSpecialize< const ScalarType , const ScalarType , - LayoutLeft , unsigned_<0> , unsigned_<0> , - CudaSpace , MemoryRandomRead > -{ typedef CudaTexture type ; }; - -template< typename ScalarType > -struct ViewSpecialize< const 
ScalarType , const ScalarType , - LayoutRight , unsigned_<0> , unsigned_<0> , - CudaSpace , MemoryRandomRead > -{ typedef CudaTexture type ; }; - -#endif - -//---------------------------------------------------------------------------- - -template<> -struct ViewAssignment< CudaTexture , CudaTexture , void > -{ - /** \brief Assign compatible views */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if<( - ViewAssignable< ViewTraits , ViewTraits >::value - ) >::type * = 0 ) - { - typedef View DstViewType ; - - typedef typename DstViewType::shape_type shape_type ; - typedef typename DstViewType::memory_space memory_space ; - typedef typename DstViewType::memory_traits memory_traits ; - - dst.m_texture = src.m_texture ; - dst.m_stride = src.m_stride ; - - shape_type::assign( dst.m_shape, - src.m_shape.N0 , src.m_shape.N1 , src.m_shape.N2 , src.m_shape.N3 , - src.m_shape.N4 , src.m_shape.N5 , src.m_shape.N6 , src.m_shape.N7 ); - } -}; - - -template<> -struct ViewAssignment< CudaTexture , LayoutDefault , void > -{ - /** \brief Assign compatible views */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - inline - ViewAssignment( View & dst , - const View & src , - const typename enable_if<( - ViewAssignable< ViewTraits , ViewTraits >::value - )>::type * = 0 ) - { - typedef View DstViewType ; - - typedef typename DstViewType::shape_type shape_type ; - typedef typename DstViewType::scalar_type scalar_type ; - typedef typename DstViewType::stride_type stride_type ; - - dst.m_texture = CudaTextureFetch< scalar_type >( src.m_ptr_on_device ); - - shape_type::assign( dst.m_shape, - src.m_shape.N0 , src.m_shape.N1 , src.m_shape.N2 , src.m_shape.N3 , - src.m_shape.N4 , src.m_shape.N5 , src.m_shape.N6 , src.m_shape.N7 ); - - stride_type::assign( dst.m_stride , src.m_stride.value ); - } -}; - -//---------------------------------------------------------------------------- - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -template< class T , class L, class D , class M > -class View< T , L , D , M , Impl::CudaTexture > - : public ViewTraits< T , L , D , M > -{ -public: - - typedef ViewTraits< T , L , D , M > traits ; - -private: - - template< class , class , class > friend struct Impl::ViewAssignment ; - - typedef Impl::LayoutStride< typename traits::shape_type , - typename traits::array_layout > stride_type ; - - Impl::CudaTextureFetch m_texture ; - typename traits::shape_type m_shape ; - stride_type m_stride ; - -public: - - typedef Impl::CudaTexture specialize ; - - typedef View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::device_type , - typename traits::memory_traits > const_type ; - - typedef View< typename traits::non_const_data_type , - typename traits::array_layout , - typename traits::device_type::host_mirror_device_type , - void > HostMirror ; - - enum { Rank = traits::rank }; - - KOKKOS_INLINE_FUNCTION typename traits::shape_type shape() const { return m_shape ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { return m_shape.N0 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_shape.N1 ; } - 
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_shape.N2 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_shape.N3 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_shape.N4 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_shape.N5 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_shape.N6 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_shape.N7 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type size() const - { - return m_shape.N0 - * m_shape.N1 - * m_shape.N2 - * m_shape.N3 - * m_shape.N4 - * m_shape.N5 - * m_shape.N6 - * m_shape.N7 - ; - } - - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename traits::size_type dimension( const iType & i ) const - { return Impl::dimension( m_shape , i ); } - - //------------------------------------ - - View() : m_texture() - { - traits::shape_type::assign(m_shape,0,0,0,0,0,0,0,0); - stride_type::assign( m_stride , 0 ); - } - - ~View() {} - - View( const View & rhs ) - : m_texture( rhs.m_texture ) - , m_stride( rhs.m_stride ) - { m_shape = rhs.m_shape ; } - - View & operator = ( const View & rhs ) - { - (void)Impl::ViewAssignment< Impl::CudaTexture , Impl::CudaTexture >( *this , rhs ); - return *this ; - } - - template< class RT , class RL, class RD , class RM , class RS > - View( const View & rhs ) - : m_texture(0) - { - Impl::ViewAssignment< Impl::CudaTexture , RS >( *this , rhs ); - } - - template< class RT , class RL, class RD, class RM , class RS > - View & operator = ( const View & rhs ) - { - Impl::ViewAssignment< Impl::CudaTexture , RS >( *this , rhs ); - return *this ; - } - - //------------------------------------ - - KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_texture.ptr ; } - - //------------------------------------ - // Rank = 1 access operators: - - template < typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , traits , LayoutLeft , 1 , iType0 >::type - operator[] ( const iType0 & i0 ) const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - return m_texture[ i0 ]; - } - - template < typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , traits , LayoutRight , 1 , iType0 >::type - operator[] ( const iType0 & i0 ) const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - return m_texture[ i0 ]; - } - - template < typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , traits , LayoutLeft , 1 , iType0 >::type - operator() ( const iType0 & i0 ) const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - return m_texture[ i0 ]; - } - - template < typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , traits , LayoutRight , 1 , iType0 >::type - operator() ( const iType0 & i0 ) const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - return m_texture[ i0 ]; - } - - //------------------------------------ - // Layout left: - - - 
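// Illustrative host-side sketch, not from the Kokkos sources: the index linearization
// used by the rank-3 access operators that follow. LayoutLeft runs fastest in the first
// index with a padded leading stride (i0 + stride * (i1 + N1 * i2)); LayoutRight runs
// fastest in the last index with a padded row stride (i2 + N2 * i1 + i0 * stride).
// The extents, stride and indices below are made-up values.
#include <cstdio>

int main()
{
  const int N1 = 3, N2 = 4;          // second and third extents (m_shape.N1, m_shape.N2)
  const int stride = 8;              // padded stride, the role of m_stride.value
  const int i0 = 1, i1 = 2, i2 = 3;

  const int left_offset  = i0 + stride * (i1 + N1 * i2);   // LayoutLeft, rank 3
  const int right_offset = i2 + N2 * i1 + i0 * stride;     // LayoutRight, rank 3

  std::printf("LayoutLeft offset = %d, LayoutRight offset = %d\n", left_offset, right_offset);
  return 0;
}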
template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , traits, LayoutLeft, 2, iType0, iType1 >::type - operator() ( const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i0 + m_stride.value * i1 ]; - } - - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutLeft, 3, iType0, iType1, iType2 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_shape, i0,i1,i2 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i0 + m_stride.value * ( - i1 + m_shape.N1 * i2 ) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutLeft, 4, iType0, iType1, iType2, iType3 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_shape, i0,i1,i2,i3 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * i3 )) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutLeft, 5, iType0, iType1, iType2, iType3, iType4 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_shape, i0,i1,i2,i3,i4 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * i4 ))) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutLeft, 6, iType0, iType1, iType2, iType3, iType4, iType5 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_shape, i0,i1,i2,i3,i4,i5 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * i5 )))) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutLeft, 7, iType0, iType1, iType2, iType3, iType4, iType5, iType6 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_shape, i0,i1,i2,i3,i4,i5,i6 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( 
typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * ( - i5 + m_shape.N5 * i6 ))))) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 , typename iType7 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutLeft, 8, iType0, iType1, iType2, iType3, iType4, iType5, iType6, iType7 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_shape, i0,i1,i2,i3,i4,i5,i6,i7 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * ( - i5 + m_shape.N5 * ( - i6 + m_shape.N6 * i7 )))))) ]; - } - - - //------------------------------------ - // Layout right: - - - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutRight, 2, iType0, iType1 >::type - operator() ( const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i1 + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutRight, 3, iType0, iType1, iType2 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_shape, i0,i1,i2 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i2 + m_shape.N2 * i1 + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutRight, 4, iType0, iType1, iType2, iType3 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_shape, i0,i1,i2,i3 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutRight, 5, iType0, iType1, iType2, iType3, iType4 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_shape, i0,i1,i2,i3,i4 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 ))) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename 
Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutRight, 6, iType0, iType1, iType2, iType3, iType4, iType5 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_shape, i0,i1,i2,i3,i4,i5 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )))) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutRight, 7, iType0, iType1, iType2, iType3, iType4, iType5, iType6 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_shape, i0,i1,i2,i3,i4,i5,i6 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i6 + m_shape.N6 * ( - i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 ))))) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 , typename iType7 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type , - traits, LayoutRight, 8, iType0, iType1, iType2, iType3, iType4, iType5, iType6, iType7 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_shape, i0,i1,i2,i3,i4,i5,i6,i7 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_texture.ptr ); - - return m_texture[ i7 + m_shape.N7 * ( - i6 + m_shape.N6 * ( - i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )))))) + i0 * m_stride.value ]; - } - - //------------------------------------ - - KOKKOS_INLINE_FUNCTION - typename traits::scalar_type * ptr_on_device() const { return m_texture.ptr ; } - - // Stride of physical storage, dimensioned to at least Rank - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { - enum { is_left = Impl::is_same< typename traits::array_layout , LayoutLeft >::value }; - - if ( 1 == Rank ) { - s[0] = 1 ; - } - else if ( is_left ) { - s[0] = 1 ; - s[1] = m_stride.value ; - for ( int i = 2 ; i < Rank ; ++i ) { s[i] = s[i-1] * dimension(i-1); } - } - else { - s[0] = m_stride.value ; - s[Rank-1] = 1 ; - for ( int i = Rank - 2 ; 0 < i ; --i ) { s[i] = s[i+1] * dimension(i+1); } - } - } -}; - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */ - diff --git a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp deleted file mode 100644 index e0d2fcc..0000000 --- a/kokkos/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp +++ /dev/null @@ -1,101 +0,0 @@ -/* -//@HEADER -// 
************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDA_ABORT_HPP -#define KOKKOS_CUDA_ABORT_HPP - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) - -#if ! defined( CUDA_VERSION ) || ( CUDA_VERSION < 401 ) -#errof "Cuda version 4.1 or greater required" -#endif - -#if ( __CUDA_ARCH__ < 200 ) -#error "Cuda device capability 2.0 or greater required" -#endif - -extern "C" { -/* Cuda runtime function, declared in - * Requires capability 2.x or better. 
- */ -extern __device__ void __assertfail( - const void *message, - const void *file, - unsigned int line, - const void *function, - size_t charsize); -} - -namespace Kokkos { - -__device__ inline -void cuda_abort( const char * const message ) -{ - const char empty[] = "" ; - - __assertfail( (const void *) message , - (const void *) empty , - (unsigned int) 0 , - (const void *) empty , - sizeof(char) ); -} - -} // namespace Kokkos - -#else - -namespace Kokkos { -KOKKOS_INLINE_FUNCTION -void cuda_abort( const char * const ) {} -} - -#endif /* #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_Atomic.hpp b/kokkos/kokkos/core/src/Kokkos_Atomic.hpp deleted file mode 100644 index 407d425..0000000 --- a/kokkos/kokkos/core/src/Kokkos_Atomic.hpp +++ /dev/null @@ -1,159 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -/// \file Kokkos_Atomic.hpp -/// \brief Atomic functions -/// -/// This header file defines prototypes for the following atomic functions: -/// - exchange -/// - compare and exchange -/// - add -/// -/// Supported types include: -/// - signed and unsigned 4 and 8 byte integers -/// - float -/// - double -/// -/// They are implemented through GCC compatible intrinsics, OpenMP -/// directives and native CUDA intrinsics. 
-/// -/// Including this header file requires one of the following -/// compilers: -/// - NVCC (for CUDA device code only) -/// - GCC (for host code only) -/// - Intel (for host code only) -/// - A compiler that supports OpenMP 3.1 (for host code only) - -#ifndef KOKKOS_ATOMIC_HPP -#define KOKKOS_ATOMIC_HPP - -#include -#include - -//---------------------------------------------------------------------------- - -#if defined( __CUDA_ARCH__ ) - -// Compiling NVIDIA device code, must use Cuda atomics: - -#define KOKKOS_ATOMICS_USE_CUDA - -#elif ! defined( KOKKOS_ATOMICS_USE_GCC ) && \ - ! defined( KOKKOS_ATOMICS_USE_INTEL ) && \ - ! defined( KOKKOS_ATOMICS_USE_OMP31 ) - -// Compiling for non-Cuda atomic implementation has not been pre-selected. -// Choose the best implementation for the detected compiler. -// Preference: GCC, INTEL, OMP31 - -#if defined( __GNUC__ ) || defined( __GNUG__ ) - -#define KOKKOS_ATOMICS_USE_GCC - -#elif defined( __INTEL_COMPILER ) || defined( _CRAYC) - -#define KOKKOS_ATOMICS_USE_INTEL - -#elif defined( _OPENMP ) && ( 201107 <= _OPENMP ) - -#define KOKKOS_ATOMICS_USE_OMP31 - -#else - -#error "KOKKOS_ATOMICS_USE : Unsupported compiler" - -#endif - -#endif /* Not pre-selected atomic implementation */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -inline -const char * atomic_query_version() -{ -#if defined( KOKKOS_ATOMICS_USE_CUDA ) - return "KOKKOS_ATOMICS_USE_CUDA" ; -#elif defined( KOKKOS_ATOMICS_USE_GCC ) - return "KOKKOS_ATOMICS_USE_GCC" ; -#elif defined( KOKKOS_ATOMICS_USE_INTEL ) - return "KOKKOS_ATOMICS_USE_INTEL" ; -#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) - return "KOKKOS_ATOMICS_USE_OMP31" ; -#endif -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -// Atomic exchange -// -// template< typename T > -// T atomic_exchange( volatile T* const dest , const T val ) -// { T tmp = *dest ; *dest = val ; return tmp ; } - -#include "impl/Kokkos_Atomic_Exchange.hpp" - -//---------------------------------------------------------------------------- -// Atomic compare-and-exchange -// -// template -// bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val) -// { bool equal = compare == *dest ; if ( equal ) { *dest = val ; } return equal ; } - -#include "impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp" - -//---------------------------------------------------------------------------- -// Atomic fetch and add -// -// template -// T atomic_fetch_add(volatile T* const dest, const T val) -// { T tmp = *dest ; *dest += val ; return tmp ; } - -#include "impl/Kokkos_Atomic_Fetch_Add.hpp" - -//---------------------------------------------------------------------------- - -#endif /* KOKKOS_ATOMIC_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_CrsArray.hpp b/kokkos/kokkos/core/src/Kokkos_CrsArray.hpp deleted file mode 100644 index 8f1b838..0000000 --- a/kokkos/kokkos/core/src/Kokkos_CrsArray.hpp +++ /dev/null @@ -1,170 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
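The deleted Kokkos_Atomic.hpp above documents each atomic operation by its sequential semantics (the commented pseudo-code for atomic_exchange, atomic_compare_exchange_strong and atomic_fetch_add). The following stand-alone sketch simply writes those documented semantics out in plain host C++ so the contract is explicit; the helper names are hypothetical and this is not the Kokkos implementation, which dispatches to GCC/Intel/OpenMP 3.1 or CUDA intrinsics.

    #include <cassert>

    // Sequential meaning of atomic_fetch_add: return the old value, add val.
    template< typename T >
    T fetch_add_semantics( volatile T * const dest , const T val )
    { T tmp = *dest ; *dest += val ; return tmp ; }

    // Sequential meaning of atomic_compare_exchange_strong:
    // store val only if *dest equals compare; report whether it matched.
    template< typename T >
    bool compare_exchange_semantics( volatile T * const dest ,
                                     const T compare , const T val )
    { const bool equal = ( compare == *dest ); if ( equal ) { *dest = val ; } return equal ; }

    int main()
    {
      int counter = 0 ;
      assert( fetch_add_semantics( &counter , 5 ) == 0 );      // returns old value
      assert( counter == 5 );
      assert(  compare_exchange_semantics( &counter , 5 , 7 ) ); // matches, stores 7
      assert( !compare_exchange_semantics( &counter , 5 , 9 ) ); // no match, unchanged
      assert( counter == 7 );
      return 0 ;
    }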
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CRSARRAY_HPP -#define KOKKOS_CRSARRAY_HPP - -#include -#include - -#include - -namespace Kokkos { - -/// \class CrsArray -/// \brief Compressed row storage array. -/// -/// \tparam DataType The type of stored entries. If a CrsArray is -/// used as the graph of a sparse matrix, then this is usually an -/// integer type, the type of the column indices in the sparse -/// matrix. -/// -/// \tparam Arg1Type The second template parameter, corresponding -/// either to the Device type (if there are no more template -/// parameters) or to the Layout type (if there is at least one more -/// template parameter). -/// -/// \tparam Arg2Type The third template parameter, which if provided -/// corresponds to the Device type. -/// -/// \tparam SizeType The type of row offsets. Usually the default -/// parameter suffices. However, setting a nondefault value is -/// necessary in some cases, for example, if you want to have a -/// sparse matrices with dimensions (and therefore column indices) -/// that fit in \c int, but want to store more than INT_MAX -/// entries in the sparse matrix. -/// -/// A row has a range of entries: -///
-///   - row_map[i0] <= entry < row_map[i0+1]
-///   - 0 <= i1 < row_map[i0+1] - row_map[i0]
-///   - entries( entry , i2 , i3 , ... );
-///   - entries( row_map[i0] + i1 , i2 , i3 , ... );
-///
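The row_map/entries contract documented in the comment above is the usual compressed-row traversal: row i0 owns entries[ row_map[i0] ] through entries[ row_map[i0+1] - 1 ]. A minimal stand-alone sketch of that indexing, using std::vector in place of the Kokkos Views so it runs on its own:

    #include <cstdio>
    #include <vector>

    int main()
    {
      const std::vector<int> row_map = { 0 , 2 , 5 , 6 };        // 3 rows
      const std::vector<int> entries = { 4 , 7 , 1 , 3 , 9 , 2 };

      for ( std::size_t i0 = 0 ; i0 + 1 < row_map.size() ; ++i0 ) {
        std::printf( "row %d:" , int(i0) );
        for ( int e = row_map[i0] ; e < row_map[i0+1] ; ++e ) {  // 'e' is the entry index
          std::printf( " %d" , entries[e] );
        }
        std::printf( "\n" );
      }
      return 0 ;
    }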
-template< class DataType, - class Arg1Type, - class Arg2Type = void, - typename SizeType = typename ViewTraits::size_type> -class CrsArray { -private: - typedef ViewTraits traits; - -public: - typedef DataType data_type; - typedef typename traits::array_layout array_layout; - typedef typename traits::device_type device_type; - typedef SizeType size_type; - - typedef CrsArray< DataType , Arg1Type , Arg2Type , SizeType > crsarray_type; - typedef CrsArray< DataType , array_layout , typename device_type::host_mirror_device_type , SizeType > HostMirror; - typedef View< const size_type* , array_layout, device_type > row_map_type; - typedef View< DataType* , array_layout, device_type > entries_type; - - entries_type entries; - row_map_type row_map; - - //! Construct an empty view. - CrsArray () : entries(), row_map() {} - - //! Copy constructor (shallow copy). - CrsArray (const CrsArray& rhs) : entries (rhs.entries), row_map (rhs.row_map) - {} - - /** \brief Assign to a view of the rhs array. - * If the old view is the last view - * then allocated memory is deallocated. - */ - CrsArray& operator= (const CrsArray& rhs) { - entries = rhs.entries; - row_map = rhs.row_map; - return *this; - } - - /** \brief Destroy this view of the array. - * If the last view then allocated memory is deallocated. - */ - ~CrsArray() {} -}; - -//---------------------------------------------------------------------------- - -template< class CrsArrayType , class InputSizeType > -typename CrsArrayType::crsarray_type -create_crsarray( const std::string & label , - const std::vector< InputSizeType > & input ); - -template< class CrsArrayType , class InputSizeType > -typename CrsArrayType::crsarray_type -create_crsarray( const std::string & label , - const std::vector< std::vector< InputSizeType > > & input ); - -//---------------------------------------------------------------------------- - -template< class DataType , - class Arg1Type , - class Arg2Type , - typename SizeType > -typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror_view( const CrsArray & input ); - -template< class DataType , - class Arg1Type , - class Arg2Type , - typename SizeType > -typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror( const CrsArray & input ); - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_CRSARRAY_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_Cuda.hpp b/kokkos/kokkos/core/src/Kokkos_Cuda.hpp deleted file mode 100644 index 7434be2..0000000 --- a/kokkos/kokkos/core/src/Kokkos_Cuda.hpp +++ /dev/null @@ -1,323 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDA_HPP -#define KOKKOS_CUDA_HPP - -#include -#include - -#include -#ifdef KOKKOS_HAVE_OPENMP -#include -#else -#ifdef KOKKOS_HAVE_PTHREAD -#include -#else -#include -#endif -#endif -#include -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { -class CudaExec ; -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/// \class Cuda -/// \brief Kokkos device that uses CUDA to run on GPUs. -/// -/// A "device" represents a parallel execution model. It tells Kokkos -/// how to parallelize the execution of kernels in a parallel_for or -/// parallel_reduce. For example, the Threads device uses Pthreads or -/// C++11 threads on a CPU, the OpenMP device uses the OpenMP language -/// extensions, and the Serial device executes "parallel" kernels -/// sequentially. The Cuda device uses NVIDIA's CUDA programming -/// model to execute kernels in parallel on GPUs. -class Cuda { -public: - //! \name Type declarations that all Kokkos devices must provide. - //@{ - - //! The device type (same as this class). - typedef Cuda device_type ; - //! This device's preferred memory space. - typedef CudaSpace memory_space ; - //! The size_type typedef best suited for this device. - typedef CudaSpace::size_type size_type ; - //! This device's preferred array layout. - typedef LayoutLeft array_layout ; - //! This device's host mirror type. -#ifdef KOKKOS_HAVE_OPENMP - typedef Kokkos::OpenMP host_mirror_device_type ; -#else -#ifdef KOKKOS_HAVE_PTHREAD - typedef Kokkos::Threads host_mirror_device_type ; -#else - typedef Kokkos::Serial host_mirror_device_type ; -#endif -#endif - //@} - //! \name Functions that all Kokkos devices must implement. - //@{ - - /// \brief True if and only if this method is being called in a - /// thread-parallel function. 
- KOKKOS_INLINE_FUNCTION static int in_parallel() { -#if defined( __CUDA_ARCH__ ) - return true; -#else - return false; -#endif - } - - /** \brief Set the device in a "sleep" state. - * - * This function sets the device in a "sleep" state in which it is - * not ready for work. This may consume less resources than if the - * device were in an "awake" state, but it may also take time to - * bring the device from a sleep state to be ready for work. - * - * \return True if the device is in the "sleep" state, else false if - * the device is actively working and could not enter the "sleep" - * state. - */ - static bool sleep(); - - /// \brief Wake the device from the 'sleep' state so it is ready for work. - /// - /// \return True if the device is in the "ready" state, else "false" - /// if the device is actively working (which also means that it's - /// awake). - static bool wake(); - - /// \brief Wait until all dispatched functors complete. - /// - /// The parallel_for or parallel_reduce dispatch of a functor may - /// return asynchronously, before the functor completes. This - /// method does not return until all dispatched functors on this - /// device have completed. - static void fence(); - - //! Free any resources being consumed by the device. - static void finalize(); - - //! Print configuration information to the given output stream. - static void print_configuration( std::ostream & , const bool detail = false ); - - //@} - //-------------------------------------------------------------------------- - //! \name Device-specific functions - //@{ - - struct SelectDevice { - int cuda_device_id ; - SelectDevice() : cuda_device_id(0) {} - explicit SelectDevice( int id ) : cuda_device_id( id ) {} - }; - - //! Initialize, telling the CUDA run-time library which device to use. - static void initialize( const SelectDevice = SelectDevice() ); - - static int is_initialized(); - - /// \brief Cuda device architecture of the selected device. - /// - /// This matches the __CUDA_ARCH__ specification. - static size_type device_arch(); - - //! Query device count. - static size_type detect_device_count(); - - /** \brief Detect the available devices and their architecture - * as defined by the __CUDA_ARCH__ specification. - */ - static std::vector detect_device_arch(); - - static unsigned team_max(); - - //@} - //-------------------------------------------------------------------------- -#if defined( __CUDA_ARCH__ ) - //! \name Functions for the functor device interface - //@{ - - __device__ inline int league_size() const { return gridDim.x ; } - __device__ inline int league_rank() const { return blockIdx.x ; } - - __device__ inline int team_size() const { return blockDim.x ; } - __device__ inline int team_rank() const { return threadIdx.x ; } - - __device__ inline void team_barrier() const { __syncthreads(); } - __device__ inline unsigned int team_barrier_count(bool value) const - { return __syncthreads_count(value); } - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - __device__ inline Type team_scan( const Type & value ); - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. 
- * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename TypeLocal , typename TypeGlobal > - __device__ inline TypeGlobal team_scan( const TypeLocal & value , TypeGlobal * const global_accum ); - - - //! Get a pointer to shared memory for this team. - __device__ inline void * get_shmem( const int size ); - - __device__ inline Cuda( Impl::CudaExec & exec ) : m_exec(exec) {} - __device__ inline Cuda( const Cuda & rhs ) : m_exec(rhs.m_exec) {} - - //@} - //-------------------------------------------------------------------------- - -private: - - Impl::CudaExec & m_exec ; - - //-------------------------------------------------------------------------- -#else - - int league_size() const ; - int league_rank() const ; - - int team_size() const ; - int team_rank() const ; - - void team_barrier() const ; - unsigned int team_barrier_count(bool) const ; - - template< typename T > - inline T team_scan(const T& value); - - template< typename TypeLocal , typename TypeGlobal > - inline TypeGlobal team_scan( const TypeLocal & value , TypeGlobal * const global_accum ); - - void * get_shmem( const int size ); - - Cuda( Impl::CudaExec & ); - -#endif - -}; - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/** \brief Cuda-specific parallel work configuration */ - -struct CudaWorkConfig { - Cuda::size_type grid[3] ; //< Grid dimensions - Cuda::size_type block[3] ; //< Block dimensions - Cuda::size_type shared ; //< Shared memory size - - CudaWorkConfig() - { - enum { WarpSize = 32 }; - grid[0] = grid[1] = grid[2] = 1 ; - block[1] = block[2] = 1 ; - block[0] = 8 * WarpSize ; - shared = 0 ; - } -}; - -template< class FunctorType > -inline -void parallel_for( const CudaWorkConfig & work_config , - const FunctorType & functor ) -{ - Impl::ParallelFor< FunctorType , CudaWorkConfig , Cuda > - ( work_config , functor ); -} - -template< class FunctorType , class FinalizeType > -inline -void parallel_reduce( const CudaWorkConfig & work_config , - const FunctorType & functor , - const FinalizeType & finalize ); - -template< class FunctorType > -inline -typename FunctorType::value_type -parallel_reduce( const CudaWorkConfig & work_config , - const FunctorType & functor ); - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -#include -#include -#include - -#endif /* #ifndef KOKKOS_CUDA_HPP */ - -//---------------------------------------------------------------------------- - - diff --git a/kokkos/kokkos/core/src/Kokkos_CudaSpace.hpp b/kokkos/kokkos/core/src/Kokkos_CudaSpace.hpp deleted file mode 100644 index e89ac20..0000000 --- a/kokkos/kokkos/core/src/Kokkos_CudaSpace.hpp +++ /dev/null @@ -1,179 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDASPACE_HPP -#define KOKKOS_CUDASPACE_HPP - -#if defined( __CUDACC__ ) -#include -#endif - -#include -#include -#include - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/** \brief Cuda memory management */ - -class CudaSpace { -public: - - typedef CudaSpace memory_space ; - typedef unsigned int size_type ; - - /** \brief Allocate a contiguous block of memory on the Cuda device - * with size = scalar_size * scalar_count. - * - * The input label is associated with the block of memory. - * The block of memory is tracked via reference counting where - * allocation gives it a reference count of one. - * - * Allocation may only occur on the master thread of the process. - */ - static void * allocate( const std::string & label , - const std::type_info & scalar_type , - const size_t scalar_size , - const size_t scalar_count ); - - /** \brief Increment the reference count of the block of memory - * in which the input pointer resides. - * - * Reference counting only occurs on the master thread. - */ - static void increment( const void * ); - - /** \brief Decrement the reference count of the block of memory - * in which the input pointer resides. If the reference - * count falls to zero the memory is deallocated. - * - * Reference counting only occurs on the master thread. - */ - static void decrement( const void * ); - - /** \brief Print all tracked memory to the output stream. 
*/ - static void print_memory_view( std::ostream & ); - - /** \brief Retrieve label associated with the input pointer */ - static std::string query_label( const void * ); - - /*--------------------------------*/ - - static void access_error(); - static void access_error( const void * const ); - - /*--------------------------------*/ -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template<> -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t ); -}; - -template<> -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t ); -}; - -template<> -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t ); -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** \brief Cuda code accessing Cuda data is good. */ -template<> -struct VerifyExecutionSpaceCanAccessDataSpace< CudaSpace , CudaSpace > -{ - KOKKOS_INLINE_FUNCTION static void verify( void ) {} - KOKKOS_INLINE_FUNCTION static void verify( const void * ) {} -}; - -/** \brief Cuda code accessing non-Cuda data is bad. */ -template<> -struct VerifyExecutionSpaceCanAccessDataSpace< CudaSpace , HostSpace > -{ - KOKKOS_INLINE_FUNCTION static void verify(void) - { Kokkos::cuda_abort("Cuda code called function restricted to HostSpace"); } - - KOKKOS_INLINE_FUNCTION static void verify( const void * ) - { Kokkos::cuda_abort("Cuda code attempted to access HostSpace memory"); } -}; - -/** \brief Produce error message when trying to access Cuda - * memory on the host. - */ -template<> -struct VerifyExecutionSpaceCanAccessDataSpace< HostSpace , CudaSpace > -{ - inline static void verify( void ) { CudaSpace::access_error(); } - inline static void verify( const void * p ) { CudaSpace::access_error(p); } -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #define KOKKOS_CUDASPACE_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_CudaTypes.hpp b/kokkos/kokkos/core/src/Kokkos_CudaTypes.hpp deleted file mode 100644 index 899e7e1..0000000 --- a/kokkos/kokkos/core/src/Kokkos_CudaTypes.hpp +++ /dev/null @@ -1,139 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDATYPES_HPP -#define KOKKOS_CUDATYPES_HPP - -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) - -namespace Kokkos { - -typedef ::int2 int2 ; -typedef ::int3 int3 ; -typedef ::int4 int4 ; - -typedef ::float2 float2 ; -typedef ::float3 float3 ; -typedef ::float4 float4 ; - -typedef ::double2 double2 ; -typedef ::double3 double3 ; -typedef ::double4 double4 ; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#else /* NOT #if defined( __CUDACC__ ) */ - -namespace Kokkos { - -struct int2 { - int x; - int y; -}; - -struct int3 { - int x; - int y; - int z; -}; - -struct int4 { - int x; - int y; - int z; - int w; -}; - -struct float2 { - float x; - float y; -}; - -struct float3 { - float x; - float y; - float z; -}; - -struct float4 { - float x; - float y; - float z; - float w; -}; - -struct double2 { - double x; - double y; -}; - -struct double3 { - double x; - double y; - double z; -}; - -struct double4 { - double x; - double y; - double z; - double w; -}; - -} // namespace Kokkos - -#endif - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #define KOKKOS_CUDATYPES_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_HostSpace.hpp b/kokkos/kokkos/core/src/Kokkos_HostSpace.hpp deleted file mode 100644 index 028a403..0000000 --- a/kokkos/kokkos/core/src/Kokkos_HostSpace.hpp +++ /dev/null @@ -1,143 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_HOSTSPACE_HPP -#define KOKKOS_HOSTSPACE_HPP - -#include -#include -#include - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/** \brief Memory management on the host for devices */ - -class HostSpace { -public: - - typedef HostSpace memory_space ; - typedef size_t size_type ; - - /** \brief Allocate a contiguous block of memory on the Cuda device - * with size = scalar_size * scalar_count. - * - * The input label is associated with the block of memory. - * The block of memory is tracked via reference counting where - * allocation gives it a reference count of one. - * - * Allocation may only occur on the master thread of the process. - */ - static void * allocate( const std::string & label , - const std::type_info & scalar_type , - const size_t scalar_size , - const size_t scalar_count ); - - /** \brief Increment the reference count of the block of memory - * in which the input pointer resides. - * - * Reference counting only occurs on the master thread. - */ - static void increment( const void * ); - - /** \brief Decrement the reference count of the block of memory - * in which the input pointer resides. If the reference - * count falls to zero the memory is deallocated. - * - * Reference counting only occurs on the master thread. - */ - static void decrement( const void * ); - - /*--------------------------------*/ - - /** \brief Print all tracked memory to the output stream. 
*/ - static void print_memory_view( std::ostream & ); - - /** \brief Retrieve label associated with the input pointer */ - static std::string query_label( const void * ); - - /*--------------------------------*/ - /* Functions unique to the HostSpace */ - - static int in_parallel(); - - static void register_in_parallel( int (*)() ); -}; - -//---------------------------------------------------------------------------- - -template< class ExecutionSpace , class DataSpace > -struct VerifyExecutionSpaceCanAccessDataSpace ; - -template<> -struct VerifyExecutionSpaceCanAccessDataSpace< HostSpace , HostSpace > -{ - inline static void verify(void) {} - inline static void verify(const void *) {} -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class , class > struct DeepCopy ; - -template<> -struct DeepCopy { - DeepCopy( void * dst , const void * src , size_t n ); -}; - -} // namespace Impl -} // namespace Kokkos - -#endif /* #define KOKKOS_HOSTSPACE_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_Layout.hpp b/kokkos/kokkos/core/src/Kokkos_Layout.hpp deleted file mode 100644 index f026806..0000000 --- a/kokkos/kokkos/core/src/Kokkos_Layout.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -/// \file Kokkos_Layout.hpp -/// \brief Declaration of various \c MemoryLayout options. 
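The LayoutLeft and LayoutRight tags declared below describe how a multi-index maps to a memory offset: LayoutLeft makes the leftmost index stride-1 (Fortran / column major), LayoutRight makes the rightmost index stride-1 (C / row major). A small sketch of the two mappings for a hypothetical unpadded N0 x N1 array (the deleted View code additionally folds a padded stride, m_stride, into the non-contiguous dimension):

    #include <cstdio>

    int main()
    {
      const int N0 = 3 , N1 = 4 ;
      const int i0 = 2 , i1 = 1 ;

      // LayoutLeft ("column major"): i0 is the stride-1 index.
      const int offset_left  = i0 + N0 * i1 ;
      // LayoutRight ("row major"): i1 is the stride-1 index.
      const int offset_right = i1 + N1 * i0 ;

      std::printf( "left=%d right=%d\n" , offset_left , offset_right ); // left=5 right=9
      return 0 ;
    }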
- -#ifndef KOKKOS_LAYOUT_HPP -#define KOKKOS_LAYOUT_HPP - -#include - -namespace Kokkos { - -/// \struct LayoutLeft -/// \brief Memory layout tag indicating left-to-right (Fortran scheme) -/// striding of multi-indices. -/// -/// This is an example of a \c MemoryLayout template parameter of -/// View. The memory layout describes how View maps from a -/// multi-index (i0, i1, ..., ik) to a memory location. -/// -/// "Layout left" indicates a mapping where the leftmost index i0 -/// refers to contiguous access, and strides increase for dimensions -/// going right from there (i1, i2, ...). This layout imitates how -/// Fortran stores multi-dimensional arrays. For the special case of -/// a two-dimensional array, "layout left" is also called "column -/// major." -struct LayoutLeft { typedef LayoutLeft array_layout ; }; - -/// \struct LayoutRight -/// \brief Memory layout tag indicating right-to-left (C or -/// lexigraphical scheme) striding of multi-indices. -/// -/// This is an example of a \c MemoryLayout template parameter of -/// View. The memory layout describes how View maps from a -/// multi-index (i0, i1, ..., ik) to a memory location. -/// -/// "Right layout" indicates a mapping where the rightmost index ik -/// refers to contiguous access, and strides increase for dimensions -/// going left from there. This layout imitates how C stores -/// multi-dimensional arrays. For the special case of a -/// two-dimensional array, "layout right" is also called "row major." -struct LayoutRight { typedef LayoutRight array_layout ; }; - -/// \struct LayoutTileLeft -/// \brief Memory layout tag indicating left-to-right (Fortran scheme) -/// striding of multi-indices by tiles. -/// -/// This is an example of a \c MemoryLayout template parameter of -/// View. The memory layout describes how View maps from a -/// multi-index (i0, i1, ..., ik) to a memory location. -/// -/// "Tiled layout" indicates a mapping to contiguously stored -/// ArgN0 by ArgN1 tiles for the rightmost two -/// dimensions. Indices are LayoutLeft within each tile, and the -/// tiles themselves are arranged using LayoutLeft. Note that the -/// dimensions ArgN0 and ArgN1 of the tiles must be -/// compile-time constants. This speeds up index calculations. If -/// both tile dimensions are powers of two, Kokkos can optimize -/// further. -template < unsigned ArgN0 , unsigned ArgN1 , - bool IsPowerOfTwo = ( Impl::is_power_of_two::value && - Impl::is_power_of_two::value ) - > -struct LayoutTileLeft { - typedef LayoutTileLeft array_layout ; - enum { N0 = ArgN0 }; - enum { N1 = ArgN1 }; -}; - -} // namespace Kokkos - -#endif // #ifndef KOKKOS_LAYOUT_HPP - diff --git a/kokkos/kokkos/core/src/Kokkos_Macros.hpp b/kokkos/kokkos/core/src/Kokkos_Macros.hpp deleted file mode 100644 index b46ce32..0000000 --- a/kokkos/kokkos/core/src/Kokkos_Macros.hpp +++ /dev/null @@ -1,220 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_MACROS_HPP -#define KOKKOS_MACROS_HPP - -#include - -namespace Kokkos { -class HostSpace ; -class CudaSpace ; -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) && ! defined( KOKKOS_HAVE_CUDA ) -#error "Compiling Kokkos with Cuda compiler but KOKKOS_HAVE_CUDA is undefined" -#endif - -#if defined( _OPENMP ) && ! defined( KOKKOS_HAVE_OPENMP ) -#error "Compiling Kokkos for OpenMP but KOKKOS_HAVE_OPENMP is undefined" -#endif - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) - -#include - -/* Compiling with a CUDA compiler for device code. - * - * Include to pick up the CUDA_VERSION macro defined as: - * CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 ) - * - * When generating device code the __CUDA_ARCH__ macro is defined as: - * __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 ) - */ -#if ! defined( CUDA_VERSION ) -#error "#include did not define CUDA_VERSION" -#endif - -#if ( CUDA_VERSION < 4010 ) -#error "Cuda version 4.1 or greater required" -#endif - -#endif /* #if defined( __CUDACC__ ) */ - -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) - -/* Compiling with CUDA compiler for device code. */ - -#if ( __CUDA_ARCH__ < 200 ) -#error "Cuda device capability >= 2.0 is required" -#endif - -#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ -#define KOKKOS_INLINE_FUNCTION __device__ __host__ inline -#define KOKKOS_FUNCTION __device__ __host__ - -#endif /* #if defined( __CUDACC__ ) && #if defined( __CUDA_ARCH__ ) */ - -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) && ! defined( __CUDA_ARCH__ ) - -/* Compiling with CUDA compiler for host code. */ - -#define KOKKOS_FORCEINLINE_FUNCTION __forceinline__ - -#endif /* #if defined( __CUDACC__ ) && ! 
defined( __CUDA_ARCH__ ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __INTEL_COMPILER ) - -/* Compiling with Intel compiler */ -/* TBD: Version testing */ - -#ifndef KOKKOS_FORCEINLINE_FUNCTION -#define KOKKOS_FORCEINLINE_FUNCTION __forceinline -#endif - -#if defined( __MIC__ ) - -/* Compiling with Intel compiler for execution on an Intel MIC device. - * These devices are used in no-offload mode so the HostSpace is the MIC space. - */ - -#else - -#ifndef KOKKOS_USE_PRAGMA_SIMD -#define KOKKOS_USE_PRAGMA_SIMD -#endif - -/* - #pragma simd vectorlength(N) - #pragma ivdep -*/ - -#endif /* #if defined( __MIC__ ) */ - -#endif /* #if defined( __INTEL_COMPILER ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __GNUC__ ) /* GNU C */ || \ - defined( __GNUG__ ) /* GNU C++ */ - -/* Compiling with GNU compiler */ - -#ifndef KOKKOS_FORCEINLINE_FUNCTION -#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) -#endif - -/* Compiling with GNU compatible compiler. */ - -#endif - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( _OPENMP ) - -/* Compiling with in OpenMP mode. - * The value of _OPENMP is an integer value YYYYMM - * where YYYY and MM are the year and month designation - * of the supported OpenMP API version. - */ - -#endif /* END: #if defined( _OPENMP ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#ifndef KOKKOS_FUNCTION -#define KOKKOS_FUNCTION /* */ -#endif - -#ifndef KOKKOS_INLINE_FUNCTION -#define KOKKOS_INLINE_FUNCTION inline -#endif - -#ifndef KOKKOS_FORCEINLINE_FUNCTION -#define KOKKOS_FORCEINLINE_FUNCTION inline -#endif - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) - -namespace Kokkos { typedef CudaSpace ExecutionSpace ; } - -#else - -namespace Kokkos { typedef HostSpace ExecutionSpace ; } - -#endif - -#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \ - Kokkos::VerifyExecutionSpaceCanAccessDataSpace< \ - Kokkos::ExecutionSpace , DATA_SPACE >::verify( DATA_PTR ) - -#define KOKKOS_RESTRICT_EXECUTION_TO( DATA_SPACE ) \ - Kokkos::VerifyExecutionSpaceCanAccessDataSpace< \ - Kokkos::ExecutionSpace , DATA_SPACE >::verify() - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_MACROS_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_MemoryTraits.hpp b/kokkos/kokkos/core/src/Kokkos_MemoryTraits.hpp deleted file mode 100644 index e1bbc35..0000000 --- a/kokkos/kokkos/core/src/Kokkos_MemoryTraits.hpp +++ /dev/null @@ -1,111 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_MEMORYTRAITS_HPP -#define KOKKOS_MEMORYTRAITS_HPP - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** \brief Memory access traits for views, an extension point. - * - * These traits should be orthogonal. If there are dependencies then - * the MemoryTraits template must detect and enforce dependencies. - * - * A zero value is the default for a View, indicating that none of - * these traits are present. - */ -enum MemoryTraitsFlags - { Unmanaged = 0x01 - , RandomRead = 0x02 - }; - -template < unsigned T > -struct MemoryTraits { - enum { Unmanaged = T & unsigned(Kokkos::Unmanaged) }; - enum { RandomRead = T & unsigned(Kokkos::RandomRead) }; - - typedef MemoryTraits memory_traits ; -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -typedef Kokkos::MemoryTraits<0> MemoryManaged ; -typedef Kokkos::MemoryTraits< Kokkos::Unmanaged > MemoryUnmanaged ; -typedef Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::RandomRead > MemoryRandomRead ; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief Memory alignment settings - * - * Sets global value for memory alignment. - * Enable compatibility of views from different devices with static stride. - * Use compiler flag to enable overwrites. 
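The trait flags defined above occupy distinct bits, so traits compose with bitwise-or, and MemoryTraits<T> recovers each flag with a mask. A stand-alone sketch that mirrors the deleted header (placed in a hypothetical namespace ks rather than including Kokkos) to show the composition:

    #include <cstdio>

    namespace ks {   // hypothetical stand-in for the Kokkos namespace

    enum MemoryTraitsFlags { Unmanaged = 0x01 , RandomRead = 0x02 };

    template< unsigned T >
    struct MemoryTraits {
      enum { Unmanaged  = T & unsigned(ks::Unmanaged)  };
      enum { RandomRead = T & unsigned(ks::RandomRead) };
    };

    typedef MemoryTraits<0>                        MemoryManaged ;
    typedef MemoryTraits< Unmanaged >              MemoryUnmanaged ;
    typedef MemoryTraits< Unmanaged | RandomRead > MemoryRandomRead ;

    } // namespace ks

    int main()
    {
      // MemoryRandomRead carries both flags; each mask is nonzero.
      std::printf( "unmanaged=%d random-read=%d\n" ,
                   int( ks::MemoryRandomRead::Unmanaged  != 0 ) ,
                   int( ks::MemoryRandomRead::RandomRead != 0 ) );
      return 0 ;
    }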
- */ -enum { MEMORY_ALIGNMENT = -#if defined( KOKKOS_MEMORY_ALIGNMENT ) - KOKKOS_MEMORY_ALIGNMENT -#else - 128 -#endif - }; - -enum { MEMORY_ALIGNMENT_THRESHOLD = 4 }; - -} //namespace Impl -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_MEMORYTRAITS_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_OpenMP.hpp b/kokkos/kokkos/core/src/Kokkos_OpenMP.hpp deleted file mode 100644 index 3b5ffed..0000000 --- a/kokkos/kokkos/core/src/Kokkos_OpenMP.hpp +++ /dev/null @@ -1,183 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_OPENMP_HPP -#define KOKKOS_OPENMP_HPP - -#include - -#if defined(KOKKOS_HAVE_OPENMP) - -#include -#include -#include -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { -class OpenMPexec ; -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/// \class OpenMP -/// \brief Kokkos device for multicore processors in the host memory space. -class OpenMP { -public: - //------------------------------------ - //! \name Type declarations that all Kokkos devices must provide. - //@{ - - typedef OpenMP device_type ; - typedef HostSpace::size_type size_type ; - typedef HostSpace memory_space ; - typedef LayoutRight array_layout ; - typedef OpenMP host_mirror_device_type ; - - //@} - //------------------------------------ - //! \name Functions that all Kokkos devices must implement. 
- //@{ - - inline static bool in_parallel() { return omp_in_parallel(); } - - /** \brief Set the device in a "sleep" state. A noop for OpenMP. */ - static bool sleep(); - - /** \brief Wake the device from the 'sleep' state. A noop for OpenMP. */ - static bool wake(); - - /** \brief Wait until all dispatched functors complete. A noop for OpenMP. */ - static void fence() {} - - /// \brief Print configuration information to the given output stream. - static void print_configuration( std::ostream & , const bool detail = false ); - - /// \brief Free any resources being consumed by the device. - static void finalize(); - - /** \brief Initialize the device. - * - * 1) If the hardware locality library is enabled and OpenMP has not - * already bound threads then bind OpenMP threads to maximize - * core utilization and group for memory hierarchy locality. - * - * 2) Allocate a HostThread for each OpenMP thread to hold its - * topology and fan in/out data. - */ - static void initialize( const unsigned team_count = 1 , - const unsigned threads_per_team = 1 , - const unsigned use_numa_count = 0 , - const unsigned use_cores_per_numa = 0 ); - - static int is_initialized(); - - static unsigned league_max(); - static unsigned team_max(); - //@} - //------------------------------------ - //! \name Function for the functor device interface */ - //@{ - - inline int league_rank() const ; - inline int league_size() const ; - inline int team_rank() const ; - inline int team_size() const ; - - inline void team_barrier(); - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - inline Type team_scan( const Type & value ); - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename TypeLocal , typename TypeGlobal > - inline TypeGlobal team_scan( const TypeLocal & value , TypeGlobal * const global_accum ); - - - inline void * get_shmem( const int size ); - - explicit inline OpenMP( Impl::OpenMPexec & ); - - //------------------------------------ - -private: - - Impl::OpenMPexec & m_exec ; - -}; - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -#include -#include - -/*--------------------------------------------------------------------------*/ - -#endif /* #if defined(KOKKOS_HAVE_OPENMP) */ -#endif /* #ifndef KOKKOS_OPENMP_HPP */ - - diff --git a/kokkos/kokkos/core/src/Kokkos_Parallel.hpp b/kokkos/kokkos/core/src/Kokkos_Parallel.hpp deleted file mode 100644 index 06cc14c..0000000 --- a/kokkos/kokkos/core/src/Kokkos_Parallel.hpp +++ /dev/null @@ -1,529 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -/// \file Kokkos_Parallel.hpp -/// \brief Declaration of parallel operators - -#ifndef KOKKOS_PARALLEL_HPP -#define KOKKOS_PARALLEL_HPP - -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/// \class ParallelFor -/// \brief Implementation of the ParallelFor operator that has a -/// partial specialization for the device. -/// -/// This is an implementation detail of parallel_for. Users should -/// skip this and go directly to the nonmember function parallel_for. -template< class FunctorType , - class WorkSpec , - class DeviceType = typename FunctorType::device_type > -class ParallelFor ; - -} // namespace Impl -} // namespace Kokkos - -namespace Kokkos { - -/// \class VectorParallel -/// \brief Request for parallel_for to attempt thread+vector parallelism. -struct VectorParallel -{ - const size_t nwork ; - VectorParallel( const size_t n ) : nwork(n) {} - operator size_t () const { return nwork ; } -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** \brief Execute \c functor \c work_count times in parallel. - * - * A "functor" is a class containing the function to execute in - * parallel, any data needed for that execution, and a \c device_type - * typedef. Here is an example functor for parallel_for: - * - * \code - * class FunctorType { - * public: - * typedef ... device_type ; - * void operator() (IntType iwork) const ; - * }; - * \endcode - * - * In the above example, \c IntType is any integer type for which a - * valid conversion from \c size_t to \c IntType exists. 
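 *
 * A concrete sketch of such a functor (the SquareKernel name, the Serial
 * device choice, and the View member are illustrative assumptions, not part
 * of this header):
 * \code
 * struct SquareKernel {
 *   typedef Kokkos::Serial device_type ;
 *   Kokkos::View< double * , device_type > x ;
 *   KOKKOS_INLINE_FUNCTION
 *   void operator()( const size_t iwork ) const { x(iwork) *= x(iwork); }
 * };
 * // Dispatch: Kokkos::parallel_for( x.dimension_0() , kernel );
 * \endcode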
Its - * operator() method defines the operation to parallelize, - * over the range of integer indices iwork=[0,work_count-1]. - * This compares to a single iteration \c iwork of a \c for loop. - */ -template< class FunctorType > -inline -void parallel_for( const size_t work_count , - const FunctorType & functor ) -{ - Impl::ParallelFor< FunctorType , size_t > tmp( functor , work_count ); -} - - -/** \brief Execute \c functor \c work_count times in parallel, with vectorization. - * - * This is like parallel_for, except that it mandates - * vectorization as well as parallelization of the given functor. We - * emphasize "mandates": this means that the user asserts that - * vectorization is correct, and insists that the compiler vectorize. - * Mandating vectorization is not always desirable, for example if the - * body of the functor is complicated. In some cases, users might - * want to parallelize over threads, and use vectorization inside the - * parallel operation. Furthermore, the compiler might still be able - * to vectorize through a parallel_for. Thus, users should take care - * not to use this execution option arbitrarily. - */ -template< class FunctorType > -inline -void vector_parallel_for( const size_t work_count , - const FunctorType & functor ) -{ - Impl::ParallelFor< FunctorType , VectorParallel > tmp( functor , work_count ); -} - -template< class DeviceType > -class MultiFunctorParallelFor ; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/// \class ParallelReduce -/// \brief Implementation detail of parallel_reduce. -/// -/// This is an implementation detail of parallel_reduce. Users should -/// skip this and go directly to the nonmember function parallel_reduce. -template< class FunctorType , - class WorkSpec , - class DeviceType = typename FunctorType::device_type > -class ParallelReduce ; - -/// \class ReduceAdapter -/// \brief Implementation detail of parallel_reduce. -/// -/// This is an implementation detail of parallel_reduce. Users should -/// skip this and go directly to the nonmember function parallel_reduce. -template< class FunctorType , - class ValueType = typename FunctorType::value_type > -struct ReduceAdapter ; - -} // namespace Impl -} // namespace Kokkos - - -namespace Kokkos { - -/** \brief Parallel reduction - * - * Example of a parallel_reduce functor for a POD (plain old data) value type: - * \code - * class FunctorType { // For POD value type - * public: - * typedef ... device_type ; - * typedef value_type ; - * void operator()( iwork , & update ) const ; - * void init( & update ) const ; - * void join( volatile & update , - * volatile const & input ) const ; - * - * typedef true_type has_final ; - * void final( & update ) const ; - * }; - * \endcode - * - * Example of a parallel_reduce functor for an array of POD (plain old data) values: - * \code - * class FunctorType { // For array of POD value - * public: - * typedef ... 
device_type ; - * typedef value_type[] ; - * void operator()( , update[] ) const ; - * void init( update[] ) const ; - * void join( volatile update[] , - * volatile const input[] ) const ; - * - * typedef true_type has_final ; - * void final( update[] ) const ; - * }; - * \endcode - */ -template< class FunctorType > -inline -void parallel_reduce( const size_t work_count , - const FunctorType & functor ) -{ - Impl::ParallelReduce< FunctorType , size_t > reduce( functor , work_count ); -} - -/** \brief Parallel reduction and output to host. - * - * If FunctorType::value_type is - * - \c PodType, then \c reference_type is PodType & . - * - PodType[], then \c reference_type is PodType * . - */ -template< class FunctorType > -inline -void parallel_reduce( const size_t work_count , - const FunctorType & functor , - typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type result ) -{ - Impl::ParallelReduce< FunctorType, size_t > - reduce( functor , work_count , Kokkos::Impl::ReduceAdapter< FunctorType >::pointer( result ) ); - - reduce.wait(); -} - -template< class FunctorType > -inline -void parallel_reduce( const VectorParallel & work_count , - const FunctorType & functor , - typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type result ) -{ - Impl::ParallelReduce< FunctorType, VectorParallel > - reduce( functor , work_count , Kokkos::Impl::ReduceAdapter< FunctorType >::pointer( result ) ); - - reduce.wait(); -} - -template< class DeviceType > -class MultiFunctorParallelReduce ; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/// \class ParallelReduce -/// \brief Implementation detail of parallel_reduce. -/// -/// This is an implementation detail of parallel_reduce. Users should -/// skip this and go directly to the nonmember function parallel_reduce. -template< class FunctorType , - class WorkSpec , - class DeviceType = typename FunctorType::device_type > -class ParallelScan ; - -} // namespace Impl -} // namespace Kokkos - -namespace Kokkos { - -template< class FunctorType > -inline -void parallel_scan( const size_t work_count , - const FunctorType & functor ) -{ - Impl::ParallelScan< FunctorType , size_t > scan( functor , work_count ); -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** \brief Parallel work request for shared memory, league size, and team size. - * - * If the shared size is too large then slow (global) memory will be used. - * If the league or team size are too large then they will be reduced. - */ -struct ParallelWorkRequest { - size_t league_size ; ///< Size of league (number of teams in a league) - size_t team_size ; ///< Size of team (number of threads in a team) - - KOKKOS_INLINE_FUNCTION - ParallelWorkRequest() : league_size(0), team_size(0) {} - - KOKKOS_INLINE_FUNCTION - ParallelWorkRequest( size_t s0 , size_t s1 ) : league_size(s0), team_size(s1) {} -}; - -/** \brief Execute functor in parallel with work request, - * the actual league_size and team_size may be smaller. - * - * class FunctorType { - * public: - * typedef ... 
device_type ; - * void operator()( device_type ) const ; - * }; - */ -template< class FunctorType > -inline -void parallel_for( const ParallelWorkRequest & request , - const FunctorType & functor ) -{ - Kokkos::Impl::ParallelFor< FunctorType , ParallelWorkRequest >( functor , request ); -} - -} // namespace Kokkos - -namespace Kokkos { - -/** \brief Parallel reduction. - * - * class FunctorType { - * public: - * typedef ... device_type ; - * typedef value_type ; // POD type - * void operator()( device_type , & ) const ; - * void init( & ) const ; - * void join( volatile & update , - * volatile const & input ) const ; - * - * typedef true_type has_final ; - * void final( & update ) const ; - * }; - * - * class FunctorType { // For array of POD value - * public: - * typedef ... device_type ; - * typedef value_type[] ; - * void operator()( device_type , update[] ) const ; - * void init( update[] ) const ; - * void join( volatile update[] , - * volatile const input[] ) const ; - * - * typedef true_type has_final ; - * void final( update[] ) const ; - * }; - */ -template< class FunctorType > -inline -void parallel_reduce( const Kokkos::ParallelWorkRequest & request , - const FunctorType & functor ) -{ - Impl::ParallelReduce< FunctorType , Kokkos::ParallelWorkRequest > reduce( functor , request ); -} - -template< class FunctorType > -inline -void parallel_reduce( const Kokkos::ParallelWorkRequest & request , - const FunctorType & functor , - typename Kokkos::Impl::ReduceAdapter< FunctorType >::reference_type result ) -{ - Impl::ParallelReduce< FunctorType , Kokkos::ParallelWorkRequest > - reduce( functor , request , Kokkos::Impl::ReduceAdapter< FunctorType >::pointer( result ) ); - - reduce.wait(); // Wait for reduce to complete and output result -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class Enable = void > -struct FunctorHasJoin : public false_type {}; - -template< class FunctorType > -struct FunctorHasJoin< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::join ) >::type > - : public true_type {}; - -template< class FunctorType , class Enable = void > -struct FunctorHasFinal : public false_type {}; - -template< class FunctorType > -struct FunctorHasFinal< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::final ) >::type > - : public true_type {}; - -template< class FunctorType , class Enable = void > -struct FunctorShmemSize -{ - static inline size_t value( const FunctorType & ) { return 0 ; } -}; - -template< class FunctorType > -struct FunctorShmemSize< FunctorType , typename enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type > -{ - static inline size_t value( const FunctorType & f ) { return f.shmem_size() ; } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class ScalarType > -struct ReduceAdapter -{ - enum { StaticValueSize = sizeof(ScalarType) }; - - typedef ScalarType & reference_type ; - typedef ScalarType * pointer_type ; - typedef ScalarType scalar_type ; - - KOKKOS_INLINE_FUNCTION static - reference_type reference( void * p ) { return *((ScalarType*) p); } - - KOKKOS_INLINE_FUNCTION static - 
reference_type reference( void * p , unsigned i ) { return ((ScalarType*) p)[i]; } - - KOKKOS_INLINE_FUNCTION static - pointer_type pointer( reference_type p ) { return & p ; } - - KOKKOS_INLINE_FUNCTION static - unsigned value_count( const FunctorType & ) { return 1 ; } - - KOKKOS_INLINE_FUNCTION static - unsigned value_size( const FunctorType & ) { return sizeof(ScalarType); } - - KOKKOS_INLINE_FUNCTION static - void copy( const FunctorType & , void * const dst , const void * const src ) - { *((scalar_type*)dst) = *((const scalar_type*)src); } - - KOKKOS_INLINE_FUNCTION static - void join( const FunctorType & f , volatile void * update , volatile const void * input ) - { f.join( *((volatile ScalarType*)update) , *((volatile const ScalarType*)input) ); } - - template< class F > - KOKKOS_INLINE_FUNCTION static - void final( const F & f , - typename enable_if< ( is_same::value && - FunctorHasFinal::value ) - >::type * p ) - { f.final( *((ScalarType *) p ) ); } - - template< class F > - KOKKOS_INLINE_FUNCTION static - void final( const F & , - typename enable_if< ( is_same::value && - ! FunctorHasFinal::value ) - >::type * ) - {} -}; - -template< class FunctorType , class ScalarType > -struct ReduceAdapter< FunctorType , ScalarType[] > -{ - enum { StaticValueSize = 0 }; - - typedef ScalarType * reference_type ; - typedef ScalarType * pointer_type ; - typedef ScalarType scalar_type ; - - KOKKOS_INLINE_FUNCTION static - ScalarType * reference( void * p ) { return (ScalarType*) p ; } - - KOKKOS_INLINE_FUNCTION static - reference_type reference( void * p , unsigned i ) { return ((ScalarType*) p)+i; } - - KOKKOS_INLINE_FUNCTION static - pointer_type pointer( reference_type p ) { return p ; } - - KOKKOS_INLINE_FUNCTION static - unsigned value_count( const FunctorType & f ) { return f.value_count ; } - - KOKKOS_INLINE_FUNCTION static - unsigned value_size( const FunctorType & f ) { return f.value_count * sizeof(ScalarType); } - - KOKKOS_INLINE_FUNCTION static - void copy( const FunctorType & f , void * const dst , const void * const src ) - { - for ( int i = 0 ; i < int(f.value_count) ; ++i ) { - ((scalar_type*)dst)[i] = ((const scalar_type*)src)[i]; - } - } - - KOKKOS_INLINE_FUNCTION static - void join( const FunctorType & f , volatile void * update , volatile const void * input ) - { f.join( ((volatile ScalarType*)update) , ((volatile const ScalarType*)input) ); } - - template< class F > - KOKKOS_INLINE_FUNCTION static - void final( const F & f , - typename enable_if< ( is_same::value && - FunctorHasFinal::value ) - >::type * p ) - { f.final( ((ScalarType *) p ) ); } - - template< class F > - KOKKOS_INLINE_FUNCTION static - void final( const F & , - typename enable_if< ( is_same::value && - ! 
FunctorHasFinal::value ) - >::type * ) - {} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* KOKKOS_PARALLEL_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_ParallelReduce.hpp b/kokkos/kokkos/core/src/Kokkos_ParallelReduce.hpp deleted file mode 100644 index c6d929e..0000000 --- a/kokkos/kokkos/core/src/Kokkos_ParallelReduce.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_PARALLELREDUCE_HPP -#define KOKKOS_PARALLELREDUCE_HPP - -#include -#include -#include -#include - -namespace Kokkos { - -//---------------------------------------------------------------------------- - -template< class FunctorType > -void vector_parallel_reduce( const size_t work_count , - const FunctorType & functor , - typename Impl::ReduceAdapter< FunctorType >::reference_type result ) - -{ - Impl::ParallelReduce< FunctorType, VectorParallel > - reduce( functor , work_count , Kokkos::Impl::ReduceAdapter< FunctorType >::pointer( result ) ); - - reduce.wait(); -} - -//---------------------------------------------------------------------------- - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -#endif /* KOKKOS_PARALLELREDUCE_HPP */ - diff --git a/kokkos/kokkos/core/src/Kokkos_Serial.hpp b/kokkos/kokkos/core/src/Kokkos_Serial.hpp deleted file mode 100644 index bce8cbc..0000000 --- a/kokkos/kokkos/core/src/Kokkos_Serial.hpp +++ /dev/null @@ -1,215 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -/// \file Kokkos_Serial.hpp -/// \brief Declaration and definition of Kokkos::Serial device. 
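///
/// A hedged usage sketch (the functor below and its data are illustrative
/// assumptions, not part of this header): any functor whose device_type is
/// Kokkos::Serial may be handed to parallel_for or parallel_reduce, and the
/// specializations at the end of this file simply run the work sequentially.
/// \code
/// struct SumFunctor {
///   typedef Kokkos::Serial device_type ;
///   typedef double         value_type ;
///   const double * x ;
///   KOKKOS_INLINE_FUNCTION
///   void operator()( const size_t i , double & update ) const { update += x[i]; }
///   KOKKOS_INLINE_FUNCTION
///   void init( double & update ) const { update = 0 ; }
///   KOKKOS_INLINE_FUNCTION
///   void join( volatile double & update , volatile const double & input ) const
///   { update += input ; }
/// };
/// // double sum = 0 ;
/// // Kokkos::parallel_reduce( n , f , sum );   // f is a SumFunctor instance
/// \endcode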
- -#ifndef KOKKOS_SERIAL_HPP -#define KOKKOS_SERIAL_HPP - -#include -#include -#include -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/// \class Serial -/// \brief Kokkos device for non-parallel execution -/// -/// A "device" represents a parallel execution model. It tells Kokkos -/// how to parallelize the execution of kernels in a parallel_for or -/// parallel_reduce. For example, the Threads device uses Pthreads or -/// C++11 threads on a CPU, the OpenMP device uses the OpenMP language -/// extensions, and the Cuda device uses NVIDIA's CUDA programming -/// model. The Serial device executes "parallel" kernels -/// sequentially. This is useful if you really do not want to use -/// threads, or if you want to explore different combinations of MPI -/// and shared-memory parallel programming models. -class Serial { -public: - //! \name Type declarations that all Kokkos devices must provide. - //@{ - - //! The device type (same as this class). - typedef Serial device_type ; - //! The size_type typedef best suited for this device. - typedef HostSpace::size_type size_type ; - //! This device's preferred memory space. - typedef HostSpace memory_space ; - //! This device's preferred array layout. - typedef LayoutRight array_layout ; - /// \brief This device's host mirror type. - /// - /// Serial is a host device, so the host mirror type is the same as - /// the device type itself. - typedef Serial host_mirror_device_type ; - - //@} - - /// \brief True if and only if this method is being called in a - /// thread-parallel function. - /// - /// For the Serial device, this method always returns false, - /// because parallel_for or parallel_reduce with the Serial device - /// always execute sequentially. - inline static int in_parallel() { return false ; } - - /** \brief Set the device in a "sleep" state. - * - * This function sets the device in a "sleep" state in which it is - * not ready for work. This may consume less resources than if the - * device were in an "awake" state, but it may also take time to - * bring the device from a sleep state to be ready for work. - * - * \return True if the device is in the "sleep" state, else false if - * the device is actively working and could not enter the "sleep" - * state. - */ - static bool sleep(); - - /// \brief Wake the device from the 'sleep' state so it is ready for work. - /// - /// \return True if the device is in the "ready" state, else "false" - /// if the device is actively working (which also means that it's - /// awake). - static bool wake(); - - /// \brief Wait until all dispatched functors complete. - /// - /// The parallel_for or parallel_reduce dispatch of a functor may - /// return asynchronously, before the functor completes. This - /// method does not return until all dispatched functors on this - /// device have completed. - static void fence() {} - - static void initialize() {} - - static int is_initialized() { return 1 ; } - - //! Free any resources being consumed by the device. - static void finalize() {} - - //! Print configuration information to the given output stream. 
- static void print_configuration( std::ostream & , const bool detail = false ); - - inline int league_rank() const { return 0 ; } - inline int league_size() const { return 1 ; } - inline int team_rank() const { return 0 ; } - inline int team_size() const { return 1 ; } - - inline void team_barrier() {} - - inline std::pair work_range( size_t n ) const - { return std::pair(0,n); } - - template< typename T > - inline T * get_shmem( const int count ); - - static void * resize_reduce_scratch( const unsigned ); -}; - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -//TODO: Needs constructor for Kokkos::ParallelWorkRequest CRT - -template< class FunctorType , class WorkSpec > -class ParallelFor< FunctorType , WorkSpec , Serial > { -public: - - ParallelFor( const FunctorType & functor , const size_t work_count ) - { - for ( size_t iwork = 0 ; iwork < work_count ; ++iwork ) { - functor( iwork ); - } - } -}; - -template< class FunctorType , class WorkSpec > -class ParallelReduce< FunctorType , WorkSpec , Serial > { -public: - - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - ParallelReduce( const FunctorType & functor , - const size_t work_count , - pointer_type result = 0 ) - { - if ( 0 == result ) { - result = (pointer_type ) Serial::resize_reduce_scratch( Reduce::value_size( functor ) ); - } - - functor.init( Reduce::reference( result ) ); - - for ( size_t iwork = 0 ; iwork < work_count ; ++iwork ) { - functor( iwork , Reduce::reference( result ) ); - } - - Reduce::final( functor , result ); - } - - void wait() {} -}; - -//---------------------------------------------------------------------------- - -} // namespace Impl -} // namespace Kokkos - -#endif /* #define KOKKOS_SERIAL_HPP */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - diff --git a/kokkos/kokkos/core/src/Kokkos_Threads.hpp b/kokkos/kokkos/core/src/Kokkos_Threads.hpp deleted file mode 100644 index d553f15..0000000 --- a/kokkos/kokkos/core/src/Kokkos_Threads.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_THREADS_HPP -#define KOKKOS_THREADS_HPP - -#include -#include -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { -class ThreadsExec ; -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/** \brief Device for a pool of Pthreads or C11 threads on a CPU. */ -class Threads { -public: - //! \name Type declarations that all Kokkos devices must provide. - //@{ - - typedef Threads device_type ; - typedef Kokkos::HostSpace memory_space ; - typedef memory_space::size_type size_type ; - typedef Kokkos::LayoutRight array_layout ; - typedef Kokkos::Threads host_mirror_device_type ; - - //@} - /*------------------------------------------------------------------------*/ - //! \name Static functions that all Kokkos devices must implement. - //@{ - - /// \brief True if and only if this method is being called in a - /// thread-parallel function. - static int in_parallel(); - - /** \brief Set the device in a "sleep" state. - * - * This function sets the device in a "sleep" state in which it is - * not ready for work. This may consume less resources than if the - * device were in an "awake" state, but it may also take time to - * bring the device from a sleep state to be ready for work. - * - * \return True if the device is in the "sleep" state, else false if - * the device is actively working and could not enter the "sleep" - * state. - */ - static bool sleep(); - - /// \brief Wake the device from the 'sleep' state so it is ready for work. - /// - /// \return True if the device is in the "ready" state, else "false" - /// if the device is actively working (which also means that it's - /// awake). - static bool wake(); - - /// \brief Wait until all dispatched functors complete. - /// - /// The parallel_for or parallel_reduce dispatch of a functor may - /// return asynchronously, before the functor completes. This - /// method does not return until all dispatched functors on this - /// device have completed. - static void fence(); - - /// \brief Free any resources being consumed by the device. - /// - /// For the Threads device, this terminates spawned worker threads. - static void finalize(); - - /// \brief Print configuration information to the given output stream. - static void print_configuration( std::ostream & , const bool detail = false ); - - //@} - //! 
\name Function for the functor device interface */ - //@{ - - inline int league_rank() const ; - inline int league_size() const ; - inline int team_rank() const ; - inline int team_size() const ; - - inline void team_barrier(); - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - inline Type team_scan( const Type & value ); - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename TypeLocal , typename TypeGlobal > - inline TypeGlobal team_scan( const TypeLocal & value , TypeGlobal * const global_accum ); - - inline void * get_shmem( const int size ); - - explicit inline Threads( Impl::ThreadsExec & ); - - /**@} */ - /*------------------------------------------------------------------------*/ - //! \name Device-specific functions - //@{ - - /** \brief Initialize the device in the "ready to work" state. - * - * The device is initialized in a "ready to work" or "awake" state. - * This state reduces latency and thus improves performance when - * dispatching work. However, the "awake" state consumes resources - * even when no work is being done. You may call sleep() to put - * the device in a "sleeping" state that does not consume as many - * resources, but it will take time (latency) to awaken the device - * again (via the wake()) method so that it is ready for work. - * - * Teams of threads are distributed as evenly as possible across - * the requested number of numa regions and cores per numa region. - * A team will not be split across a numa region. - * - * If the 'use_' arguments are not supplied the hwloc is queried - * to use all available cores. - */ - static void initialize( unsigned team_count = 1 , - unsigned threads_per_team = 1 , - unsigned use_numa_count = 0 , - unsigned use_cores_per_numa = 0 ); - - static int is_initialized(); - - static unsigned league_max(); - static unsigned team_max(); - - //@} - /*------------------------------------------------------------------------*/ - -private: - - friend class Impl::ThreadsExec ; - - Impl::ThreadsExec & m_exec ; -}; - -/*--------------------------------------------------------------------------*/ - -} // namespace Kokkos - -#include -#include -#include - -#endif /* #define KOKKOS_THREADS_HPP */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - diff --git a/kokkos/kokkos/core/src/Kokkos_View.hpp b/kokkos/kokkos/core/src/Kokkos_View.hpp deleted file mode 100644 index db18f17..0000000 --- a/kokkos/kokkos/core/src/Kokkos_View.hpp +++ /dev/null @@ -1,1693 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VIEW_HPP -#define KOKKOS_VIEW_HPP - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief View specialization mapping of view traits to a specialization tag */ -template< typename ScalarType , class ValueType , - class ArrayLayout , class uRank , class uRankDynamic , - class MemorySpace , class MemoryTraits > -struct ViewSpecialize ; - -template< class DstViewSpecialize , class SrcViewSpecialize = void , class Enable = void > -struct ViewAssignment ; - -} /* namespace Impl */ -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** \class ViewTraits - * \brief Traits class for accessing attributes of a View. - * - * This is an implementation detail of View. It is only of interest - * to developers implementing a new specialization of View. - * - * Template argument permutations: - * - View< DataType , Device , void , void > - * - View< DataType , Device , MemoryTraits , void > - * - View< DataType , Device , void , MemoryTraits > - * - View< DataType , ArrayLayout , Device , void > - * - View< DataType , ArrayLayout , Device , MemoryTraits > - */ -template< class DataType , - class Arg1 , - class Arg2 , - class Arg3 > -class ViewTraits { -private: - - // Arg1 is either Device or Layout, both of which must have 'typedef ... array_layout'. - // If Arg1 is not Layout then Arg1 must be Device - enum { Arg1IsDevice = ! Impl::is_same< Arg1 , typename Arg1::array_layout >::value }; - enum { Arg2IsDevice = ! 
Arg1IsDevice }; - - // If Arg1 is device and Arg2 is not void then Arg2 is MemoryTraits. - // If Arg1 is device and Arg2 is void and Arg3 is not void then Arg3 is MemoryTraits. - // If Arg2 is device and Arg3 is not void then Arg3 is MemoryTraits. - enum { Arg2IsVoid = Impl::is_same< Arg2 , void >::value }; - enum { Arg3IsVoid = Impl::is_same< Arg3 , void >::value }; - enum { Arg2IsMemory = ! Arg2IsVoid && Arg1IsDevice && Arg3IsVoid }; - enum { Arg3IsMemory = ! Arg3IsVoid && ( ( Arg1IsDevice && Arg2IsVoid ) || Arg2IsDevice ) }; - - - typedef typename Arg1::array_layout ArrayLayout ; - typedef typename Impl::if_c< Arg1IsDevice , Arg1 , Arg2 >::type::device_type DeviceType ; - - typedef typename Impl::if_c< Arg2IsMemory , Arg2 , - typename Impl::if_c< Arg3IsMemory , Arg3 , MemoryManaged - >::type >::type::memory_traits MemoryTraits ; - - typedef Impl::AnalyzeShape analysis ; - -public: - - //------------------------------------ - // Data type traits: - - typedef DataType data_type ; - typedef typename analysis::const_type const_data_type ; - typedef typename analysis::non_const_type non_const_data_type ; - - //------------------------------------ - // Scalar type traits: - - typedef typename analysis::scalar_type scalar_type ; - typedef typename analysis::const_scalar_type const_scalar_type ; - typedef typename analysis::non_const_scalar_type non_const_scalar_type ; - - //------------------------------------ - // Value type traits: - - typedef typename analysis::value_type value_type ; - typedef typename analysis::const_value_type const_value_type ; - typedef typename analysis::non_const_value_type non_const_value_type ; - - //------------------------------------ - // Layout and shape traits: - - typedef typename Impl::StaticAssertSame< ArrayLayout , typename ArrayLayout ::array_layout >::type array_layout ; - - typedef typename analysis::shape shape_type ; - - enum { rank = shape_type::rank }; - enum { rank_dynamic = shape_type::rank_dynamic }; - - //------------------------------------ - // Device and memory space traits: - - typedef typename Impl::StaticAssertSame< DeviceType , typename DeviceType ::device_type >::type device_type ; - typedef typename Impl::StaticAssertSame< MemoryTraits , typename MemoryTraits::memory_traits >::type memory_traits ; - - typedef typename device_type::memory_space memory_space ; - typedef typename device_type::size_type size_type ; - - enum { is_hostspace = Impl::is_same< memory_space , HostSpace >::value }; - enum { is_managed = memory_traits::Unmanaged == 0 }; - - //------------------------------------ - // Specialization: - typedef typename - Impl::ViewSpecialize< scalar_type , - value_type , - array_layout , - Impl::unsigned_ , - Impl::unsigned_ , - memory_space , - memory_traits - >::type specialize ; -}; - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief Default view specialization has ScalarType == ValueType - * and LayoutLeft or LayoutRight. 
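 *
 * For instance (an illustrative assumption; Device stands for any device
 * type declared elsewhere in this library), both of the following resolve
 * to the LayoutDefault specialization, because scalar_type == value_type
 * and the layout is LayoutLeft or LayoutRight:
 * \code
 * Kokkos::View< double ** , Kokkos::LayoutLeft  , Device > a ;
 * Kokkos::View< double ** , Kokkos::LayoutRight , Device > b ;
 * \endcode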
- */ -struct LayoutDefault ; - -template< typename ScalarType , class Rank , class RankDynamic , class MemorySpace , class MemoryTraits > -struct ViewSpecialize< ScalarType , ScalarType , - LayoutLeft , Rank , RankDynamic , - MemorySpace , MemoryTraits > -{ typedef LayoutDefault type ; }; - -template< typename ScalarType , class Rank , class RankDynamic , class MemorySpace , class MemoryTraits > -struct ViewSpecialize< ScalarType , ScalarType , - LayoutRight , Rank , RankDynamic , - MemorySpace , MemoryTraits > -{ typedef LayoutDefault type ; }; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief Types for compile-time detection of View usage errors */ -namespace ViewError { - -struct allocation_constructor_requires_managed {}; -struct user_pointer_constructor_requires_unmanaged {}; -struct device_shmem_constructor_requires_unmanaged {}; - -struct scalar_operator_called_from_non_scalar_view {}; - -} /* namespace ViewError */ - -//---------------------------------------------------------------------------- -/** \brief Enable view parentheses operator for - * match of layout and integral arguments. - * If correct rank define type from traits, - * otherwise define type as an error message. - */ -template< class ReturnType , class Traits , class Layout , unsigned Rank , - typename iType0 = int , typename iType1 = int , - typename iType2 = int , typename iType3 = int , - typename iType4 = int , typename iType5 = int , - typename iType6 = int , typename iType7 = int , - class Enable = void > -struct ViewEnableArrayOper ; - -template< class ReturnType , class Traits , class Layout , unsigned Rank , - typename iType0 , typename iType1 , - typename iType2 , typename iType3 , - typename iType4 , typename iType5 , - typename iType6 , typename iType7 > -struct ViewEnableArrayOper< - ReturnType , Traits , Layout , Rank , - iType0 , iType1 , iType2 , iType3 , - iType4 , iType5 , iType6 , iType7 , - typename enable_if< - iType0(0) == 0 && iType1(0) == 0 && iType2(0) == 0 && iType3(0) == 0 && - iType4(0) == 0 && iType5(0) == 0 && iType6(0) == 0 && iType7(0) == 0 && - is_same< typename Traits::array_layout , Layout >::value && - ( unsigned(Traits::rank) == Rank ) - >::type > -{ - typedef ReturnType type ; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -struct AllocateWithoutInitializing {}; - -namespace { -const AllocateWithoutInitializing allocate_without_initializing = AllocateWithoutInitializing(); -} - -/** \class View - * \brief View to an array of data. - * - * A View represents an array of one or more dimensions. - * For details, please refer to Kokkos' tutorial materials. - * - * \section Kokkos_View_TemplateParameters Template parameters - * - * This class has both required and optional template parameters. The - * \c DataType parameter must always be provided, and must always be - * first. The parameters \c Arg1Type, \c Arg2Type, and \c Arg3Type are - * placeholders for different template parameters. The default value - * of the fifth template parameter \c Specialize suffices for most use - * cases. 
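 *
 * Two concrete declarations, as a hedged sketch (Device is a placeholder
 * for any Kokkos device type; the labels and the extent n are illustrative):
 * \code
 * // Managed 1-D array of double with run-time extent n:
 * Kokkos::View< double *  , Device > x( "x" , n );
 * // 2-D array of int: run-time first dimension, compile-time second dimension of 3:
 * Kokkos::View< int *[3]  , Device > y( "y" , n );
 * \endcode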
When explaining the template parameters, we won't refer to - * \c Arg1Type, \c Arg2Type, and \c Arg3Type; instead, we will refer - * to the valid categories of template parameters, in whatever order - * they may occur. - * - * Valid ways in which template arguments may be specified: - * - View< DataType , Device > - * - View< DataType , Device , MemoryTraits > - * - View< DataType , Device , void , MemoryTraits > - * - View< DataType , Layout , Device > - * - View< DataType , Layout , Device , MemoryTraits > - * - * \tparam DataType (required) This indicates both the type of each - * entry of the array, and the combination of compile-time and - * run-time array dimension(s). For example, double* - * indicates a one-dimensional array of \c double with run-time - * dimension, and int*[3] a two-dimensional array of \c int - * with run-time first dimension and compile-time second dimension - * (of 3). In general, the run-time dimensions (if any) must go - * first, followed by zero or more compile-time dimensions. For - * more examples, please refer to the tutorial materials. - * - * \tparam Device (required) The execution model for parallel - * operations. Examples include Threads, OpenMP, Cuda, and Serial. - * - * \tparam Layout (optional) The array's layout in memory. For - * example, LayoutLeft indicates a column-major (Fortran style) - * layout, and LayoutRight a row-major (C style) layout. If not - * specified, this defaults to the preferred layout for the - * Device. - * - * \tparam MemoryTraits (optional) Assertion of the user's intended - * access behavior. For example, RandomRead indicates read-only - * access with limited spatial locality, and Unmanaged lets users - * wrap externally allocated memory in a View without automatic - * deallocation. - * - * \section Kokkos_View_MT \c MemoryTraits discussion - * - * \subsection Kokkos_View_MT_Interp \c MemoryTraits interpretation depends on \c Device - * - * Some \c MemoryTraits options may have different interpretations for - * different \c Device types. For example, with the Cuda device, - * RandomRead tells Kokkos to fetch the data through the texture - * cache, whereas the non-GPU devices have no such hardware construct. - * - * \subsection Kokkos_View_MT_PrefUse Preferred use of \c MemoryTraits - * - * Users should defer applying the optional \c MemoryTraits parameter - * until the point at which they actually plan to rely on it in a - * computational kernel. This minimizes the number of template - * parameters exposed in their code, which reduces the cost of - * compilation. Users may always assign a View without specified - * MemoryTraits to a compatible View with that specification. - * For example: - * \code - * // Pass in the simplest types of View possible. - * void - * doSomething (View out, - * View in) - * { - * // Assign the "generic" View in to a RandomRead View in_rr. - * // Note that RandomRead View objects must have const data. - * View in_rr = in; - * // ... do something with in_rr and out ... 
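 *   // (As noted above, on the Cuda device reads through in_rr may be routed
 *   //  through the texture cache; devices without such hardware simply read
 *   //  normally, so the code remains portable.)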
- * } - * \endcode - */ -template< class DataType , - class Arg1Type , /* ArrayLayout or DeviceType */ - class Arg2Type = void , /* DeviceType or MemoryTraits */ - class Arg3Type = void , /* MemoryTraits */ - class Specialize = - typename ViewTraits::specialize > -class View ; - -template< class DataType , - class Arg1Type , - class Arg2Type , - class Arg3Type > -class View< DataType , Arg1Type , Arg2Type , Arg3Type , Impl::LayoutDefault > - : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > -{ -public: - - typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; - -private: - - // Assignment of compatible views requirement: - template< class , class , class , class , class > friend class View ; - - // Assignment of compatible subview requirement: - template< class , class , class > friend struct Impl::ViewAssignment ; - - typedef Impl::LayoutStride< typename traits::shape_type , - typename traits::array_layout > stride_type ; - - typename traits::scalar_type * m_ptr_on_device ; - typename traits::shape_type m_shape ; - stride_type m_stride ; - -public: - - typedef View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::device_type , - typename traits::memory_traits > const_type ; - - typedef View< typename traits::non_const_data_type , - typename traits::array_layout , - typename traits::device_type::host_mirror_device_type , - void > HostMirror ; - - //------------------------------------ - // Shape - - enum { Rank = traits::rank }; - - KOKKOS_INLINE_FUNCTION typename traits::shape_type shape() const { return m_shape ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { return m_shape.N0 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_shape.N1 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_shape.N2 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_shape.N3 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_shape.N4 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_shape.N5 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_shape.N6 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_shape.N7 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type size() const - { - return m_shape.N0 - * m_shape.N1 - * m_shape.N2 - * m_shape.N3 - * m_shape.N4 - * m_shape.N5 - * m_shape.N6 - * m_shape.N7 - ; - } - - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename traits::size_type dimension( const iType & i ) const - { return Impl::dimension( m_shape , i ); } - - //------------------------------------ - -private: - - template< class ViewRHS > - KOKKOS_INLINE_FUNCTION - void assign_compatible_view( const ViewRHS & rhs , - typename Impl::enable_if< Impl::ViewAssignable< View , ViewRHS >::value >::type * = 0 ) - { - typedef typename traits::shape_type shape_type ; - typedef typename traits::memory_space memory_space ; - typedef typename traits::memory_traits memory_traits ; - - Impl::ViewTracking< traits >::decrement( m_ptr_on_device ); - - shape_type::assign( m_shape, - rhs.m_shape.N0 , rhs.m_shape.N1 , rhs.m_shape.N2 , rhs.m_shape.N3 , - rhs.m_shape.N4 , rhs.m_shape.N5 , rhs.m_shape.N6 , rhs.m_shape.N7 ); - - stride_type::assign( m_stride , rhs.m_stride.value ); - - m_ptr_on_device = rhs.m_ptr_on_device ; - - Impl::ViewTracking< traits >::increment( 
m_ptr_on_device ); - } - -public: - - //------------------------------------ - // Destructor, constructors, assignment operators: - - KOKKOS_INLINE_FUNCTION - ~View() { Impl::ViewTracking< traits >::decrement( m_ptr_on_device ); } - - KOKKOS_INLINE_FUNCTION - View() : m_ptr_on_device(0) - { - traits::shape_type::assign(m_shape,0,0,0,0,0,0,0,0); - stride_type::assign(m_stride,0); - } - - KOKKOS_INLINE_FUNCTION - View( const View & rhs ) : m_ptr_on_device(0) { assign_compatible_view( rhs ); } - - KOKKOS_INLINE_FUNCTION - View & operator = ( const View & rhs ) { assign_compatible_view( rhs ); return *this ; } - - //------------------------------------ - // Construct or assign compatible view: - - template< class RT , class RL , class RD , class RM > - KOKKOS_INLINE_FUNCTION - View( const View & rhs ) - : m_ptr_on_device(0) { assign_compatible_view( rhs ); } - - template< class RT , class RL , class RD , class RM > - KOKKOS_INLINE_FUNCTION - View & operator = ( const View & rhs ) - { assign_compatible_view( rhs ); return *this ; } - - //------------------------------------ - // Allocation of a managed view with possible alignment padding. - - typedef Impl::if_c< traits::is_managed , - std::string , - Impl::ViewError::allocation_constructor_requires_managed > - if_allocation_constructor ; - - explicit inline - View( const typename if_allocation_constructor::type & label , - const size_t n0 = 0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) - : m_ptr_on_device(0) - { - typedef typename traits::device_type device_type ; - typedef typename traits::memory_space memory_space ; - typedef typename traits::shape_type shape_type ; - typedef typename traits::scalar_type scalar_type ; - - shape_type ::assign( m_shape, n0, n1, n2, n3, n4, n5, n6, n7 ); - stride_type::assign_with_padding( m_stride , m_shape ); - - m_ptr_on_device = (scalar_type *) - memory_space::allocate( if_allocation_constructor::select( label ) , - typeid(scalar_type) , - sizeof(scalar_type) , - Impl::capacity( m_shape , m_stride ) ); - - Impl::ViewInitialize< device_type > init( *this ); - } - - explicit inline - View( const AllocateWithoutInitializing & , - const typename if_allocation_constructor::type & label , - const size_t n0 = 0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) - : m_ptr_on_device(0) - { - typedef typename traits::device_type device_type ; - typedef typename traits::memory_space memory_space ; - typedef typename traits::shape_type shape_type ; - typedef typename traits::scalar_type scalar_type ; - - shape_type ::assign( m_shape, n0, n1, n2, n3, n4, n5, n6, n7 ); - stride_type::assign_with_padding( m_stride , m_shape ); - - m_ptr_on_device = (scalar_type *) - memory_space::allocate( if_allocation_constructor::select( label ) , - typeid(scalar_type) , - sizeof(scalar_type) , - Impl::capacity( m_shape , m_stride ) ); - } - - //------------------------------------ - // Assign an unmanaged View from pointer, can be called in functors. - // No alignment padding is performed. - - typedef Impl::if_c< ! 
traits::is_managed , - typename traits::scalar_type * , - Impl::ViewError::user_pointer_constructor_requires_unmanaged > - if_user_pointer_constructor ; - - View( typename if_user_pointer_constructor::type ptr , - const size_t n0 = 0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) - : m_ptr_on_device(0) - { - typedef typename traits::shape_type shape_type ; - typedef typename traits::scalar_type scalar_type ; - - shape_type ::assign( m_shape, n0, n1, n2, n3, n4, n5, n6, n7 ); - stride_type::assign_no_padding( m_stride , m_shape ); - - m_ptr_on_device = if_user_pointer_constructor::select( ptr ); - } - - //------------------------------------ - // Assign unmanaged View to portion of Device shared memory - - typedef Impl::if_c< ! traits::is_managed , - typename traits::device_type , - Impl::ViewError::device_shmem_constructor_requires_unmanaged > - if_device_shmem_constructor ; - - explicit KOKKOS_INLINE_FUNCTION - View( typename if_device_shmem_constructor::type & dev , - const unsigned n0 = 0 , - const unsigned n1 = 0 , - const unsigned n2 = 0 , - const unsigned n3 = 0 , - const unsigned n4 = 0 , - const unsigned n5 = 0 , - const unsigned n6 = 0 , - const unsigned n7 = 0 ) - : m_ptr_on_device(0) - { - typedef typename traits::shape_type shape_type ; - typedef typename traits::scalar_type scalar_type ; - - enum { align = 8 }; - enum { mask = align - 1 }; - - shape_type::assign( m_shape, n0, n1, n2, n3, n4, n5, n6, n7 ); - stride_type::assign_no_padding( m_stride , m_shape ); - - typedef Impl::if_c< ! traits::is_managed , - scalar_type * , - Impl::ViewError::device_shmem_constructor_requires_unmanaged > - if_device_shmem_pointer ; - - // Select the first argument: - m_ptr_on_device = if_device_shmem_pointer::select( - (scalar_type *) dev.get_shmem( unsigned( sizeof(scalar_type) * Impl::capacity( m_shape , m_stride ) + unsigned(mask) ) & ~unsigned(mask) ) ); - } - - static inline - unsigned shmem_size( const unsigned n0 = 0 , - const unsigned n1 = 0 , - const unsigned n2 = 0 , - const unsigned n3 = 0 , - const unsigned n4 = 0 , - const unsigned n5 = 0 , - const unsigned n6 = 0 , - const unsigned n7 = 0 ) - { - enum { align = 8 }; - enum { mask = align - 1 }; - - typedef typename traits::shape_type shape_type ; - typedef typename traits::scalar_type scalar_type ; - - shape_type shape ; - stride_type stride ; - - traits::shape_type::assign( shape, n0, n1, n2, n3, n4, n5, n6, n7 ); - stride_type::assign_no_padding( stride , shape ); - - return unsigned( sizeof(scalar_type) * Impl::capacity( shape , stride ) + unsigned(mask) ) & ~unsigned(mask) ; - } - - //------------------------------------ - // Is not allocated - - KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_ptr_on_device ; } - - //------------------------------------ - // Operators for scalar (rank zero) views. 
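  // (editor's sketch, not part of the original header) A rank-zero View acts as
  // a handle to a single value; the operators below let it be assigned and read
  // directly.  Assuming the OpenMP device type that appears later in this diff:
  //
  //   Kokkos::View< double , Kokkos::OpenMP > alpha("alpha"); // one element
  //   alpha() = 2.5;       // operator() yields a reference to the value
  //   double a = *alpha;   // operator* reads the same location
  //   alpha = 3.0;         // operator=(scalar) stores through the pointer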
- - typedef Impl::if_c< traits::rank == 0 , - typename traits::scalar_type , - Impl::ViewError::scalar_operator_called_from_non_scalar_view > - if_scalar_operator ; - - KOKKOS_INLINE_FUNCTION - const View & operator = ( const typename if_scalar_operator::type & rhs ) const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - *m_ptr_on_device = if_scalar_operator::select( rhs ); - return *this ; - } - - KOKKOS_INLINE_FUNCTION - operator typename if_scalar_operator::type & () const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - return if_scalar_operator::select( *m_ptr_on_device ); - } - - KOKKOS_INLINE_FUNCTION - typename if_scalar_operator::type & operator()() const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - return if_scalar_operator::select( *m_ptr_on_device ); - } - - KOKKOS_INLINE_FUNCTION - typename if_scalar_operator::type & operator*() const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - return if_scalar_operator::select( *m_ptr_on_device ); - } - - //------------------------------------ - // Array member access operators enabled if - // (1) a zero value of all argument types are compile-time comparable to zero - // (2) the rank matches the number of arguments - // (3) the memory space is valid for the access - //------------------------------------ - // LayoutLeft, rank 1: - - template< typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , traits, LayoutLeft, 1, iType0 >::type - operator[] ( const iType0 & i0 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 ]; - } - - template< typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , traits, LayoutLeft, 1, iType0 >::type - operator() ( const iType0 & i0 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 ]; - } - - template< typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , traits, LayoutLeft, 1, iType0 >::type - at( const iType0 & i0 , const int , const int , const int , - const int , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 ]; - } - - // LayoutLeft, rank 2: - - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 2, iType0, iType1 >::type - operator() ( const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * i1 ]; - } - - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 2, iType0, iType1 >::type - at( const iType0 & i0 , const iType1 & i1 , const int , const int , - const int , const int , const int , const int ) const - { - 
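    // LayoutLeft is column-major: i0 is the stride-one index and m_stride.value is
    // the (possibly padded) leading dimension, so the flat offset is i0 + m_stride.value * i1.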
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * i1 ]; - } - - // LayoutLeft, rank 3: - - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 3, iType0, iType1, iType2 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_shape, i0,i1,i2 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * i2 ) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 3, iType0, iType1, iType2 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const int , - const int , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_shape, i0,i1,i2 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * i2 ) ]; - } - - // LayoutLeft, rank 4: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 4, iType0, iType1, iType2, iType3 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_shape, i0,i1,i2,i3 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * i3 )) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 4, iType0, iType1, iType2, iType3 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const int , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_shape, i0,i1,i2,i3 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * i3 )) ]; - } - - // LayoutLeft, rank 5: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 5, iType0, iType1, iType2, iType3 , iType4 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_shape, i0,i1,i2,i3,i4 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * i4 ))) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 5, iType0, 
iType1, iType2, iType3 , iType4 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_shape, i0,i1,i2,i3,i4 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * i4 ))) ]; - } - - // LayoutLeft, rank 6: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 6, iType0, iType1, iType2, iType3 , iType4, iType5 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_shape, i0,i1,i2,i3,i4,i5 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * i5 )))) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 6, iType0, iType1, iType2, iType3 , iType4, iType5 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_shape, i0,i1,i2,i3,i4,i5 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * i5 )))) ]; - } - - // LayoutLeft, rank 7: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 7, iType0, iType1, iType2, iType3 , iType4, iType5, iType6 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_shape, i0,i1,i2,i3,i4,i5,i6 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * ( - i5 + m_shape.N5 * i6 ))))) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 7, iType0, iType1, iType2, iType3 , iType4, iType5, iType6 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_shape, i0,i1,i2,i3,i4,i5,i6 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + 
m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * ( - i5 + m_shape.N5 * i6 ))))) ]; - } - - // LayoutLeft, rank 8: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 , typename iType7 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 8, iType0, iType1, iType2, iType3 , iType4, iType5, iType6, iType7 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_shape, i0,i1,i2,i3,i4,i5,i6,i7 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * ( - i5 + m_shape.N5 * ( - i6 + m_shape.N6 * i7 )))))) ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 , typename iType7 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutLeft, 8, iType0, iType1, iType2, iType3 , iType4, iType5, iType6, iType7 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_shape, i0,i1,i2,i3,i4,i5,i6,i7 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 + m_stride.value * ( - i1 + m_shape.N1 * ( - i2 + m_shape.N2 * ( - i3 + m_shape.N3 * ( - i4 + m_shape.N4 * ( - i5 + m_shape.N5 * ( - i6 + m_shape.N6 * i7 )))))) ]; - } - - //------------------------------------ - // LayoutRight, rank 1: - - template< typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , traits, LayoutRight, 1, iType0 >::type - operator[] ( const iType0 & i0 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 ]; - } - - template< typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , traits, LayoutRight, 1, iType0 >::type - operator() ( const iType0 & i0 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 ]; - } - - template< typename iType0 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , traits, LayoutRight, 1, iType0 >::type - at( const iType0 & i0 , const int , const int , const int , - const int , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_shape, i0 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i0 ]; - } - - // LayoutRight, rank 2: - - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 2, iType0, iType1 >::type - operator() ( const iType0 & i0 , const iType1 & i1 ) const - { - 
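    // LayoutRight is row-major: the last index (i1 at rank 2) is the stride-one index
    // and m_stride.value is the (possibly padded) row length, so the offset is i1 + i0 * m_stride.value.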
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i1 + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 2, iType0, iType1 >::type - at( const iType0 & i0 , const iType1 & i1 , const int , const int , - const int , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i1 + i0 * m_stride.value ]; - } - - // LayoutRight, rank 3: - - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 3, iType0, iType1, iType2 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_shape, i0,i1,i2 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i2 + m_shape.N2 * ( i1 ) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 3, iType0, iType1, iType2 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const int , - const int , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_shape, i0,i1,i2 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i2 + m_shape.N2 * ( i1 ) + i0 * m_stride.value ]; - } - - // LayoutRight, rank 4: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 4, iType0, iType1, iType2, iType3 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_shape, i0,i1,i2,i3 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 4, iType0, iType1, iType2, iType3 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const int , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_shape, i0,i1,i2,i3 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )) + i0 * m_stride.value ]; - } - - // LayoutRight, rank 5: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 5, iType0, iType1, iType2, iType3, iType4 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 ) 
const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_shape, i0,i1,i2,i3,i4 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 ))) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 5, iType0, iType1, iType2, iType3, iType4 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const int , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_shape, i0,i1,i2,i3,i4 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 ))) + i0 * m_stride.value ]; - } - - // LayoutRight, rank 6: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 6, iType0, iType1, iType2, iType3, iType4, iType5 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_shape, i0,i1,i2,i3,i4,i5 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )))) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 6, iType0, iType1, iType2, iType3, iType4, iType5 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const int , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_shape, i0,i1,i2,i3,i4,i5 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )))) + i0 * m_stride.value ]; - } - - // LayoutRight, rank 7: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 7, iType0, iType1, iType2, iType3, iType4, iType5, iType6 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_shape, i0,i1,i2,i3,i4,i5,i6 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i6 + m_shape.N6 * ( - i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 ))))) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 > - 
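  // (editor's note) Every 'at' overload accepts a full set of eight indices and ignores
  // the unused trailing placeholders, presumably so rank-generic helpers such as the
  // remap used by deep_copy can address a view of any rank through one call form.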
KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 7, iType0, iType1, iType2, iType3, iType4, iType5, iType6 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const int ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_shape, i0,i1,i2,i3,i4,i5,i6 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i6 + m_shape.N6 * ( - i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 ))))) + i0 * m_stride.value ]; - } - - // LayoutRight, rank 8: - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 , typename iType7 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 8, iType0, iType1, iType2, iType3, iType4, iType5, iType6, iType7 >::type - operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_shape, i0,i1,i2,i3,i4,i5,i6,i7 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i7 + m_shape.N7 * ( - i6 + m_shape.N6 * ( - i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )))))) + i0 * m_stride.value ]; - } - - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , - typename iType4 , typename iType5 , typename iType6 , typename iType7 > - KOKKOS_INLINE_FUNCTION - typename Impl::ViewEnableArrayOper< typename traits::scalar_type & , - traits, LayoutRight, 8, iType0, iType1, iType2, iType3, iType4, iType5, iType6, iType7 >::type - at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , - const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const - { - KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_shape, i0,i1,i2,i3,i4,i5,i6,i7 ); - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - - return m_ptr_on_device[ i7 + m_shape.N7 * ( - i6 + m_shape.N6 * ( - i5 + m_shape.N5 * ( - i4 + m_shape.N4 * ( - i3 + m_shape.N3 * ( - i2 + m_shape.N2 * ( - i1 )))))) + i0 * m_stride.value ]; - } - - //------------------------------------ - // Access to the underlying contiguous storage of this view specialization. - // These methods are specific to specialization of a view. - - KOKKOS_INLINE_FUNCTION - typename traits::scalar_type * ptr_on_device() const { return m_ptr_on_device ; } - - // Stride of physical storage, dimensioned to at least Rank - template< typename iType > - KOKKOS_INLINE_FUNCTION - void stride( iType * const s ) const - { Impl::stride( s , m_shape , m_stride ); } - - // Count of contiguously allocated data members including padding. 
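  // (editor's note) capacity() may therefore exceed size(): size() is the product of the
  // logical dimensions, while capacity() also counts alignment padding introduced by
  // assign_with_padding(); the raw deep_copy below transfers sizeof(scalar_type) * capacity() bytes.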
- KOKKOS_INLINE_FUNCTION - typename traits::size_type capacity() const - { return Impl::capacity( m_shape , m_stride ); } -}; - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< class LT , class LL , class LD , class LM , class LS , - class RT , class RL , class RD , class RM , class RS > -KOKKOS_INLINE_FUNCTION -typename Impl::enable_if<( Impl::is_same< LS , RS >::value ), bool >::type -operator == ( const View & lhs , - const View & rhs ) -{ - // Same data, layout, dimensions - typedef ViewTraits lhs_traits ; - typedef ViewTraits rhs_traits ; - - return - Impl::is_same< typename lhs_traits::const_data_type , - typename rhs_traits::const_data_type >::value && - Impl::is_same< typename lhs_traits::array_layout , - typename rhs_traits::array_layout >::value && - Impl::is_same< typename lhs_traits::memory_space , - typename rhs_traits::memory_space >::value && - Impl::is_same< typename lhs_traits::specialize , - typename rhs_traits::specialize >::value && - lhs.ptr_on_device() == rhs.ptr_on_device() && - lhs.shape() == rhs.shape() ; -} - -template< class LT , class LL , class LD , class LM , class LS , - class RT , class RL , class RD , class RM , class RS > -KOKKOS_INLINE_FUNCTION -bool operator != ( const View & lhs , - const View & rhs ) -{ - return ! operator==( lhs , rhs ); -} - -//---------------------------------------------------------------------------- - - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -//---------------------------------------------------------------------------- -/** \brief Deep copy a value into a view. - */ -template< class DT , class DL , class DD , class DM , class DS > -inline -void deep_copy( const View & dst , - typename Impl::enable_if<( - Impl::is_same< typename ViewTraits::non_const_scalar_type , - typename ViewTraits::scalar_type >::value - ), typename ViewTraits::const_scalar_type >::type & value ) -{ - Impl::ViewFill< View >( dst , value ); -} - -template< class ST , class SL , class SD , class SM , class SS > -inline -typename Impl::enable_if<( ViewTraits::rank == 0 )>::type -deep_copy( ST & dst , const View & src ) -{ - typedef ViewTraits src_traits ; - typedef typename src_traits::memory_space src_memory_space ; - Impl::DeepCopy< HostSpace , src_memory_space >( & dst , src.ptr_on_device() , sizeof(ST) ); -} - -//---------------------------------------------------------------------------- -/** \brief A deep copy between views of the same specialization, compatible type, - * same rank, same layout are handled by that specialization. 
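 *  This overload asserts that the shapes are equal and then copies
 *  sizeof(scalar_type) * capacity() raw bytes between the two memory spaces;
 *  no layout remapping happens on this path.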
- */ -template< class DT , class DL , class DD , class DM , class DS , - class ST , class SL , class SD , class SM , class SS > -inline -void deep_copy( const View & dst , - const View & src , - typename Impl::enable_if<( - Impl::is_same< typename ViewTraits::scalar_type , - typename ViewTraits::non_const_scalar_type >::value - && - Impl::is_same< typename ViewTraits::array_layout , - typename ViewTraits::array_layout >::value - && - ( unsigned(ViewTraits::rank) == unsigned(ViewTraits::rank) ) - )>::type * = 0 ) -{ - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - typedef typename dst_traits::memory_space dst_memory_space ; - typedef typename src_traits::memory_space src_memory_space ; - - if ( dst.ptr_on_device() != src.ptr_on_device() ) { - - Impl::assert_shapes_are_equal( dst.shape() , src.shape() ); - - const size_t nbytes = sizeof(typename dst_traits::scalar_type) * dst.capacity(); - - Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes ); - } -} - - -/** \brief Deep copy equal dimension arrays in the host space which - * have different layouts or specializations. - */ -template< class DT , class DL , class DD , class DM , class DS , - class ST , class SL , class SM , class SS > -inline -void deep_copy( const View< DT, DL, DD, DM, DS> & dst , - const View< ST, SL, DD, SM, SS> & src , - const typename Impl::enable_if<( - // Destination is not constant: - Impl::is_same< typename ViewTraits::value_type , - typename ViewTraits::non_const_value_type >::value - && - // Same rank - ( unsigned( ViewTraits::rank ) == - unsigned( ViewTraits::rank ) ) - && - // Different layout or different specialization: - ( ( ! Impl::is_same< typename DL::array_layout , - typename SL::array_layout >::value ) - || - ( ! Impl::is_same< DS , SS >::value ) - ) - )>::type * = 0 ) -{ - typedef View< DT, DL, DD, DM, DS> dst_type ; - typedef View< ST, SL, DD, SM, SS> src_type ; - - assert_shapes_equal_dimension( dst.shape() , src.shape() ); - - Impl::ViewRemap< dst_type , src_type >( dst , src ); -} - -//---------------------------------------------------------------------------- - -template< class T , class L , class D , class M , class S > -typename Impl::enable_if<( - View::is_managed - ), typename View::HostMirror >::type -inline -create_mirror( const View & src ) -{ - typedef View view_type ; - typedef typename view_type::HostMirror host_view_type ; - typedef typename view_type::memory_space memory_space ; - - // 'view' is managed therefore we can allocate a - // compatible host_view through the ordinary constructor. - - std::string label = memory_space::query_label( src.ptr_on_device() ); - label.append("_mirror"); - - return host_view_type( label , - src.dimension_0() , - src.dimension_1() , - src.dimension_2() , - src.dimension_3() , - src.dimension_4() , - src.dimension_5() , - src.dimension_6() , - src.dimension_7() ); -} - -template< class T , class L , class D , class M , class S > -typename Impl::enable_if<( - View::is_managed && - Impl::ViewAssignable< typename View::HostMirror , View >::value - ), typename View::HostMirror >::type -inline -create_mirror_view( const View & src ) -{ - return src ; -} - -template< class T , class L , class D , class M , class S > -typename Impl::enable_if<( - View::is_managed && - ! 
Impl::ViewAssignable< typename View::HostMirror , View >::value - ), typename View::HostMirror >::type -inline -create_mirror_view( const View & src ) -{ - return create_mirror( src ); -} - -//---------------------------------------------------------------------------- - -/** \brief Resize a view with copying old data to new data at the corresponding indices. */ -template< class T , class L , class D , class M , class S > -inline -void resize( View & v , - const typename Impl::enable_if< ViewTraits::is_managed , size_t >::type n0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) -{ - typedef View view_type ; - typedef typename view_type::memory_space memory_space ; - - const std::string label = memory_space::query_label( v.ptr_on_device() ); - - view_type v_resized( label, n0, n1, n2, n3, n4, n5, n6, n7 ); - - Impl::ViewRemap< view_type , view_type >( v_resized , v ); - - v = v_resized ; -} - -/** \brief Reallocate a view without copying old data to new data */ -template< class T , class L , class D , class M , class S > -inline -void realloc( View & v , - const typename Impl::enable_if< ViewTraits::is_managed , size_t >::type n0 , - const size_t n1 = 0 , - const size_t n2 = 0 , - const size_t n3 = 0 , - const size_t n4 = 0 , - const size_t n5 = 0 , - const size_t n6 = 0 , - const size_t n7 = 0 ) -{ - typedef View view_type ; - typedef typename view_type::memory_space memory_space ; - - // Query the current label and reuse it. - const std::string label = memory_space::query_label( v.ptr_on_device() ); - - v = view_type(); // deallocate first, if the only view to memory. - v = view_type( label, n0, n1, n2, n3, n4, n5, n6, n7 ); -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -struct ALL { KOKKOS_INLINE_FUNCTION ALL(){} }; - -template< class DstViewType , - class T , class L , class D , class M , class S , - class ArgType0 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst , src , arg0 ); - - return dst ; -} - -template< class DstViewType , - class T , class L , class D , class M , class S , - class ArgType0 , class ArgType1 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst, src, arg0, arg1 ); - - return dst ; -} - -template< class DstViewType , - class T , class L , class D , class M , class S , - class ArgType0 , class ArgType1 , class ArgType2 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst, src, arg0, arg1, arg2 ); - - return dst ; -} - -template< class DstViewType , - class T , class L , class D , class M , class S , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst, src, arg0, arg1, arg2, arg3 ); - - return dst ; -} - -template< class DstViewType , - class T , class L , class D , class M , class S , - 
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst, src, arg0, arg1, arg2, arg3, arg4 ); - - return dst ; -} - -template< class DstViewType , - class T , class L , class D , class M , class S , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 , class ArgType5 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 , - const ArgType5 & arg5 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst, src, arg0, arg1, arg2, arg3, arg4, arg5 ); - - return dst ; -} - -template< class DstViewType , - class T , class L , class D , class M , class S , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 , class ArgType5 , class ArgType6 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 , - const ArgType5 & arg5 , - const ArgType6 & arg6 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst, src, arg0, arg1, arg2, arg3, arg4, arg5, arg6 ); - - return dst ; -} - -template< class DstViewType , - class T , class L , class D , class M , class S , - class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , - class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 > -KOKKOS_INLINE_FUNCTION -DstViewType -subview( const View & src , - const ArgType0 & arg0 , - const ArgType1 & arg1 , - const ArgType2 & arg2 , - const ArgType3 & arg3 , - const ArgType4 & arg4 , - const ArgType5 & arg5 , - const ArgType6 & arg6 , - const ArgType7 & arg7 ) -{ - DstViewType dst ; - - Impl::ViewAssignment( dst, src, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 ); - - return dst ; -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif - diff --git a/kokkos/kokkos/core/src/Kokkos_hwloc.hpp b/kokkos/kokkos/core/src/Kokkos_hwloc.hpp deleted file mode 100644 index e7615ca..0000000 --- a/kokkos/kokkos/core/src/Kokkos_hwloc.hpp +++ /dev/null @@ -1,175 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_HWLOC_HPP -#define KOKKOS_HWLOC_HPP - -#include - -namespace Kokkos { - -/** \brief Minimal subset of logical 'hwloc' functionality available - * from http://www.open-mpi.org/projects/hwloc/. - * - * The calls are NOT thread safe in order to avoid mutexes, - * memory allocations, or other actions which could give the - * runtime system an opportunity to migrate the threads or - * touch allocated memory during the function calls. - * - * All calls to these functions should be performed by a thread - * when it has guaranteed exclusive access; e.g., for OpenMP - * within a 'critical' region. - */ -namespace hwloc { - -/** \brief Query if hwloc is available */ -bool available(); - -/** \brief Query number of available NUMA regions. - * This will be less than the hardware capacity - * if the MPI process is pinned to a NUMA region. - */ -unsigned get_available_numa_count(); - -/** \brief Query number of available cores per NUMA regions. - * This will be less than the hardware capacity - * if the MPI process is pinned to a set of cores. - */ -unsigned get_available_cores_per_numa(); - -/** \brief Query number of available "hard" threads per core; i.e., hyperthreads */ -unsigned get_available_threads_per_core(); - - -/** \brief Query the core topology of ( NUMA x Core/NUMA ). - * - * The topology is limited by the process binding, - * which may have been set by MPI. NUMA rank #0 - * contains the core on which the process / master thread - * is running. The master thread should only be bound - * to its original NUMA rank - because moving it to - * a different NUMA rank will displace it from all of - * the memory which it has already touched. - */ -std::pair get_core_topology(); - -/** \brief Number of concurrent threads per core. - * - * This typically reflects the number of hyperthreads - * the core can support. - */ -unsigned get_core_capacity(); - -} /* namespace hwloc */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -// Internal functions for binding persistent spawned threads. 
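// (editor's sketch, not part of the original header) Typical use of the queries above
// together with the binding helpers declared below.  The std::pair template arguments,
// stripped in this diff, are assumed to be <unsigned,unsigned> as ( NUMA , core/NUMA ):
//
//   if ( Kokkos::hwloc::available() ) {
//     const std::pair<unsigned,unsigned> core_topo = Kokkos::hwloc::get_core_topology();
//     const std::pair<unsigned,unsigned> core_use  = Kokkos::hwloc::use_core_topology( thread_count );
//     // each spawned thread then pins itself with bind_this_thread( coordinate ),
//     // choosing coordinates inside 'core_use'.
//   }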
- -namespace Kokkos { -namespace hwloc { - -/** \brief Determine best use of cores for a given thread count */ -std::pair use_core_topology( const unsigned thread_count ); - -/** \brief Query core-coordinate of the current thread - * with respect to the core_topology. - * - * As long as the thread is running within the - * process binding the following condition holds. - * - * core_coordinate.first < core_topology.first - * core_coordinate.second < core_topology.second - */ -std::pair get_this_thread_coordinate(); - -/** \brief Bind the current thread to a core. */ -bool bind_this_thread( const std::pair ); - -/** \brief Bind the current thread to one of the cores in the list. - * Set that entry to (~0,~0) and return the index. - * If binding fails return ~0. - */ -unsigned bind_this_thread( const unsigned coordinate_count , - std::pair coordinate[] ); - -/** \brief Unbind the current thread back to the original process binding */ -bool unbind_this_thread(); - -void thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - std::pair thread_coord[] ); - -void thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - const std::pair master_coord , - std::pair thread_coord[] ); - -} /* namespace hwloc */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void host_thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - std::pair thread_coord[] ); - -void host_thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - const std::pair master_coord , - std::pair thread_coord[] ); - -} -} - -#endif /* #define KOKKOS_HWLOC_HPP */ - diff --git a/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp deleted file mode 100644 index b26f9fa..0000000 --- a/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ /dev/null @@ -1,412 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_OPENMP_PARALLEL_HPP -#define KOKKOS_OPENMP_PARALLEL_HPP - -#include - -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class WorkSpec > -class ParallelFor< FunctorType , WorkSpec , ::Kokkos::OpenMP > -{ -public: - - inline - ParallelFor( const FunctorType & functor , const size_t work_count ) - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread( omp_get_thread_num() ); - - const std::pair< size_t , size_t > range = exec.work_range( work_count ); - - for ( size_t iwork = range.first ; iwork < range.second ; ++iwork ) { - functor( iwork ); - } - } -/* END #pragma omp parallel */ - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class WorkSpec > -class ParallelReduce< FunctorType , WorkSpec , Kokkos::OpenMP > -{ -public: - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - inline - ParallelReduce( const FunctorType & functor , - const size_t work_count , - pointer_type result = 0 ) - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); - - OpenMPexec::resize_reduce_scratch( Reduce::value_size( functor ) ); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread( omp_get_thread_num() ); - - const std::pair range = exec.work_range( work_count ); - - typename Reduce::reference_type update = Reduce::reference( exec.reduce_base() ); - - functor.init( update ); - - for ( size_t iw = range.first ; iw < range.second ; ++iw ) { - functor( iw , update ); - } - } -/* END #pragma omp parallel */ - - { - const int n = omp_get_max_threads(); - const pointer_type ptr = pointer_type( OpenMPexec::get_thread(0)->reduce_base() ); - typename Reduce::reference_type update = Reduce::reference( ptr ); - - for ( int i = 1 ; i < n ; ++i ) { - functor.join( update , Reduce::reference( OpenMPexec::get_thread(i)->reduce_base() ) ); - } - - Reduce::final( functor , ptr ); - - if ( result ) { - const int n = Reduce::value_count( functor ); - - for ( int i = 0 ; i < n ; ++i ) { result[i] = ptr[i] ; } - } - } - } - - void wait() {} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- 
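// (editor's sketch, not part of the original header) The ParallelReduce specialization
// above drives a functor through the classic init / operator() / join interface that
// ReduceAdapter expects.  A minimal sum functor under that assumption, with a
// hypothetical name, might look like:

struct ExampleSumFunctor {
  typedef double value_type ;

  KOKKOS_INLINE_FUNCTION
  void init( value_type & update ) const { update = 0 ; }

  KOKKOS_INLINE_FUNCTION
  void operator()( const size_t iwork , value_type & update ) const
    { update += double( iwork ); }

  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & update ,
             const volatile value_type & input ) const
    { update += input ; }
};

// Usage sketch: Kokkos::parallel_reduce( work_count , ExampleSumFunctor() , result );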
-//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType , class WorkSpec > -class ParallelScan< FunctorType , WorkSpec , Kokkos::OpenMP > -{ -public: - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - inline - ParallelScan( const FunctorType & functor , const size_t work_count ) - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan"); - - OpenMPexec::resize_reduce_scratch( 2 * Reduce::value_size( functor ) ); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread( omp_get_thread_num() ); - - const std::pair range = exec.work_range( work_count ); - - typename Reduce::reference_type update = - Reduce::reference( pointer_type( exec.reduce_base() ) + Reduce::value_count( functor ) ); - - functor.init( update ); - - for ( size_t iw = range.first ; iw < range.second ; ++iw ) { - functor( iw , update , false ); - } - } -/* END #pragma omp parallel */ - - { - const unsigned thread_count = omp_get_max_threads(); - const unsigned value_count = Reduce::value_count( functor ); - const unsigned team_max = OpenMP::team_max(); - - pointer_type ptr_prev = 0 ; - - for ( unsigned rank = 0 ; rank < thread_count ; ++rank ) { - const unsigned league_rank = rank / team_max ; - const unsigned team_rank = rank % team_max ; - - pointer_type ptr = pointer_type( OpenMPexec::find_thread(league_rank,team_rank)->reduce_base() ); - - if ( rank ) { - for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; } - functor.join( Reduce::reference( ptr + value_count ) , Reduce::reference( ptr ) ); - } - else { - functor.init( Reduce::reference( ptr ) ); - } - - ptr_prev = ptr ; - } - } - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread( omp_get_thread_num() ); - - const std::pair range = exec.work_range( work_count ); - - typename Reduce::reference_type update = - Reduce::reference( pointer_type( exec.reduce_base() ) ); - - for ( size_t iw = range.first ; iw < range.second ; ++iw ) { - functor( iw , update , false ); - } - } -/* END #pragma omp parallel */ - - } - - void wait() {} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_USE_PRAGMA_SIMD ) - -namespace Kokkos { -namespace Impl { - -template< class FunctorType > -class ParallelReduce< FunctorType , VectorParallel , ::Kokkos::OpenMP > -{ -public: - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - inline - ParallelReduce( const FunctorType & functor , - const size_t work_count , - pointer_type result = 0 ) - { - typedef integral_constant< size_t , OpenMPexec::VECTOR_LENGTH > vector_length ; - typedef integral_constant< size_t , OpenMPexec::VECTOR_LENGTH - 1 > vector_mask ; - - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); - - OpenMPexec::resize_reduce_scratch( Reduce::value_size( functor ) * vector_length::value ); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread( omp_get_thread_num() ); - const pointer_type ptr = pointer_type( exec.reduce_base() ); - - const std::pair range = exec.work_range( work_count ); - -#pragma simd 
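// (editor's note) one accumulator per vector lane: lane 'iv' owns the slot at
// ptr + iv * Reduce::value_count( functor ), initialized in this loop and joined
// back into lane 0 after the strip-mined work loop.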
-#pragma ivdep - for ( size_t iv = 0 ; iv < vector_length::value ; ++iv ) { - functor.init( Reduce::reference( ptr + iv * Reduce::value_count( functor ) ) ); - } - -#pragma simd vectorlength( vector_length::value ) -#pragma ivdep - for ( size_t iw = range.first ; iw < range.second ; ++iw ) { - functor( iw , Reduce::reference( ptr + ( iw & vector_mask::value ) * Reduce::value_count( functor ) ) ); - } - - for ( size_t iv = 1 ; iv < vector_length::value ; ++iv ) { - functor.join( Reduce::reference( ptr ) , - Reduce::reference( ptr + iv * Reduce::value_count( functor ) ) ); - } - } -/* END #pragma omp parallel */ - - { - const int n = omp_get_max_threads(); - const pointer_type ptr = pointer_type( OpenMPexec::get_thread(0)->reduce_base() ); - - for ( int i = 1 ; i < n ; ++i ) { - functor.join( Reduce::reference( ptr ) , - Reduce::reference( OpenMPexec::get_thread(i)->reduce_base() ) ); - } - - Reduce::final( functor , ptr ); - - if ( result ) { - const int n = Reduce::value_count( functor ); - - for ( int i = 0 ; i < n ; ++i ) { result[i] = ptr[i] ; } - } - } - } - - void wait() {} -}; - -} // namespace Impl -} // namespace Kokkos - -#endif /* #if defined( KOKKOS_USE_PRAGMA_SIMD ) */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class FunctorType > -class ParallelFor< FunctorType , ParallelWorkRequest , ::Kokkos::OpenMP > -{ -public: - - inline - ParallelFor( const FunctorType & functor , - const ParallelWorkRequest & work ) - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); - - OpenMPexec::resize_shared_scratch( FunctorShmemSize< FunctorType >::value( functor ) ); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread( omp_get_thread_num() ); - - for ( exec.team_work_init( work.league_size ) ; exec.team_work_avail() ; exec.team_work_next() ) { - functor( OpenMP( exec ) ); - } - } -/* END #pragma omp parallel */ - } - - void wait() {} -}; - -template< class FunctorType > -class ParallelReduce< FunctorType , ParallelWorkRequest , ::Kokkos::OpenMP > -{ -public: - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - inline - ParallelReduce( const FunctorType & functor , - const ParallelWorkRequest & work , - pointer_type result = 0 ) - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - - OpenMPexec::resize_shared_scratch( FunctorShmemSize< FunctorType >::value( functor ) ); - OpenMPexec::resize_reduce_scratch( Reduce::value_size( functor ) ); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread( omp_get_thread_num() ); - - typename Reduce::reference_type update = Reduce::reference( exec.reduce_base() ); - - functor.init( update ); - - for ( exec.team_work_init( work.league_size ) ; exec.team_work_avail() ; exec.team_work_next() ) { - functor( OpenMP( exec ) , update ); - } - } -/* END #pragma omp parallel */ - - { - const int n = omp_get_max_threads(); - const pointer_type ptr = pointer_type( OpenMPexec::get_thread(0)->reduce_base() ); - - for ( int i = 1 ; i < n ; ++i ) { - functor.join( Reduce::reference( ptr ) , - Reduce::reference( OpenMPexec::get_thread(i)->reduce_base() ) ); - } - - Reduce::final( functor , ptr ); - - if ( result ) { - const int n = Reduce::value_count( functor ); - - for ( int i = 0 ; i < n ; ++i ) { result[i] = 
ptr[i] ; } - } - } - } - - void wait() {} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* KOKKOS_OPENMP_PARALLEL_HPP */ - diff --git a/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp deleted file mode 100644 index cdc9f47..0000000 --- a/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp +++ /dev/null @@ -1,395 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include -#include - -namespace Kokkos { -namespace Impl { -namespace { - -int kokkos_omp_in_parallel(); - -int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 ); - -int kokkos_omp_in_parallel() -{ - return omp_in_parallel() && ! 
kokkos_omp_in_critical_region ; -} - -} // namespace -} // namespace Impl -} // namespace Kokkos - - -namespace Kokkos { -namespace Impl { - -OpenMPexec * OpenMPexec::m_thread[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; - -OpenMPexec * OpenMPexec::find_thread( const int init_league_rank , - const int team_rank ) -{ - for ( unsigned i = 0 ; i < OpenMPexec::MAX_THREAD_COUNT && 0 != m_thread[i] ; ++i ) { - if ( init_league_rank == m_thread[i]->m_init_league_rank && - team_rank == m_thread[i]->m_team_rank ) { - return m_thread[i] ; - } - } - return (OpenMPexec *) 0 ; -} - -OpenMPexec::OpenMPexec( const unsigned league_rank , - const unsigned league_size , - const unsigned team_rank , - const unsigned team_size ) - : m_reduce(0) - , m_shared(0) - , m_shared_end(0) - , m_shared_iter(0) - , m_state_team( OpenMPexec::Active ) - , m_fan_team_size(0) - , m_team_rank( team_rank ) - , m_team_size( team_size ) - , m_init_league_rank( league_rank ) - , m_init_league_size( league_size ) - , m_work_league_rank( league_rank ) - , m_work_league_end( league_rank + 1 ) - , m_work_league_size( league_size ) -{ - for ( int i = 0 ; i < MAX_FAN_COUNT ; ++i ) { m_fan_team[i] = 0 ; } -} - -OpenMPexec::~OpenMPexec() {} - - -void OpenMPexec::verify_is_process( const char * const label ) -{ - if ( omp_in_parallel() ) { - std::string msg( label ); - msg.append( " ERROR: in parallel" ); - Kokkos::Impl::throw_runtime_exception( msg ); - } -} - -void OpenMPexec::verify_initialized( const char * const label ) -{ - if ( 0 == m_thread[0] ) { - std::string msg( label ); - msg.append( " ERROR: not initialized" ); - Kokkos::Impl::throw_runtime_exception( msg ); - } -} - -void OpenMPexec::resize_reduce_scratch( size_t size ) -{ - static size_t s_size = 0 ; - - verify_initialized( "OpenMP::resize_reduce_scratch" ); - verify_is_process( "OpenMP::resize_reduce_scratch" ); - - if ( size ) { size += REDUCE_TEAM_BASE ; } - - const size_t rem = size % Kokkos::Impl::MEMORY_ALIGNMENT ; - - if ( rem ) size += Kokkos::Impl::MEMORY_ALIGNMENT - rem ; - - if ( ( 0 == size && 0 != s_size ) || s_size < size ) { - -#pragma omp parallel - { - OpenMPexec & th = * m_thread[ omp_get_thread_num() ]; - -#pragma omp critical - { - kokkos_omp_in_critical_region = 1 ; - - if ( th.m_reduce ) { - HostSpace::decrement( th.m_reduce ); - th.m_reduce = 0 ; - } - - if ( size ) { - th.m_reduce = HostSpace::allocate( "openmp_reduce_scratch" , typeid(unsigned char) , 1 , size ); - } - kokkos_omp_in_critical_region = 0 ; - } -/* END #pragma omp critical */ - } -/* END #pragma omp parallel */ - } - - s_size = size ; -} - -void OpenMPexec::resize_shared_scratch( size_t size ) -{ - static size_t s_size = 0 ; - - verify_initialized( "OpenMP::resize_shared_scratch" ); - verify_is_process( "OpenMP::resize_shared_scratch" ); - - const size_t rem = size % Kokkos::Impl::MEMORY_ALIGNMENT ; - - if ( rem ) size += Kokkos::Impl::MEMORY_ALIGNMENT - rem ; - - if ( ( 0 == size && 0 != s_size ) || s_size < size ) { - -#pragma omp parallel - { - OpenMPexec & th = * m_thread[ omp_get_thread_num() ]; - - if ( 0 == th.m_team_rank ) { -#pragma omp critical - { - kokkos_omp_in_critical_region = 1 ; - - if ( th.m_shared ) { - HostSpace::decrement( th.m_shared ); - th.m_shared = 0 ; - } - - if ( size ) { - th.m_shared = HostSpace::allocate( "openmp_shared_scratch" , typeid(unsigned char) , 1 , size ); - th.m_shared_end = size ; - } - - kokkos_omp_in_critical_region = 0 ; - } -/* END #pragma omp critical */ - // Push to threads in the same team - - for ( int i = 0 ; i < omp_get_num_threads() ; 
++i ) { - if ( th.m_init_league_rank == m_thread[i]->m_init_league_rank ) { - m_thread[i]->m_shared = th.m_shared ; - m_thread[i]->m_shared_end = th.m_shared_end ; - } - } - } - } -/* END #pragma omp parallel */ - } - - s_size = size ; -} - -void * OpenMPexec::get_shmem( const int size ) -{ - // m_shared_iter is in bytes, convert to integer offsets - const int offset = m_shared_iter >> power_of_two::value ; - - m_shared_iter += size ; - - if ( m_shared_end < m_shared_iter ) { - Kokkos::Impl::throw_runtime_exception( std::string("OpenMPexec::get_shmem FAILED : exceeded shared memory size" ) ); - } - - return ((int*)m_shared) + offset ; -} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -unsigned OpenMP::league_max() -{ - Impl::OpenMPexec::verify_initialized("Kokkos::OpenMP::league_max" ); - Impl::OpenMPexec::verify_is_process("Kokkos::OpenMP::league_max" ); - - return unsigned( std::numeric_limits::max() ); -} - -unsigned OpenMP::team_max() -{ - Impl::OpenMPexec::verify_initialized("Kokkos::OpenMP::team_max" ); - Impl::OpenMPexec::verify_is_process("Kokkos::OpenMP::team_max" ); - - return Impl::OpenMPexec::m_thread[0]->m_team_size ; -} - -//---------------------------------------------------------------------------- - -int OpenMP::is_initialized() -{ return 0 != Impl::OpenMPexec::m_thread[0]; } - -void OpenMP::initialize( const unsigned team_count , - const unsigned threads_per_team , - const unsigned numa_count , - const unsigned cores_per_numa ) -{ - Impl::OpenMPexec::verify_is_process("Kokkos::OpenMP::initialize" ); - - if ( Impl::OpenMPexec::m_thread[0] ) { - Kokkos::Impl::throw_runtime_exception("Kokkos:OpenMP::initialize ERROR : already initialized" ); - } - - const unsigned thread_count = team_count * threads_per_team ; - - omp_set_num_threads( thread_count ); - - if ( thread_count == 0 ) return ; - - //---------------------------------------- - // Spawn threads: - - // Verify OMP interaction: - { - if ( int(thread_count) != omp_get_max_threads() ) { - Kokkos::Impl::throw_runtime_exception("Kokkos:OpenMP::initialize ERROR : failed omp_get_max_threads()" ); - } - -#pragma omp parallel - { - if ( int(thread_count) != omp_get_num_threads() ) { - Kokkos::Impl::throw_runtime_exception("Kokkos:OpenMP::initialize ERROR : failed omp_get_num_threads()" ); - } - } - } - - //---------------------------------------- - - const bool use_hwloc = ( 1 < thread_count ) && Kokkos::hwloc::available(); - - const std::pair - hwloc_core_topo( Kokkos::hwloc::get_available_numa_count() , - Kokkos::hwloc::get_available_cores_per_numa() ); - - std::pair team_topology( team_count , threads_per_team ); - std::pair use_core_topology( numa_count , cores_per_numa ); - std::pair master_coord = Kokkos::hwloc::get_this_thread_coordinate(); - - std::pair threads_coord[ Impl::OpenMPexec::MAX_THREAD_COUNT ]; - - if ( use_hwloc ) { - - if ( 0 == use_core_topology.first && 0 == use_core_topology.second ) { - use_core_topology = Kokkos::hwloc::use_core_topology( thread_count ); - } - - Kokkos::hwloc::thread_mapping( team_topology , use_core_topology , hwloc_core_topo , master_coord , threads_coord ); - } - - // Bind threads and allocate thread data: - -#pragma omp parallel - { -#pragma omp critical - { - // Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region. 
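      // (Editor's illustrative note, not in the original file.)
      // Example of the decomposition computed below, assuming team_count = 2
      // and threads_per_team = 4 (thread_count = 8): the OpenMP thread whose
      // reversed rank works out to thread_rank = 5 gets
      // league_rank = 5 / 4 = 1 and team_rank = 5 % 4 = 1, so each run of
      // 4 consecutive reversed ranks forms one team.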
- // Call to 'new' may not be thread safe as well. - - // Reverse the rank for threads so that the scan operation reduces to the highest rank thread. - - const unsigned omp_rank = omp_get_thread_num(); - const unsigned thread_r = use_hwloc ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) : omp_rank ; - const unsigned thread_rank = thread_count - ( thread_r + 1 ); - const unsigned league_rank = thread_rank / threads_per_team ; - const unsigned team_rank = thread_rank % threads_per_team ; - - Impl::OpenMPexec::m_thread[ omp_rank ] = new Impl::OpenMPexec( league_rank , team_count , team_rank , threads_per_team ); - } -/* END #pragma omp critical */ - } -/* END #pragma omp parallel */ - - // Set threads' fan_team relationships: - -#pragma omp parallel - { - Impl::OpenMPexec & th = * Impl::OpenMPexec::m_thread[ omp_get_thread_num() ]; - - // Intra-team fan-in with root as the highest rank thread: - const int team_r = th.m_team_size - ( th.m_team_rank + 1 ); - - for ( int n = 1 ; ( team_r + n < th.m_team_size ) && ( 0 == ( n & team_r ) ) ; n <<= 1 ) { - th.m_fan_team[ th.m_fan_team_size++ ] = - Impl::OpenMPexec::find_thread( th.m_init_league_rank , th.m_team_size - ( team_r + n + 1 ) ); - } - // Intra-team scan: - { - int n ; - for ( n = 1 ; 0 == ( team_r & n ) && ( team_r + n < th.m_team_size ) ; n <<= 1 ); - if ( ( team_r & n ) && ( team_r + n < th.m_team_size ) ) { - th.m_fan_team[ th.m_fan_team_size ] = - Impl::OpenMPexec::find_thread( th.m_init_league_rank , th.m_team_size - ( team_r + n + 1 ) ); - } - else { - th.m_fan_team[ th.m_fan_team_size ] = 0 ; - } - } - } -/* END #pragma omp parallel */ - - Impl::OpenMPexec::resize_reduce_scratch( 4096 - Impl::OpenMPexec::REDUCE_TEAM_BASE ); - Impl::OpenMPexec::resize_shared_scratch( 4096 ); -} - -//---------------------------------------------------------------------------- - -void OpenMP::finalize() -{ - Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" ); - Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" ); - - Impl::OpenMPexec::resize_reduce_scratch(0); - Impl::OpenMPexec::resize_shared_scratch(0); - - for ( int i = 0 ; i < Impl::OpenMPexec::MAX_THREAD_COUNT ; ++i ) { - if ( Impl::OpenMPexec::m_thread[i] ) { delete Impl::OpenMPexec::m_thread[i] ; } - Impl::OpenMPexec::m_thread[i] = 0 ; - } - - omp_set_num_threads(0); - - hwloc::unbind_this_thread(); -} - -} // namespace Kokkos - diff --git a/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp deleted file mode 100644 index 7d61de5..0000000 --- a/kokkos/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp +++ /dev/null @@ -1,310 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_OPENMPEXEC_HPP -#define KOKKOS_OPENMPEXEC_HPP - -#include -#include - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -/** \brief Data for OpenMP thread execution */ - -class OpenMPexec { -public: - - // Fan array has log_2(NT) reduction threads plus 2 scan threads - // Currently limited to 16k threads. - enum { MAX_FAN_COUNT = 16 }; - enum { MAX_THREAD_COUNT = 1 << ( MAX_FAN_COUNT - 2 ) }; - enum { VECTOR_LENGTH = 8 }; - enum { REDUCE_TEAM_BASE = 512 }; - - /** \brief Thread states for team synchronization */ - enum { Active , Rendezvous , ReductionAvailable , ScanAvailable }; - -private: - - friend class Kokkos::OpenMP ; - - void * m_reduce ; ///< Reduction memory - void * m_shared ; ///< Shared memory - int m_shared_end ; - int m_shared_iter ; - int volatile m_state_team ; - int m_fan_team_size ; - int m_team_rank ; - int m_team_size ; - int m_init_league_rank ; - int m_init_league_size ; - - int m_work_league_rank ; - int m_work_league_end ; - int m_work_league_size ; - - OpenMPexec * m_fan_team[ MAX_FAN_COUNT ]; - - static OpenMPexec * m_thread[ MAX_THREAD_COUNT ]; - - OpenMPexec(); - OpenMPexec( const OpenMPexec & ); - OpenMPexec & operator = ( const OpenMPexec & ); - -public: - - void * reduce_team() const { return m_reduce ; } - void * reduce_base() const { return ((unsigned char *)m_reduce) + REDUCE_TEAM_BASE ; } - - ~OpenMPexec(); - - OpenMPexec( const unsigned league_rank , - const unsigned league_size , - const unsigned team_rank , - const unsigned team_size ); - - static void finalize(); - - static void initialize( const unsigned team_count , - const unsigned threads_per_team , - const unsigned numa_count , - const unsigned cores_per_numa ); - - static void verify_is_process( const char * const ); - static void verify_initialized( const char * const ); - - static void resize_reduce_scratch( size_t ); - static void resize_shared_scratch( size_t ); - - inline static - OpenMPexec * get_thread( const unsigned entry ) { return m_thread[ entry ] ; } - - static - OpenMPexec * find_thread( const int init_league_rank , - const int team_rank ); - - //---------------------------------------------------------------------- - /** \brief Compute a range of work for this thread's rank */ - - inline - std::pair< size_t , size_t > - work_range( const size_t work_count ) const - { - typedef 
integral_constant< size_t , VECTOR_LENGTH - 1 > work_mask ; - - const size_t thread_size = m_team_size * m_work_league_size ; - - // work per thread rounded up and aligned to vector length: - - const size_t work_per_thread = - ( ( ( work_count + thread_size - 1 ) / thread_size ) + work_mask::value ) & ~(work_mask::value); - - const size_t work_begin = std::min( work_count , work_per_thread * ( m_team_rank + m_team_size * m_work_league_rank ) ); - const size_t work_end = std::min( work_count , work_per_thread + work_begin ); - - return std::pair< size_t , size_t >( work_begin , work_end ); - } - - //---------------------------------------------------------------------- - - void * get_shmem( const int ); - - void team_barrier() - { - const bool not_root = m_team_rank + 1 < m_team_size ; - - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - spinwait( m_fan_team[i]->m_state_team , OpenMPexec::Active ); - } - if ( not_root ) { - m_state_team = Rendezvous ; - spinwait( m_state_team , OpenMPexec::Rendezvous ); - } - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - m_fan_team[i]->m_state_team = OpenMPexec::Active ; - } - } - - // Called within a parallel region - template< class ArgType > - inline - ArgType team_scan( const ArgType & value , ArgType * const global_accum = 0 ) - { - // Sequence of m_state_team states: - // 0) Active : entry and exit state - // 1) ReductionAvailable : reduction value available, waiting for scan value - // 2) ScanAvailable : reduction value available, scan value available - // 3) Rendezvous : broadcasting global iinter-team accumulation value - - // Make sure there is enough scratch space: - typedef typename if_c< 2 * sizeof(ArgType) < REDUCE_TEAM_BASE , ArgType , void >::type type ; - - const bool not_root = m_team_rank + 1 < m_team_size ; - - type * const work_value = (type*) reduce_team(); - - // OpenMPexec::Active == m_state_team - - work_value[0] = value ; - - // Fan-in reduction, wait for source thread to complete it's fan-in reduction. - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - OpenMPexec & th = *m_fan_team[i]; - - // Wait for source thread to exit Active state. - Impl::spinwait( th.m_state_team , OpenMPexec::Active ); - // Source thread is 'ReductionAvailable' or 'ScanAvailable' - work_value[0] += ((volatile type*)th.reduce_team())[0]; - } - - work_value[1] = work_value[0] ; - - if ( not_root ) { - - m_state_team = OpenMPexec::ReductionAvailable ; // Reduction value is available. - - // Wait for contributing threads' scan value to be available. - if ( m_fan_team[ m_fan_team_size ] ) { - OpenMPexec & th = *m_fan_team[ m_fan_team_size ] ; - - // Wait: Active -> ReductionAvailable - Impl::spinwait( th.m_state_team , OpenMPexec::Active ); - // Wait: ReductionAvailable -> ScanAvailable: - Impl::spinwait( th.m_state_team , OpenMPexec::ReductionAvailable ); - - work_value[1] += ((volatile type*)th.reduce_team())[1] ; - } - - m_state_team = OpenMPexec::ScanAvailable ; // Scan value is available. - } - else { - // Root thread add team's total to global inter-team accumulation - work_value[0] = global_accum ? 
atomic_fetch_add( global_accum , work_value[0] ) : 0 ; - } - - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - OpenMPexec & th = *m_fan_team[i]; - // Wait: ReductionAvailable -> ScanAvailable - Impl::spinwait( th.m_state_team , OpenMPexec::ReductionAvailable ); - // Wait: ScanAvailable -> Rendezvous - Impl::spinwait( th.m_state_team , OpenMPexec::ScanAvailable ); - } - - // All fan-in threads are in the ScanAvailable state - if ( not_root ) { - m_state_team = OpenMPexec::Rendezvous ; - Impl::spinwait( m_state_team , OpenMPexec::Rendezvous ); - } - - // Broadcast global inter-team accumulation value - volatile type & global_val = work_value[0] ; - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - OpenMPexec & th = *m_fan_team[i]; - ((volatile type*)th.reduce_team())[0] = global_val ; - th.m_state_team = OpenMPexec::Active ; - } - // Exclusive scan, subtract contributed value - return global_val + work_value[1] - value ; - } - - - inline - void team_work_init( int work_league_size ) - { - const int work_per_team = ( work_league_size + m_init_league_size - 1 ) / m_init_league_size ; - m_work_league_rank = std::min( work_league_size , work_per_team * m_init_league_rank ); - m_work_league_end = std::min( work_league_size , work_per_team + m_work_league_rank ); - m_work_league_size = work_league_size ; - } - - inline - bool team_work_avail() - { - m_shared_iter = 0 ; - const bool avail = m_work_league_rank < m_work_league_end ; - if ( ! avail ) { - m_work_league_rank = m_init_league_rank ; - m_work_league_end = m_init_league_rank + 1 ; - m_work_league_size = m_init_league_size ; - } - return avail ; - } - - inline - void team_work_next() - { if ( ++m_work_league_rank < m_work_league_end ) team_barrier(); } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -inline OpenMP::OpenMP( Impl::OpenMPexec & e ) : m_exec(e) {} - -inline int OpenMP::league_rank() const { return m_exec.m_work_league_rank ; } -inline int OpenMP::league_size() const { return m_exec.m_work_league_size ; } -inline int OpenMP::team_rank() const { return m_exec.m_team_rank ; } -inline int OpenMP::team_size() const { return m_exec.m_team_size ; } - -inline void OpenMP::team_barrier() { m_exec.team_barrier() ; } - -inline void * OpenMP::get_shmem( const int size ) { return m_exec.get_shmem(size) ; } - -template< typename Type > -inline Type OpenMP::team_scan( const Type & value ) -{ return m_exec.team_scan( value ); } - -template< typename TypeLocal , typename TypeGlobal > -inline TypeGlobal OpenMP::team_scan( const TypeLocal & value , TypeGlobal * const global_accum ) -{ return m_exec.template team_scan< TypeGlobal >( value , global_accum ); } - -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */ - diff --git a/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp deleted file mode 100644 index c035ade..0000000 --- a/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ /dev/null @@ -1,946 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { -namespace { - -ThreadsExec s_threads_process ; -ThreadsExec * s_threads_exec[ ThreadsExec::MAX_THREAD_COUNT ]; -std::pair s_threads_coord[ ThreadsExec::MAX_THREAD_COUNT ]; -std::string s_exception_msg ; - -unsigned s_threads_count = 0 ; -unsigned s_threads_reduce_size = 0 ; -unsigned s_threads_shared_size = 0 ; - -void (* volatile s_current_function)( ThreadsExec & , const void * ); -const void * volatile s_current_function_arg = 0 ; - -struct Sentinel { - Sentinel() - { - HostSpace::register_in_parallel( ThreadsExec::in_parallel ); - } - - ~Sentinel() - { - if ( s_threads_count || - s_threads_reduce_size || - s_threads_shared_size || - s_current_function || - s_current_function_arg || - s_threads_exec[0] ) { - std::cerr << "ERROR : Process exiting without calling Kokkos::Threads::terminate()" << std::endl ; - } - } -}; - -} // namespace -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void ThreadsExec::driver(void) -{ - // If hardware locality library unavailable then pass in the rank. - - size_t thread_rank = (size_t) s_current_function_arg ; - - if ( s_threads_count <= thread_rank ) { - thread_rank = Kokkos::hwloc::bind_this_thread( s_threads_count , s_threads_coord ); - } - - if ( s_threads_count <= thread_rank || 0 != ((ThreadsExec * volatile *)s_threads_exec)[ thread_rank ] ) { - - // An error occured. 
Inform process that thread is terminating - s_threads_process.m_state = ThreadsExec::Terminating ; - - return ; - } - - { - ThreadsExec this_thread ; - - this_thread.m_state = ThreadsExec::Active ; - - // Try to protect against cache coherency failure by casting to volatile. - ((ThreadsExec * volatile *)s_threads_exec)[ thread_rank ] = & this_thread ; - // Really need a memory fence here. - - // Inform spawning process that the threads_exec entry has been set. - s_threads_process.m_state = ThreadsExec::Active ; - - while ( ThreadsExec::Active == this_thread.m_state ) { - -#if 0 - try { - // Call work function - (*s_current_function)( this_thread , s_current_function_arg ); - } - catch( const std::exception & x ) { - std::ostringstream msg ; - msg << "Kokkos::Threads[" << thread_rank << "] Uncaught exeception : " << x.what() << std::endl ; - s_exception_msg.append( msg.str() ); - } - catch( ... ) { - std::ostringstream msg ; - msg << "Kokkos::Threads[" << thread_rank << "] Uncaught exeception" << std::endl ; - s_exception_msg.append( msg.str() ); - } -#else - (*s_current_function)( this_thread , s_current_function_arg ); -#endif - - // Deactivate thread and wait for reactivation - this_thread.m_state = ThreadsExec::Inactive ; - wait_yield( this_thread.m_state , ThreadsExec::Inactive ); - } - - s_threads_process.m_state = ThreadsExec::Terminating ; - - ((ThreadsExec * volatile * )s_threads_exec)[ thread_rank ] = 0 ; - } -} - -void execute_function_noop( ThreadsExec & , const void * ) {} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -ThreadsExec::~ThreadsExec() -{ - m_reduce = 0 ; - m_shared = 0 ; - m_shared_end = 0 ; - m_shared_iter = 0 ; - m_state = ThreadsExec::Terminating ; - m_state_team = ThreadsExec::Inactive ; - m_fan_size = 0 ; - m_fan_team_size = 0 ; - - m_team_rank = 0 ; - m_team_size = 0 ; - m_init_league_rank = 0 ; - m_init_league_size = 0 ; - m_init_thread_rank = 0 ; - m_init_thread_size = 0 ; - - m_work_league_rank = 0 ; - m_work_league_end = 0 ; - m_work_league_size = 0 ; - - for ( unsigned i = 0 ; i < MAX_FAN_COUNT ; ++i ) { m_fan[i] = 0 ; } - for ( unsigned i = 0 ; i < MAX_FAN_COUNT ; ++i ) { m_fan_team[i] = 0 ; } -} - -ThreadsExec::ThreadsExec() - : m_reduce(0) - , m_shared(0) - , m_shared_end(0) - , m_shared_iter(0) - , m_state( ThreadsExec::Terminating ) - , m_state_team( ThreadsExec::Inactive ) - - , m_fan_size(0) - , m_fan_team_size(0) - - , m_team_rank(0) - , m_team_size(0) - , m_init_league_rank(0) - , m_init_league_size(0) - , m_init_thread_rank(0) - , m_init_thread_size(0) - - , m_work_league_rank(0) - , m_work_league_end(0) - , m_work_league_size(0) -{ - for ( unsigned i = 0 ; i < MAX_FAN_COUNT ; ++i ) { m_fan[i] = 0 ; } - for ( unsigned i = 0 ; i < MAX_FAN_COUNT ; ++i ) { m_fan_team[i] = 0 ; } - - if ( & s_threads_process == this ) { - m_state = ThreadsExec::Inactive ; - m_team_rank = 0 ; - m_team_size = 1 ; - m_init_league_rank = 0 ; - m_init_league_size = 1 ; - m_init_thread_rank = 0 ; - m_init_thread_size = 1 ; - - m_work_league_rank = 0 ; - m_work_league_end = 1 ; - m_work_league_size = 1 ; - } -} - -int ThreadsExec::get_thread_count() -{ - return s_threads_count ; -} - -ThreadsExec * ThreadsExec::get_thread( const int init_thread_rank ) -{ - ThreadsExec * const th = - unsigned(init_thread_rank) < s_threads_count - ? 
s_threads_exec[ s_threads_count - ( init_thread_rank + 1 ) ] : 0 ; - - if ( 0 == th || th->m_init_thread_rank != init_thread_rank ) { - std::ostringstream msg ; - msg << "Kokkos::Impl::ThreadsExec::get_thread ERROR : " - << "thread " << init_thread_rank << " of " << s_threads_count ; - if ( 0 == th ) { - msg << " does not exist" ; - } - else { - msg << " has wrong thread_rank " << th->m_init_thread_rank ; - } - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - return th ; -} - -// Set threads' team and initial league sizes. -// Set threads' global and team fan-in and scan relationsups. -// If the process thread is used then it is 's_threads_exec[0]' -// which we map to the maximum rank so that the scan's reduction -// places data on the proper thread. -void ThreadsExec::set_threads_relationships( const std::pair team_topo ) -{ - const unsigned league_size = team_topo.first ; - const unsigned team_size = team_topo.second ; - const unsigned thread_count = league_size * team_size ; - - for ( unsigned r = 0 ; r < thread_count ; ++r ) { - if ( s_threads_exec[r] == 0 ) { - Kokkos::Impl::throw_runtime_exception( std::string("ThreadsExec::set_threads_relationships FAILED : NULL entry" ) ); - } - } - - for ( unsigned league_r = 0 , th_r = 0 ; league_r < league_size ; ++league_r ) { - for ( unsigned team_r = 0 ; team_r < team_size ; ++team_r , ++th_r ) { - - ThreadsExec & th = * s_threads_exec[th_r] ; - - th.m_team_rank = team_size - ( team_r + 1 ); - th.m_team_size = team_size ; - th.m_init_league_rank = league_size - ( league_r + 1 ); - th.m_init_league_size = league_size ; - th.m_init_thread_rank = th.m_team_rank + team_size * th.m_init_league_rank ; - th.m_init_thread_size = team_size * league_size ; - - th.m_work_league_rank = league_r ; - th.m_work_league_end = league_r + 1 ; - th.m_work_league_size = team_topo.first ; - - th.m_fan_size = 0 ; - th.m_fan_team_size = 0 ; - - //------------------------------------ - // Intra-team reduction: - const unsigned team_begin = league_r * team_size ; - for ( int n = 1 ; ( team_r + n < team_size ) && ( 0 == ( n & team_r ) ) ; n <<= 1 , ++th.m_fan_team_size ) { - th.m_fan_team[ th.m_fan_team_size ] = s_threads_exec[ team_begin + team_r + n ]; - } - // Intra-team scan input: - { - unsigned n ; - for ( n = 1 ; 0 == ( team_r & n ) && ( team_r + n < team_size ) ; n <<= 1 ); - if ( ( team_r & n ) && ( team_r + n < team_size ) ) { - th.m_fan_team[ th.m_fan_team_size ] = s_threads_exec[ team_begin + team_r + n ]; - } - else { - th.m_fan_team[ th.m_fan_team_size ] = 0 ; - } - } - //------------------------------------ - // All-thread reduction: - for ( unsigned n = 1 ; ( th_r + n < thread_count ) && ( 0 == ( n & th_r ) ) ; n <<= 1 , ++th.m_fan_size ) { - th.m_fan[ th.m_fan_size ] = s_threads_exec[ th_r + n ]; - } - // All-thread Scan input: - { - unsigned n ; - for ( n = 1 ; 0 == ( th_r & n ) && ( th_r + n < thread_count ) ; n <<= 1 ); - if ( ( th_r & n ) && ( th_r + n < thread_count ) ) { - th.m_fan[ th.m_fan_size ] = s_threads_exec[ th_r + n ]; - } - else { - th.m_fan[ th.m_fan_size ] = 0 ; - } - } - th.m_fan[ th.m_fan_size + 1 ] = th_r + 1 < thread_count ? 
s_threads_exec[ th_r + 1 ] : 0 ; - //------------------------------------ - }} -} - -void ThreadsExec::execute_get_binding( ThreadsExec & exec , const void * ) -{ - const size_t init_thread_rank = exec.m_team_rank + exec.m_team_size * exec.m_init_league_rank ; - s_threads_coord[ init_thread_rank ] = Kokkos::hwloc::get_this_thread_coordinate(); -} - -void ThreadsExec::execute_sleep( ThreadsExec & exec , const void * ) -{ - ThreadsExec::global_lock(); - ThreadsExec::global_unlock(); - - const int n = exec.m_fan_size ; - - for ( int i = 0 ; i < n ; ++i ) { - Impl::spinwait( exec.m_fan[i]->m_state , ThreadsExec::Active ); - } - - exec.m_state = ThreadsExec::Inactive ; -} - -void ThreadsExec::execute_reduce_resize( ThreadsExec & exec , const void * ) -{ - if ( exec.m_reduce ) { - HostSpace::decrement( exec.m_reduce ); - exec.m_reduce = 0 ; - } - - if ( s_threads_reduce_size ) { - - exec.m_reduce = - HostSpace::allocate( "reduce_scratch_space" , typeid(unsigned char) , 1 , s_threads_reduce_size ); - - // Guaranteed multiple of 'unsigned' - - unsigned * ptr = (unsigned *)( exec.m_reduce ); - unsigned * const end = ptr + s_threads_reduce_size / sizeof(unsigned); - - // touch on this thread - while ( ptr < end ) *ptr++ = 0 ; - } -} - -void ThreadsExec::execute_shared_resize( ThreadsExec & exec , const void * ) -{ - const bool not_root = exec.m_team_rank + 1 < exec.m_team_size ; - - if ( not_root ) { - exec.m_shared = 0 ; - } - else { - - if ( exec.m_shared ) { - HostSpace::decrement( exec.m_shared ); - exec.m_shared = 0 ; - } - - if ( s_threads_shared_size ) { - - exec.m_shared = - HostSpace::allocate( "shared_scratch_space" , typeid(unsigned char) , 1 , s_threads_shared_size ); - - // Guaranteed multiple of 'unsigned' - - unsigned * ptr = (unsigned *)( exec.m_shared ); - unsigned * const end = ptr + s_threads_shared_size / sizeof(unsigned); - - // touch on this thread - while ( ptr < end ) *ptr++ = 0 ; - } - } - - exec.m_shared_end = s_threads_shared_size ; -} - -void * ThreadsExec::get_shmem( const int size ) -{ - // m_shared_iter is in bytes, convert to integer offsets - const int offset = m_shared_iter >> power_of_two::value ; - - m_shared_iter += size ; - - if ( m_shared_end < m_shared_iter ) { - Kokkos::Impl::throw_runtime_exception( std::string("ThreadsExec::get_shmem FAILED : exceeded shared memory size" ) ); - } - - return ((int*)m_shared) + offset ; -} - -} -} - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void ThreadsExec::verify_is_process( const std::string & name , const bool initialized ) -{ - if ( ! is_process() ) { - std::string msg( name ); - msg.append( " FAILED : Called by a worker thread, can only be called by the master process." ); - Kokkos::Impl::throw_runtime_exception( msg ); - } - - if ( initialized && 0 == s_threads_count ) { - std::string msg( name ); - msg.append( " FAILED : Threads not initialized." ); - Kokkos::Impl::throw_runtime_exception( msg ); - } -} - -int ThreadsExec::in_parallel() -{ - // A thread function is in execution and - // the function argument is not the special threads process argument and - // the master process is a worker or is not the master process. - return s_current_function && - ( & s_threads_process != s_current_function_arg ) && - ( s_threads_process.m_team_size || ! 
is_process() ); -} - -// Wait for root thread to become inactive -void ThreadsExec::fence() -{ - if ( s_threads_count ) { - // Wait for the root thread to complete: - Impl::spinwait( s_threads_exec[0]->m_state , ThreadsExec::Active ); - - if ( s_exception_msg.size() ) { - Kokkos::Impl::throw_runtime_exception( s_exception_msg ); - } - } - - s_current_function = 0 ; - s_current_function_arg = 0 ; -} - -/** \brief Begin execution of the asynchronous functor */ -void ThreadsExec::start( void (*func)( ThreadsExec & , const void * ) , const void * arg , int work_league_size ) -{ - verify_is_process("ThreadsExec::start" , false ); - - if ( s_current_function || s_current_function_arg ) { - Kokkos::Impl::throw_runtime_exception( std::string( "ThreadsExec::start() FAILED : already executing" ) ); - } - - s_exception_msg.clear(); - - s_current_function = func ; - s_current_function_arg = arg ; - - if ( work_league_size ) { - const int work_per_team = ( work_league_size + s_threads_process.m_init_league_size - 1 ) - / s_threads_process.m_init_league_size ; - - for ( int i = s_threads_count ; 0 < i-- ; ) { - ThreadsExec & th = * s_threads_exec[i] ; - - th.m_work_league_rank = std::min( th.m_init_league_rank * work_per_team , work_league_size ); - th.m_work_league_end = std::min( th.m_work_league_rank + work_per_team , work_league_size ); - th.m_work_league_size = work_league_size ; - } - } - - // Activate threads: - for ( int i = s_threads_count ; 0 < i-- ; ) { - s_threads_exec[i]->m_state = ThreadsExec::Active ; - } - - if ( s_threads_process.m_team_size ) { - // Master process is the root thread: - (*func)( s_threads_process , arg ); - s_threads_process.m_state = ThreadsExec::Inactive ; - } -} - -//---------------------------------------------------------------------------- - -bool ThreadsExec::sleep() -{ - verify_is_process("ThreadsExec::sleep", true ); - - if ( & execute_sleep == s_current_function ) return false ; - - fence(); - - ThreadsExec::global_lock(); - - s_exception_msg.clear(); - - s_current_function = & execute_sleep ; - - // Activate threads: - for ( unsigned i = s_threads_count ; 0 < i ; ) { - s_threads_exec[--i]->m_state = ThreadsExec::Active ; - } - - return true ; -} - -bool ThreadsExec::wake() -{ - verify_is_process("ThreadsExec::wake", true ); - - if ( & execute_sleep != s_current_function ) return false ; - - ThreadsExec::global_unlock(); - - if ( s_threads_process.m_team_size ) { - execute_sleep( s_threads_process , 0 ); - s_threads_process.m_state = ThreadsExec::Inactive ; - } - - fence(); - - return true ; -} - -//---------------------------------------------------------------------------- - -void ThreadsExec::execute_serial( void (*func)( ThreadsExec & , const void * ) ) -{ - s_exception_msg.clear(); - - s_current_function = func ; - s_current_function_arg = & s_threads_process ; - - const unsigned begin = s_threads_process.m_team_size ? 
1 : 0 ; - - for ( unsigned i = s_threads_count ; begin < i ; ) { - ThreadsExec & th = * s_threads_exec[ --i ]; - - th.m_state = ThreadsExec::Active ; - - wait_yield( th.m_state , ThreadsExec::Active ); - } - - if ( s_threads_process.m_team_size ) { - s_threads_process.m_state = ThreadsExec::Active ; - (*func)( s_threads_process , 0 ); - s_threads_process.m_state = ThreadsExec::Inactive ; - } - - s_current_function_arg = 0 ; - s_current_function = 0 ; -} - -//---------------------------------------------------------------------------- - -void * ThreadsExec::root_reduce_scratch() -{ - return s_threads_process.reduce_base(); -} - -void ThreadsExec::resize_reduce_scratch( size_t size ) -{ - fence(); - - if ( size ) { size += REDUCE_TEAM_BASE ; } - - const size_t rem = size % Kokkos::Impl::MEMORY_ALIGNMENT ; - - if ( rem ) size += Kokkos::Impl::MEMORY_ALIGNMENT - rem ; - - if ( ( s_threads_reduce_size < size ) || - ( 0 == size && s_threads_reduce_size ) ) { - - verify_is_process( "ThreadsExec::resize_reduce_scratch" , true ); - - s_threads_reduce_size = size ; - - execute_serial( & execute_reduce_resize ); - - s_threads_process.m_reduce = s_threads_exec[0]->m_reduce ; - } -} - -void ThreadsExec::resize_shared_scratch( size_t size ) -{ - fence(); - - const size_t rem = size % Kokkos::Impl::MEMORY_ALIGNMENT ; - - if ( rem ) size += Kokkos::Impl::MEMORY_ALIGNMENT - rem ; - - if ( s_threads_shared_size < size || ( 0 == size && s_threads_shared_size ) ) { - - verify_is_process( "ThreadsExec::resize_shared_scratch" , true ); - - s_threads_shared_size = size ; - - execute_serial( & execute_shared_resize ); - - for ( unsigned i = 0 ; i < s_threads_count ; ) { - ThreadsExec & team_th = * s_threads_exec[i] ; - - for ( int j = 0 ; j < team_th.m_team_size ; ++j , ++i ) { - s_threads_exec[i]->m_shared = team_th.m_shared ; - } - } - - s_threads_process.m_shared = s_threads_exec[0]->m_shared ; - } -} - -//---------------------------------------------------------------------------- - -void ThreadsExec::print_configuration( std::ostream & s , const bool detail ) -{ - verify_is_process("ThreadsExec::print_configuration",false); - - fence(); - - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - -#if defined( KOKKOS_HAVE_HWLOC ) - s << "macro KOKKOS_HAVE_HWLOC : defined" << std::endl ; -#endif -#if defined( KOKKOS_HAVE_PTHREAD ) - s << "macro KOKKOS_HAVE_PTHREAD : defined" << std::endl ; -#endif - - s << "Kokkos::Threads hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ; - - if ( s_threads_exec[0] ) { - s << " team_league[" << s_threads_exec[0]->m_init_league_size << "x" << s_threads_exec[0]->m_team_size << "]" ; - if ( 0 == s_threads_process.m_team_size ) { s << " Asynchronous" ; } - s << " ReduceScratch[" << s_threads_reduce_size << "]" - << " SharedScratch[" << s_threads_shared_size << "]" ; - s << std::endl ; - - if ( detail ) { - - execute_serial( & execute_get_binding ); - - for ( unsigned i = 0 ; i < s_threads_count ; ++i ) { - ThreadsExec * const th = s_threads_exec[i] ; - s << " Thread hwloc(" - << s_threads_coord[i].first << "," - << s_threads_coord[i].second << ")" ; - - s_threads_coord[i].first = ~0u ; - s_threads_coord[i].second = ~0u ; - - if ( th ) { - s << " rank(" << th->m_init_league_rank << "." 
<< th->m_team_rank << ")" ; - if ( th->m_fan_size ) { - s << " Fan ranks" ; - for ( int j = 0 ; j < th->m_fan_size ; ++j ) { - s << " (" << th->m_fan[j]->m_init_league_rank << "." << th->m_fan[j]->m_team_rank << ")" ; - } - } - } - s << std::endl ; - } - } - } - else { - s << " not initialized" << std::endl ; - } -} - -//---------------------------------------------------------------------------- - -int ThreadsExec::league_max() -{ return std::numeric_limits::max(); } - -int ThreadsExec::team_max() -{ return s_threads_exec[0] ? s_threads_exec[0]->m_team_size : 1 ; } - -//---------------------------------------------------------------------------- - -int ThreadsExec::is_initialized() -{ return 0 != s_threads_exec[0] ; } - -void ThreadsExec::initialize( - const std::pair team_topology , - std::pair use_core_topology ) -{ - static const Sentinel sentinel ; - - verify_is_process("ThreadsExec::initialize",false); - - std::ostringstream msg ; - - msg << "Kokkos::Threads::initialize(" - << " team_topology(" << team_topology.first << "," << team_topology.second << ")" - << ", use_core_topology(" << use_core_topology.first << "," << use_core_topology.second << ")" - << " )" ; - - if ( s_threads_count ) { - msg << " FAILED : Already initialized" ; - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - const unsigned thread_count = team_topology.first * team_topology.second ; - - if ( 0 == thread_count ) { - msg << " FAILED : zero thread count" ; - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - //------------------------------------ - // Query hardware topology and capacity, if available. - - const bool hwloc_avail = Kokkos::hwloc::available(); - - const std::pair - hwloc_core_topo( Kokkos::hwloc::get_available_numa_count() , - Kokkos::hwloc::get_available_cores_per_numa() ); - - std::pair master_coord = Kokkos::hwloc::get_this_thread_coordinate(); - bool asynchronous = false ; - - if ( hwloc_avail && 1 < thread_count ) { - - if ( 0 == use_core_topology.first && 0 == use_core_topology.second ) { - use_core_topology = Kokkos::hwloc::use_core_topology( thread_count ); - } - - if ( use_core_topology.first < hwloc_core_topo.first ) { - // Can omit a (NUMA) group of cores and execute work asynchronously - // on the other groups. - - Kokkos::hwloc::thread_mapping( team_topology , use_core_topology , hwloc_core_topo , s_threads_coord ); - - // Don't use master thread's first core coordinate (NUMA region). - // Originally mapped: - // begin = hwloc_core_topo.first - use_core_topology.first ; - // end = hwloc_core_topo.first ; - // So can decrement. - - for ( unsigned i = 0 ; i < thread_count ; ++i ) { - if ( s_threads_coord[i].first <= master_coord.first ) { - --( s_threads_coord[i].first ); - } - } - - asynchronous = true ; - } - else if ( use_core_topology.second < hwloc_core_topo.second ) { - // Can omit a core from each group and execute work asynchronously - - Kokkos::hwloc::thread_mapping( team_topology , use_core_topology , hwloc_core_topo , s_threads_coord ); - - // Threads' coordinates are in the range - // 0 <= numa_begin = hwloc_core_topo.first - use_core_topology.first - // 1 <= numa_end = hwloc_core_topo.first - // 1 <= core_begin = hwloc_core_topo.second - use_core_topology.second - // 1 <= core_end = hwloc_core_topo.second - // - // range: ( [numa_begin,numa_end) , [core_begin,core_end) ) - // - // Force master thread onto the highest rank unused core of its current numa region. 
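      // (Editor's illustrative note, not in the original file.)
      // Example: with hwloc_core_topo.second = 4 cores per NUMA region and
      // use_core_topology.second = 3 cores used per region, worker threads
      // occupy cores [1,4), leaving core 0 free; the master thread is placed
      // on the highest-rank unused core, ( 4 - 3 ) - 1 = 0, by the assignment below.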
- // - master_coord.second = ( hwloc_core_topo.second - use_core_topology.second ) - 1 ; - - asynchronous = true ; - } - else { - // Spawn threads with root thread on the master process' core - - Kokkos::hwloc::thread_mapping( team_topology , use_core_topology , hwloc_core_topo , master_coord , s_threads_coord ); - - s_threads_coord[0] = std::pair( ~0u , ~0u ); - } - } - - //------------------------------------ - // Spawn threads - - { - const unsigned thread_spawn_begin = asynchronous ? 0 : 1 ; - unsigned thread_spawn_failed = 0 ; - - s_threads_count = thread_count ; - s_current_function = & execute_function_noop ; // Initialization work function - - // If not fully utilizing the capacity then spawn threads for asynchronous execution. - - for ( unsigned i = thread_spawn_begin ; i < thread_count ; ++i ) { - - s_threads_process.m_state = ThreadsExec::Inactive ; - - // If hwloc available then spawned thread will choose its own rank, - // otherwise specify the rank. - s_current_function_arg = (void*)( hwloc_avail ? ~0u : i ); - - // Spawn thread executing the 'driver()' function. - // Wait until spawned thread has attempted to initialize. - // If spawning and initialization is successfull then - // an entry in 's_threads_exec' will be assigned. - if ( ThreadsExec::spawn() ) { - wait_yield( s_threads_process.m_state , ThreadsExec::Inactive ); - } - } - - // Wait for all spawned threads to deactivate before zeroing the function. - - for ( unsigned i = thread_spawn_begin ; i < thread_count ; ++i ) { - // Try to protect against cache coherency failure by casting to volatile. - ThreadsExec * const th = ((ThreadsExec * volatile *)s_threads_exec)[i] ; - if ( th ) { - wait_yield( th->m_state , ThreadsExec::Active ); - } - else { - ++thread_spawn_failed ; - } - } - - s_current_function = 0 ; - s_current_function_arg = 0 ; - - if ( thread_spawn_failed ) { - - s_threads_count = 0 ; - - msg << " FAILED " << thread_spawn_failed << " attempts to spawn threads" ; - - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - if ( 1 < thread_count ) { Kokkos::hwloc::bind_this_thread( master_coord ); } - - // Clear master thread data. - // The master thread will be unused or initialized - // as part of the thread pool. - - s_threads_process.m_team_rank = 0 ; - s_threads_process.m_team_size = 0 ; - s_threads_process.m_init_league_rank = 0 ; - s_threads_process.m_init_league_size = 0 ; - s_threads_process.m_init_thread_rank = 0 ; - s_threads_process.m_init_thread_size = 0 ; - s_threads_process.m_work_league_rank = 0 ; - s_threads_process.m_work_league_end = 0 ; - s_threads_process.m_work_league_size = 0 ; - s_threads_process.m_state = ThreadsExec::Inactive ; - - if ( thread_spawn_begin ) { - s_threads_exec[0] = & s_threads_process ; // Include the master thread in pool. - } - } - - //------------------------------------ - // Initialize team topology and fan-in/out relationships: - - s_threads_process.m_init_league_size = team_topology.first ; - - ThreadsExec::set_threads_relationships( team_topology ); - - // Initial allocations: - ThreadsExec::resize_reduce_scratch( 4096 - REDUCE_TEAM_BASE ); - ThreadsExec::resize_shared_scratch( 4096 ); -} - -//---------------------------------------------------------------------------- - -void ThreadsExec::finalize() -{ - verify_is_process("ThreadsExec::finalize",false); - - fence(); - - resize_reduce_scratch(0); - resize_shared_scratch(0); - - const unsigned begin = s_threads_process.m_team_size ? 
1 : 0 ; - - for ( unsigned i = s_threads_count ; begin < i-- ; ) { - - if ( s_threads_exec[i] ) { - - s_threads_exec[i]->m_state = ThreadsExec::Terminating ; - - wait_yield( s_threads_process.m_state , ThreadsExec::Inactive ); - - s_threads_process.m_state = ThreadsExec::Inactive ; - } - } - - if ( s_threads_process.m_team_size ) { - ( & s_threads_process )->~ThreadsExec(); - s_threads_exec[0] = 0 ; - } - - Kokkos::hwloc::unbind_this_thread(); - - s_threads_count = 0 ; - - // Reset master thread to run solo. - s_threads_process.m_team_rank = 0 ; - s_threads_process.m_team_size = 1 ; - s_threads_process.m_init_league_rank = 0 ; - s_threads_process.m_init_league_size = 1 ; - s_threads_process.m_init_thread_rank = 0 ; - s_threads_process.m_init_thread_size = 1 ; - - s_threads_process.m_work_league_rank = 0 ; - s_threads_process.m_work_league_end = 1 ; - s_threads_process.m_work_league_size = 1 ; - s_threads_process.m_state = ThreadsExec::Inactive ; -} - -//---------------------------------------------------------------------------- - -} /* namespace Impl */ -} /* namespace Kokkos */ - - diff --git a/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp deleted file mode 100644 index ec7cd02..0000000 --- a/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ /dev/null @@ -1,557 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_THREADSEXEC_HPP -#define KOKKOS_THREADSEXEC_HPP - -#include - -#include -#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -template< class > struct ThreadsExecAdapter ; - -//---------------------------------------------------------------------------- - -class ThreadsExec { -public: - - // Fan array has log_2(NT) reduction threads plus 2 scan threads - // Currently limited to 16k threads. - enum { MAX_FAN_COUNT = 16 }; - enum { MAX_THREAD_COUNT = 1 << ( MAX_FAN_COUNT - 2 ) }; - enum { VECTOR_LENGTH = 8 }; - - /** \brief States of a worker thread */ - enum { Terminating ///< Termination in progress - , Inactive ///< Exists, waiting for work - , Active ///< Exists, performing work - , Rendezvous ///< Exists, waiting in a barrier or reduce - - , ScanAvailable - , ReductionAvailable - }; - -private: - - friend class Kokkos::Threads ; - - // Fan-in operations' root is the highest ranking thread - // to place the 'scan' reduction intermediate values on - // the threads that need them. - // For a simple reduction the thread location is arbitrary. - - /** \brief Reduction memory reserved for team reductions */ - enum { REDUCE_TEAM_BASE = 512 }; - - void * m_reduce ; ///< Reduction memory - void * m_shared ; ///< Team-shared memory - int m_shared_end ; ///< End of team-shared memory - int m_shared_iter ; ///< Current offset for team-shared memory - int volatile m_state ; ///< State for global synchronizations - int volatile m_state_team ; ///< State for team synchronizations - int m_fan_size ; - int m_fan_team_size ; - - int m_team_rank ; - int m_team_size ; - int m_init_league_rank ; - int m_init_league_size ; - int m_init_thread_rank ; - int m_init_thread_size ; - - int m_work_league_rank ; - int m_work_league_end ; - int m_work_league_size ; - - ThreadsExec * m_fan[ MAX_FAN_COUNT ] ; - ThreadsExec * m_fan_team[ MAX_FAN_COUNT ] ; - - static void global_lock(); - static void global_unlock(); - static bool spawn(); - - static void execute_sleep( ThreadsExec & , const void * ); - static void execute_reduce_resize( ThreadsExec & , const void * ); - static void execute_shared_resize( ThreadsExec & , const void * ); - static void execute_get_binding( ThreadsExec & , const void * ); - - ThreadsExec( const ThreadsExec & ); - ThreadsExec & operator = ( const ThreadsExec & ); - - static void execute_serial( void (*)( ThreadsExec & , const void * ) ); - - inline void * reduce_team() const { return m_reduce ; } - -public: - - static int get_thread_count(); - static ThreadsExec * get_thread( const int init_thread_rank ); - - inline void * reduce_base() const { return ((unsigned char *) m_reduce) + REDUCE_TEAM_BASE ; } - - static void driver(void); - - ~ThreadsExec(); - ThreadsExec(); - - static void set_threads_relationships( const std::pair team_topo ); - - static void resize_reduce_scratch( size_t ); - static void resize_shared_scratch( size_t ); - - static void * root_reduce_scratch(); - - static bool is_process(); - - static void verify_is_process( const std::string & , const bool initialized ); - - static int is_initialized(); - - static void initialize( const std::pair team_topo , - std::pair core_topo ); - - static void finalize(); - - static void print_configuration( std::ostream & , const 
bool detail = false ); - - //------------------------------------ - - static void wait_yield( volatile int & , const int ); - - //------------------------------------ - // All-thread functions: - - inline - std::pair< size_t , size_t > - work_range( const size_t work_count ) const - { - typedef integral_constant< size_t , VECTOR_LENGTH - 1 > work_mask ; - - // work per thread rounded up and aligned to vector length: - - const size_t work_per_thread = - ( ( ( work_count + m_init_thread_size - 1 ) / m_init_thread_size ) + work_mask::value ) & ~(work_mask::value); - - const size_t work_begin = std::min( work_count , work_per_thread * m_init_thread_rank ); - const size_t work_end = std::min( work_count , work_per_thread + work_begin ); - - return std::pair< size_t , size_t >( work_begin , work_end ); - } - - template< class Functor > - inline - void fan_in_reduce( const Functor & f ) const - { - typedef ReduceAdapter< Functor > Reduce ; - - for ( int i = 0 ; i < m_fan_size ; ++i ) { - - ThreadsExec & fan = *m_fan[i] ; - - Impl::spinwait( fan.m_state , ThreadsExec::Active ); - - f.join( Reduce::reference( reduce_base() ) , - Reduce::reference( fan.reduce_base() ) ); - } - } - - inline - void fan_in() const - { - for ( int i = 0 ; i < m_fan_size ; ++i ) { - Impl::spinwait( m_fan[i]->m_state , ThreadsExec::Active ); - } - } - - template< class FunctorType > - inline - void scan_large( const FunctorType & f ) - { - // Sequence of states: - // 0) Active : entry and exit state - // 1) ReductionAvailable : reduction value available - // 1) Rendezvous : all reduction values available and copied - // 2) ScanAvailable : scan value available - - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::scalar_type scalar_type ; - - const bool not_root = m_init_thread_rank + 1 < m_init_thread_size ; - const unsigned count = Reduce::value_count( f ); - - scalar_type * const work_value = (scalar_type *) reduce_base(); - - //-------------------------------- - // Fan-in reduction with highest ranking thread as the root - for ( int i = 0 ; i < m_fan_size ; ++i ) { - ThreadsExec & fan = *m_fan[i]; - - // Wait: Active -> ReductionAvailable - Impl::spinwait( fan.m_state , ThreadsExec::Active ); - f.join( Reduce::reference( work_value ) , Reduce::reference( fan.reduce_base() ) ); - } - - // Copy reduction value to scan value before releasing from this phase. - for ( unsigned i = 0 ; i < count ; ++i ) { work_value[i+count] = work_value[i] ; } - - if ( not_root ) { - m_state = ThreadsExec::ReductionAvailable ; - // Wait: ReductionAvailable -> Rendezvous - Impl::spinwait( m_state , ThreadsExec::ReductionAvailable ); - } - - for ( int i = 0 ; i < m_fan_size ; ++i ) { - m_fan[i]->m_state = ThreadsExec::Rendezvous ; - } - - // All non-root threads are now in the Rendezvous state - //-------------------------------- - - if ( not_root ) { - - // Wait for contributing threads' scan value to be available. 
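The work_range() arithmetic above rounds the per-thread chunk up to a multiple of VECTOR_LENGTH and clamps both ends to the total work count, so the highest-ranked threads may receive a short or empty range. A minimal standalone sketch of that computation, with the thread rank, thread count, and vector length passed in explicitly (in the real class they are ThreadsExec members):

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <utility>

// Sketch of ThreadsExec::work_range(): per-thread work rounded up and
// aligned to the vector length, then clamped to the total work count.
std::pair<std::size_t, std::size_t>
sketch_work_range( std::size_t work_count ,
                   std::size_t thread_rank ,
                   std::size_t thread_size ,
                   std::size_t vector_length = 8 /* assumed power of two */ )
{
  const std::size_t mask = vector_length - 1 ;

  const std::size_t work_per_thread =
    ( ( ( work_count + thread_size - 1 ) / thread_size ) + mask ) & ~mask ;

  const std::size_t begin = std::min( work_count , work_per_thread * thread_rank );
  const std::size_t end   = std::min( work_count , begin + work_per_thread );

  return std::pair<std::size_t,std::size_t>( begin , end );
}

int main()
{
  // 100 work items over 4 threads with vector length 8:
  // threads 0..2 get 32 items each, thread 3 gets the remaining 4.
  for ( std::size_t rank = 0 ; rank < 4 ; ++rank ) {
    const std::pair<std::size_t,std::size_t> r = sketch_work_range( 100 , rank , 4 );
    std::printf( "thread %zu : [%zu,%zu)\n" , rank , r.first , r.second );
  }
  return 0 ;
}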
- if ( m_fan[ m_fan_size ] ) { - ThreadsExec & th = *m_fan[ m_fan_size ] ; - - // Wait: Rendezvous -> ScanAvailable - Impl::spinwait( th.m_state , ThreadsExec::Rendezvous ); - - f.join( Reduce::reference( work_value + count ) , - Reduce::reference( ((scalar_type *)th.reduce_base()) + count ) ); - } - - m_state = ThreadsExec::ScanAvailable ; - } - - //-------------------------------- - - if ( m_fan[ m_fan_size + 1 ] ) { - ThreadsExec & th = *m_fan[ m_fan_size + 1 ] ; // Not the root thread - - // Wait: Rendezvous -> ScanAvailable - Impl::spinwait( th.m_state , ThreadsExec::Rendezvous ); - - const scalar_type * const src_value = ((scalar_type *)th.reduce_base()) + count ; - - for ( unsigned j = 0 ; j < count ; ++j ) { work_value[j] = src_value[j]; } - - th.m_state = ThreadsExec::Active ; // Release the source thread - } - else { - f.init( Reduce::reference( work_value ) ); - } - - // Wait for scan value to be claimed before exiting. - Impl::spinwait( m_state , ThreadsExec::ScanAvailable ); - } - - template< class FunctorType > - inline - void scan_small( const FunctorType & f ) - { - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::scalar_type scalar_type ; - - const bool not_root = m_init_thread_rank + 1 < m_init_thread_size ; - const unsigned count = Reduce::value_count( f ); - - scalar_type * const work_value = (scalar_type *) reduce_base(); - - //-------------------------------- - // Fan-in reduction with highest ranking thread as the root - for ( int i = 0 ; i < m_fan_size ; ++i ) { - // Wait: Active -> Rendezvous - Impl::spinwait( m_fan[i]->m_state , ThreadsExec::Active ); - } - - for ( unsigned i = 0 ; i < count ; ++i ) { work_value[i+count] = work_value[i]; } - - if ( not_root ) { - m_state = ThreadsExec::Rendezvous ; - // Wait: Rendezvous -> Active - Impl::spinwait( m_state , ThreadsExec::Rendezvous ); - } - else { - // Root thread does the thread-scan before releasing threads - - scalar_type * ptr_prev = 0 ; - - for ( int rank = 0 ; rank < m_init_thread_size ; ++rank ) { - scalar_type * const ptr = (scalar_type *) get_thread( rank )->reduce_base(); - if ( rank ) { - for ( unsigned i = 0 ; i < count ; ++i ) { ptr[i] = ptr_prev[ i + count ]; } - f.join( Reduce::reference( ptr + count ), Reduce::reference( ptr ) ); - } - else { - f.init( Reduce::reference( ptr ) ); - } - ptr_prev = ptr ; - } - } - - for ( int i = 0 ; i < m_fan_size ; ++i ) { - m_fan[i]->m_state = ThreadsExec::Active ; - } - } - - //------------------------------------ - // Team-only functions: - - void * get_shmem( const int size ); - - void team_barrier() - { - const bool not_root = m_team_rank + 1 < m_team_size ; - - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - Impl::spinwait( m_fan_team[i]->m_state , ThreadsExec::Active ); - } - if ( not_root ) { - m_state = Rendezvous ; - Impl::spinwait( m_state , ThreadsExec::Rendezvous ); - } - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - m_fan_team[i]->m_state = ThreadsExec::Active ; - } - } - - template< class ArgType > - inline - ArgType team_scan( const ArgType & value , ArgType * const global_accum = 0 ) - { - // Sequence of m_state_team states: - // 0) Inactive : entry and exit state - // 1) ReductionAvailable : reduction value available, waiting for scan value - // 2) ScanAvailable : reduction value available, scan value available - // 3) Rendezvous : broadcasting global iinter-team accumulation value - - // Make sure there is enough scratch space: - typedef typename if_c< 2 * sizeof(ArgType) < REDUCE_TEAM_BASE , ArgType , void >::type 
type ; - - const bool not_root = m_team_rank + 1 < m_team_size ; - type * const work_value = (type*) reduce_team(); - - // ThreadsExec::Inactive == m_state_team - - work_value[0] = value ; - - // Fan-in reduction, wait for source thread to complete it's fan-in reduction. - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - ThreadsExec & th = *m_fan_team[i]; - - // Wait for source thread to exit Inactive state. - Impl::spinwait( th.m_state_team , ThreadsExec::Inactive ); - // Source thread is 'ReductionAvailable' or 'ScanAvailable' - work_value[0] += ((volatile type*)th.reduce_team())[0]; - } - - work_value[1] = work_value[0] ; - - if ( not_root ) { - - m_state_team = ThreadsExec::ReductionAvailable ; // Reduction value is available. - - // Wait for contributing threads' scan value to be available. - if ( m_fan_team[ m_fan_team_size ] ) { - ThreadsExec & th = *m_fan_team[ m_fan_team_size ] ; - - // Wait: Inactive -> ReductionAvailable - Impl::spinwait( th.m_state_team , ThreadsExec::Inactive ); - // Wait: ReductionAvailable -> ScanAvailable: - Impl::spinwait( th.m_state_team , ThreadsExec::ReductionAvailable ); - - work_value[1] += ((volatile type*)th.reduce_team())[1] ; - } - - m_state_team = ThreadsExec::ScanAvailable ; // Scan value is available. - } - else { - // Root thread add team's total to global inter-team accumulation - work_value[0] = global_accum ? atomic_fetch_add( global_accum , work_value[0] ) : 0 ; - } - - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - ThreadsExec & th = *m_fan_team[i]; - // Wait: ReductionAvailable -> ScanAvailable - Impl::spinwait( th.m_state_team , ThreadsExec::ReductionAvailable ); - // Wait: ScanAvailable -> Rendezvous - Impl::spinwait( th.m_state_team , ThreadsExec::ScanAvailable ); - } - - // All fan-in threads are in the ScanAvailable state - if ( not_root ) { - m_state_team = ThreadsExec::Rendezvous ; - Impl::spinwait( m_state_team , ThreadsExec::Rendezvous ); - } - - // Broadcast global inter-team accumulation value - volatile type & global_val = work_value[0] ; - for ( int i = 0 ; i < m_fan_team_size ; ++i ) { - ThreadsExec & th = *m_fan_team[i]; - ((volatile type*)th.reduce_team())[0] = global_val ; - th.m_state_team = ThreadsExec::Inactive ; - } - // Exclusive scan, subtract contributed value - return global_val + work_value[1] - value ; - } - - /* When a functor using the 'device' interface requests - * more teams than are initialized the parallel operation - * must loop over a range of league ranks with a team_barrier - * between each iteration. - */ - bool team_work_avail() - { - m_shared_iter = 0 ; - return m_work_league_rank < m_work_league_end ; - } - - void team_work_next() - { if ( ++m_work_league_rank < m_work_league_end ) team_barrier(); } - - //------------------------------------ - /** \brief Wait for previous asynchronous functor to - * complete and release the Threads device. - * Acquire the Threads device and start this functor. 
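team_scan() above returns an exclusive prefix: every thread receives the sum of the contributions of lower-ranked team members, offset by whatever the root thread fetched from the optional global inter-team accumulator. A tiny serial sketch of that contract, with the loop standing in for the fan-in/fan-out protocol and all values illustrative:

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
  const std::vector<int> contribution = { 3 , 1 , 4 , 1 , 5 }; // one value per team rank
  const int global_accum = 100 ;                               // prior inter-team total

  int running = 0 ;
  for ( std::size_t rank = 0 ; rank < contribution.size() ; ++rank ) {
    const int exclusive_prefix = global_accum + running ;      // what team_scan returns
    std::printf( "rank %zu : exclusive prefix = %d\n" , rank , exclusive_prefix );
    running += contribution[rank] ;
  }

  // In the real implementation the root thread would also add the team
  // total ('running') to the global accumulator via atomic_fetch_add.
  return 0 ;
}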
- */ - static void start( void (*)( ThreadsExec & , const void * ) , const void * , int = 0 ); - - static int league_max(); - static int team_max(); - - static int in_parallel(); - static void fence(); - static bool sleep(); - static bool wake(); -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -inline int Threads::in_parallel() -{ return Impl::ThreadsExec::in_parallel(); } - -inline int Threads::is_initialized() -{ return Impl::ThreadsExec::is_initialized(); } - -inline void Threads::initialize( - unsigned team_count , - unsigned threads_per_team , - unsigned use_numa_count , - unsigned use_cores_per_numa ) -{ - Impl::ThreadsExec::initialize( - std::pair( team_count , threads_per_team ), - std::pair( use_numa_count , use_cores_per_numa ) ); -} - -inline void Threads::finalize() -{ - Impl::ThreadsExec::finalize(); -} - -inline void Threads::print_configuration( std::ostream & s , const bool detail ) -{ - Impl::ThreadsExec::print_configuration( s , detail ); -} - -inline unsigned Threads::league_max() -{ return Impl::ThreadsExec::league_max() ; } - -inline unsigned Threads::team_max() -{ return Impl::ThreadsExec::team_max() ; } - -inline bool Threads::sleep() -{ return Impl::ThreadsExec::sleep() ; } - -inline bool Threads::wake() -{ return Impl::ThreadsExec::wake() ; } - -inline void Threads::fence() -{ Impl::ThreadsExec::fence() ; } - -inline int Threads::league_rank() const -{ return m_exec.m_work_league_rank ; } - -inline int Threads::league_size() const -{ return m_exec.m_work_league_size ; } - -inline int Threads::team_rank() const -{ return m_exec.m_team_rank ; } - -inline int Threads::team_size() const -{ return m_exec.m_team_size ; } - -inline void Threads::team_barrier() -{ return m_exec.team_barrier(); } - -inline Threads::Threads( Impl::ThreadsExec & t ) : m_exec( t ) {} - -template< typename Type > -inline Type Threads::team_scan( const Type & value ) -{ return m_exec.team_scan( value ); } - -template< typename TypeLocal , typename TypeGlobal > -inline TypeGlobal Threads::team_scan( const TypeLocal & value , TypeGlobal * const global_accum ) -{ return m_exec.template team_scan< TypeGlobal >( value , global_accum ); } - -inline -void * Threads::get_shmem( const int size ) { return m_exec.get_shmem( size ); } - -} /* namespace Kokkos */ - -#endif /* #define KOKKOS_THREADSEXEC_HPP */ - diff --git a/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp b/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp deleted file mode 100644 index 1e7cb0f..0000000 --- a/kokkos/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include - -#include -#include - -/*--------------------------------------------------------------------------*/ - -#if defined( KOKKOS_HAVE_PTHREAD ) - -/* Standard 'C' Linux libraries */ - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -namespace { - -pthread_mutex_t host_internal_pthread_mutex = PTHREAD_MUTEX_INITIALIZER ; - -// Pthreads compatible driver: - -void * internal_pthread_driver( void * ) -{ - ThreadsExec::driver(); - - return NULL ; -} - -} // namespace - -//---------------------------------------------------------------------------- -// Spawn a thread - -bool ThreadsExec::spawn() -{ - bool result = false ; - - pthread_attr_t attr ; - - if ( 0 == pthread_attr_init( & attr ) || - 0 == pthread_attr_setscope( & attr, PTHREAD_SCOPE_SYSTEM ) || - 0 == pthread_attr_setdetachstate( & attr, PTHREAD_CREATE_DETACHED ) ) { - - pthread_t pt ; - - result = 0 == pthread_create( & pt, & attr, internal_pthread_driver, 0 ); - } - - pthread_attr_destroy( & attr ); - - return result ; -} - -//---------------------------------------------------------------------------- - -bool ThreadsExec::is_process() -{ - static const pthread_t master_pid = pthread_self(); - - return pthread_equal( master_pid , pthread_self() ); -} - -void ThreadsExec::global_lock() -{ - pthread_mutex_lock( & host_internal_pthread_mutex ); -} - -void ThreadsExec::global_unlock() -{ - pthread_mutex_unlock( & host_internal_pthread_mutex ); -} - -//---------------------------------------------------------------------------- - -void ThreadsExec::wait_yield( volatile int & flag , const int value ) -{ - while ( value == flag ) { sched_yield(); } -} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#elif defined( KOKKOS_HAVE_WINTHREAD ) - -/* Windows libraries */ -#include -#include - 
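The pthread branch above spawns each worker as a detached, system-scope thread that immediately enters ThreadsExec::driver(). A self-contained sketch of that spawn pattern, with a stand-in driver function and the attribute calls chained with &&:

#include <pthread.h>
#include <unistd.h>
#include <cstdio>

// Stand-in for ThreadsExec::driver(); the real driver spins on its
// state word waiting for work.
static void * sketch_driver( void * )
{
  std::printf( "worker thread running\n" );
  return NULL ;
}

static bool sketch_spawn()
{
  bool result = false ;

  pthread_attr_t attr ;

  // System scope + detached state, the same attributes requested above.
  if ( 0 == pthread_attr_init( & attr ) &&
       0 == pthread_attr_setscope( & attr , PTHREAD_SCOPE_SYSTEM ) &&
       0 == pthread_attr_setdetachstate( & attr , PTHREAD_CREATE_DETACHED ) ) {

    pthread_t pt ;

    result = 0 == pthread_create( & pt , & attr , sketch_driver , NULL );
  }

  pthread_attr_destroy( & attr );

  return result ;
}

int main()
{
  if ( ! sketch_spawn() ) { std::printf( "spawn failed\n" ); }
  sleep( 1 ); // crude wait so the detached worker can print before main exits
  return 0 ;
}

Compile and link with -lpthread.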
-//---------------------------------------------------------------------------- -// Driver for each created pthread - -namespace Kokkos { -namespace Impl { -namespace { - -unsigned WINAPI internal_winthread_driver( void * arg ) -{ - ThreadsExec::driver(); - - return 0 ; -} - -class ThreadLockWindows { -private: - CRITICAL_SECTION m_handle ; - - ~ThreadLockWindows() - { DeleteCriticalSection( & m_handle ); } - - ThreadLockWindows(); - { InitializeCriticalSection( & m_handle ); } - - ThreadLockWindows( const ThreadLockWindows & ); - ThreadLockWindows & operator = ( const ThreadLockWindows & ); - -public: - - static ThreadLockWindows & singleton(); - - void lock() - { EnterCriticalSection( & m_handle ); } - - void unlock() - { LeaveCriticalSection( & m_handle ); } -}; - -ThreadLockWindows & ThreadLockWindows::singleton() -{ static ThreadLockWindows self ; return self ; } - -} // namespace <> -} // namespace Kokkos -} // namespace Impl - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -// Spawn this thread - -bool ThreadsExec::spawn() -{ - unsigned Win32ThreadID = 0 ; - - HANDLE handle = - _beginthreadex(0,0,internal_winthread_driver,0,0, & Win32ThreadID ); - - return ! handle ; -} - -bool ThreadsExec::is_process() { return true ; } - -void ThreadsExec::global_lock() -{ ThreadLockWindows::singleton().lock(); } - -void ThreadsExec::global_unlock() -{ ThreadLockWindows::singleton().unlock(); } - -void ThreadsExec::wait_yield( volatile int & flag , const int value ) {} -{ - while ( value == flag ) { Sleep(0); } -} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#else /* NO Threads */ - -namespace Kokkos { -namespace Impl { - -bool ThreadsExec::spawn() -{ - std::string msg("Kokkos::Threads ERROR : Attempting to spawn threads without configuring with a threading library. Try configuring with KOKKOS_HAVE_PTHREAD"); - throw std::runtime_error( msg ); - - return false ; -} - -bool ThreadsExec::is_process() { return true ; } -void ThreadsExec::global_lock() {} -void ThreadsExec::global_unlock() {} -void ThreadsExec::wait_yield( volatile int & , const int ) {} - -} // namespace Impl -} // namespace Kokkos - -#endif /* End thread model */ - diff --git a/kokkos/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/kokkos/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp deleted file mode 100644 index 184237f..0000000 --- a/kokkos/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ /dev/null @@ -1,422 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_THREADS_PARALLEL_HPP -#define KOKKOS_THREADS_PARALLEL_HPP - -#include - -#include -#include - -#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -template< class FunctorType , class WorkSpec > -class ParallelFor< FunctorType , WorkSpec , Kokkos::Threads > -{ -public: - - const FunctorType m_func ; - const size_t m_work ; - - static void execute( ThreadsExec & exec , const void * arg ) - { - const ParallelFor & self = * ((const ParallelFor *) arg ); - - const std::pair work = exec.work_range( self.m_work ); - - for ( size_t iwork = work.first ; iwork < work.second ; ++iwork ) { - self.m_func( iwork ); - } - - exec.fan_in(); - } - - ParallelFor( const FunctorType & functor , const size_t work ) - : m_func( functor ), m_work( work ) - { - ThreadsExec::start( & ParallelFor::execute , this ); - ThreadsExec::fence(); - } - - inline void wait() {} - - inline ~ParallelFor() { wait(); } -}; - -template< class FunctorType > -class ParallelFor< FunctorType , ParallelWorkRequest , Kokkos::Threads > -{ -public: - - const FunctorType m_func ; - - static void execute( ThreadsExec & exec , const void * arg ) - { - const ParallelFor & self = * ((const ParallelFor *) arg ); - - for ( ; exec.team_work_avail() ; exec.team_work_next() ) { - self.m_func( Threads( exec ) ); - } - - exec.fan_in(); - } - - ParallelFor( const FunctorType & functor , const ParallelWorkRequest & work ) - : m_func( functor ) - { - ThreadsExec::resize_shared_scratch( FunctorShmemSize< FunctorType >::value( functor ) ); - ThreadsExec::start( & ParallelFor::execute , this , work.league_size ); - ThreadsExec::fence(); - } - - inline void wait() {} - - inline ~ParallelFor() { wait(); } -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -template< class FunctorType , class WorkSpec > -class ParallelReduce< FunctorType , WorkSpec , Kokkos::Threads > -{ -public: - - typedef ReduceAdapter< FunctorType > Reduce ; - 
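The range-based ParallelFor specialization above simply calls m_func( iwork ) for every index in the thread's work range, so any functor with a const call operator taking the work index fits. An illustrative sketch under that assumption; AxpyFunctor and its members are hypothetical names, and the serial loop stands in for the per-thread traversal:

#include <cstddef>
#include <cstdio>
#include <vector>

struct AxpyFunctor {
  double alpha ;
  const std::vector<double> & x ;
  std::vector<double>       & y ;

  // Call signature expected by the range-based ParallelFor::execute() above.
  void operator()( std::size_t i ) const { y[i] += alpha * x[i]; }
};

int main()
{
  std::vector<double> x( 8 , 1.0 ), y( 8 , 2.0 );
  const AxpyFunctor f = { 0.5 , x , y };

  // ParallelFor would hand each thread a [begin,end) slice of this range.
  for ( std::size_t i = 0 ; i < x.size() ; ++i ) { f( i ); }

  std::printf( "y[0] = %g\n" , y[0] );  // expect 2.5
  return 0 ;
}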
typedef typename Reduce::pointer_type pointer_type ; - - const FunctorType m_func ; - const size_t m_work ; - - static void execute( ThreadsExec & exec , const void * arg ) - { - const ParallelReduce & self = * ((const ParallelReduce *) arg ); - - typename Reduce::reference_type update = Reduce::reference( exec.reduce_base() ); - - self.m_func.init( update ); // Initialize thread-local value - - const std::pair work = exec.work_range( self.m_work ); - - for ( size_t iwork = work.first ; iwork < work.second ; ++iwork ) { - self.m_func( iwork , update ); - } - - exec.fan_in_reduce( self.m_func ); - } - - ParallelReduce( const FunctorType & functor , - const size_t work , - const pointer_type result_ptr = 0 ) - : m_func( functor ), m_work( work ) - { - ThreadsExec::resize_reduce_scratch( Reduce::value_size( m_func ) ); - - ThreadsExec::start( & ParallelReduce::execute , this ); - - const pointer_type data = (pointer_type) ThreadsExec::root_reduce_scratch(); - - ThreadsExec::fence(); - - Reduce::final( m_func , data ); - - if ( result_ptr ) { - const unsigned n = Reduce::value_count( m_func ); - for ( unsigned i = 0 ; i < n ; ++i ) { result_ptr[i] = data[i]; } - } - } - - inline void wait() {} - - inline ~ParallelReduce() { wait(); } -}; - -template< class FunctorType > -class ParallelReduce< FunctorType , ParallelWorkRequest , Kokkos::Threads > -{ -public: - - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - const FunctorType m_func ; - - static void execute( ThreadsExec & exec , const void * arg ) - { - const ParallelReduce & self = * ((const ParallelReduce *) arg ); - - typename Reduce::reference_type update = Reduce::reference( exec.reduce_base() ); - - self.m_func.init( update ); // Initialize thread-local value - - for ( ; exec.team_work_avail() ; exec.team_work_next() ) { - self.m_func( Threads( exec ) , update ); - } - - exec.fan_in_reduce( self.m_func ); - } - - ParallelReduce( const FunctorType & functor , - const ParallelWorkRequest & work , - const pointer_type result_ptr = 0 ) - : m_func( functor ) - { - ThreadsExec::resize_shared_scratch( FunctorShmemSize< FunctorType >::value( functor ) ); - ThreadsExec::resize_reduce_scratch( Reduce::value_size( m_func ) ); - - ThreadsExec::start( & ParallelReduce::execute , this , work.league_size ); - - const pointer_type data = (pointer_type) ThreadsExec::root_reduce_scratch(); - - ThreadsExec::fence(); - - Reduce::final( m_func , data ); - - if ( result_ptr ) { - const unsigned n = Reduce::value_count( m_func ); - for ( unsigned i = 0 ; i < n ; ++i ) { result_ptr[i] = data[i]; } - } - } - - inline void wait() {} - - inline ~ParallelReduce() { wait(); } -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -struct ThreadsExecUseScanSmall { - size_t nwork ; - operator size_t () const { return nwork ; } - ThreadsExecUseScanSmall( size_t n ) : nwork( n ) {} -}; - -template< class FunctorType , class WorkSpec > -class ParallelScan< FunctorType , WorkSpec , Kokkos::Threads > -{ -public: - - typedef ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - const FunctorType m_func ; - const size_t m_work ; - - static void execute( ThreadsExec & exec , const void * arg ) - { - const ParallelScan & self = * ((const ParallelScan *) arg ); - - const std::pair work = exec.work_range( self.m_work ); - - typename Reduce::reference_type update = 
Reduce::reference( exec.reduce_base() ); - - self.m_func.init( update ); - - for ( size_t iwork = work.first ; iwork < work.second ; ++iwork ) { - self.m_func( iwork , update , false ); - } - - // Compile time selection of scan algorithm to support unit testing - // of both large and small thread count algorithms. - if ( ! is_same< WorkSpec , ThreadsExecUseScanSmall >::value ) { - exec.scan_large( self.m_func ); - } - else { - exec.scan_small( self.m_func ); - } - - for ( size_t iwork = work.first ; iwork < work.second ; ++iwork ) { - self.m_func( iwork , update , true ); - } - - exec.fan_in(); - } - - ParallelScan( const FunctorType & functor , const size_t nwork ) - : m_func( functor ) - , m_work( nwork ) - { - ThreadsExec::resize_reduce_scratch( 2 * Reduce::value_size( m_func ) ); - ThreadsExec::start( & ParallelScan::execute , this ); - ThreadsExec::fence(); - } - - inline void wait() {} - - inline ~ParallelScan() { wait(); } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template<> -class MultiFunctorParallelReduce< Threads > { -private: - - struct MemberBase { - virtual void init( Impl::ThreadsExec & ) const = 0 ; - virtual void exec( Impl::ThreadsExec & ) const = 0 ; - virtual void fan_in_reduce( Impl::ThreadsExec & ) const = 0 ; - virtual void output( void * ) const = 0 ; - virtual ~MemberBase() {} - }; - - template< class FunctorType > - struct Member : public MemberBase { - typedef Impl::ReduceAdapter< FunctorType > Reduce ; - typedef typename Reduce::pointer_type pointer_type ; - - const FunctorType m_func ; - const size_t m_work ; - - ~Member() {} - - Member( const FunctorType & func , const size_t work ) - : m_func( func ), m_work( work ) - { - Impl::ThreadsExec::resize_reduce_scratch( Reduce::value_size( m_func ) ); - } - - void init( Impl::ThreadsExec & exec ) const - { m_func.init( Reduce::reference( exec.reduce_base() ) ); } - - void exec( Impl::ThreadsExec & exec ) const - { - typename Reduce::reference_type update = Reduce::reference( exec.reduce_base() ); - - const std::pair work = exec.work_range( m_work ); - - for ( size_t iwork = work.first ; iwork < work.second ; ++iwork ) { - m_func( iwork , update ); - } - } - - void fan_in_reduce( Impl::ThreadsExec & exec ) const - { exec.fan_in_reduce( m_func ); } - - void output( void * ptr ) const - { - const pointer_type result = (pointer_type) ptr ; - const pointer_type data = (pointer_type) Impl::ThreadsExec::root_reduce_scratch(); - - Impl::ThreadsExec::fence(); - - Reduce::final( m_func , data ); - - if ( result ) { - const unsigned n = Reduce::value_count( m_func ); - for ( unsigned i = 0 ; i < n ; ++i ) { result[i] = data[i]; } - } - } - }; - - std::vector< MemberBase * > m_members ; - - static void execute_members( Impl::ThreadsExec & exec , const void * arg ) - { - const MultiFunctorParallelReduce & self = * ((const MultiFunctorParallelReduce *) arg ); - - // First functor initializes: - - self.m_members.front()->init( exec ); // Initialize thread-local value - - for ( unsigned i = 0 ; i < self.m_members.size() ; ++i ) { - self.m_members[i]->exec( exec ); - } - - // Last functor fan-in reduce: - - self.m_members.back()->fan_in_reduce( exec ); - } - -public: - - inline - void execute( void * host_ptr ) const - { - if ( ! 
m_members.empty() ) { - Impl::ThreadsExec::start( & MultiFunctorParallelReduce::execute_members , this ); - m_members.back()->output( host_ptr ); - } - } - - inline - void wait() const {} - - template< class FunctorType > - void push_back( const size_t work_count , const FunctorType & f ) - { - MemberBase * const m = new Member< FunctorType >( f , work_count ); - m_members.push_back( m ); - } - - ~MultiFunctorParallelReduce() - { - while ( ! m_members.empty() ) { - delete m_members.back(); - m_members.pop_back(); - } - } -}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #define KOKKOS_THREADS_PARALLEL_HPP */ - diff --git a/kokkos/kokkos/core/src/build.cuda.mac b/kokkos/kokkos/core/src/build.cuda.mac deleted file mode 100755 index 8c94550..0000000 --- a/kokkos/kokkos/core/src/build.cuda.mac +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -touch KokkosCore_config.h - -#flags="-I../ -I./ -I../../../TPL -c -O3 -arch=sm_30 -Xcompiler -fPIC -DKOKKOS_HAVE_CUDA -DKOKKOS_HAVE_PTHREAD --compiler-bindir=/Users/mhoemme/pkg/gcc-4.7.2/bin" -flags="-I../ -I./ -I../../../TPL -c -O3 -arch=sm_30 -Xcompiler -fPIC -DKOKKOS_HAVE_CUDA -DKOKKOS_HAVE_PTHREAD" -CC=nvcc -cd Cuda -rm *.o -$CC $flags Kokkos_Cuda_Impl.cu -$CC $flags Kokkos_CudaSpace.cu -cd .. -cd impl -rm *.o -$CC $flags Kokkos_hwloc.cpp -$CC $flags Kokkos_MemoryTracking.cpp -$CC $flags Kokkos_Shape.cpp -$CC $flags Kokkos_Error.cpp -$CC $flags Kokkos_HostSpace.cpp -$CC $flags Kokkos_Serial.cpp -cd .. -cd Threads -rm *.o -$CC $flags Kokkos_ThreadsExec.cpp -$CC $flags Kokkos_ThreadsExec_base.cpp -cd .. -$CC -arch=sm_35 -lib -o libkokkoscore-cuda.a Cuda/*.o impl/*.o Threads/*.o - diff --git a/kokkos/kokkos/core/src/build_common.sh b/kokkos/kokkos/core/src/build_common.sh deleted file mode 100755 index 8051609..0000000 --- a/kokkos/kokkos/core/src/build_common.sh +++ /dev/null @@ -1,271 +0,0 @@ -#!/bin/bash - -#----------------------------------------------------------------------------- -# Shared portion of build script for the base Kokkos functionality -# Simple build script with options -#----------------------------------------------------------------------------- -if [ ! -d "${KOKKOS}" \ - -o ! -d "${KOKKOS}/src" \ - -o ! -d "${KOKKOS}/src/impl" \ - -o ! -d "${KOKKOS}/src/Cuda" \ - -o ! -d "${KOKKOS}/src/OpenMP" \ - -o ! 
-d "${KOKKOS}/src/Threads" \ - ] ; -then -echo "Must set KOKKOS to the kokkos/core directory" -exit -1 -fi - -#----------------------------------------------------------------------------- - -INC_PATH="-I${KOKKOS}/src" -INC_PATH="${INC_PATH} -I${KOKKOS}/../TPL" - -#----------------------------------------------------------------------------- - -while [ -n "${1}" ] ; do - -ARG="${1}" -shift 1 - -case ${ARG} in -#----------- OPTIONS ----------- -OPT | opt | O3 | -O3 ) OPTFLAGS="${OPTFLAGS} -O3" ;; -#------------------------------- -DBG | dbg | g | -g ) KOKKOS_EXPRESSION_CHECK=1 ;; -#------------------------------- -HWLOC | hwloc ) KOKKOS_HAVE_HWLOC=${1} ; shift 1 ;; -#------------------------------- -MPI | mpi ) - KOKKOS_HAVE_MPI=${1} ; shift 1 - CXX="${KOKKOS_HAVE_MPI}/bin/mpicxx" - LINK="${KOKKOS_HAVE_MPI}/bin/mpicxx" - INC_PATH="${INC_PATH} -I${KOKKOS_HAVE_MPI}/include" - ;; -#------------------------------- -OMP | omp | OpenMP ) - KOKKOS_HAVE_OPENMP=1 - ;; -#------------------------------- -CUDA | Cuda | cuda ) - # CUDA_ARCH options: 20 30 35 - CUDA_ARCH=${1} ; shift 1 - # - # -x cu : process all files through the Cuda compiler as Cuda code. - # -lib -o : produce library - # - NVCC="nvcc -DKOKKOS_HAVE_CUDA_ARCH=${CUDA_ARCH}0 -gencode arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" - NVCC="${NVCC} -maxrregcount=64" - NVCC="${NVCC} -Xcompiler -Wall,-ansi" - NVCC="${NVCC} -lib -o libCuda.a -x cu" - - NVCC_SOURCES="${NVCC_SOURCES} ${KOKKOS}/src/Cuda/*.cu" - LIB="${LIB} libCuda.a -L/usr/local/cuda/lib64 -lcudart -lcusparse" - ;;#------------------------------- -CUDA_OSX | Cuda_OSX | cuda_osx ) - # CUDA_ARCH options: 20 30 35 - CUDA_ARCH=${1} ; shift 1 - # - # -x cu : process all files through the Cuda compiler as Cuda code. - # -lib -o : produce library - # - NVCC="nvcc -DKOKKOS_HAVE_CUDA_ARCH=${CUDA_ARCH}0 -gencode arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" - NVCC="${NVCC} -maxrregcount=64" - NVCC="${NVCC} -Xcompiler -Wall,-ansi -Xcompiler -m64" - NVCC="${NVCC} -lib -o libCuda.a -x cu" - - NVCC_SOURCES="${NVCC_SOURCES} ${KOKKOS}/src/Cuda/*.cu" - LIB="${LIB} libCuda.a -Xlinker -rpath -Xlinker /Developer/NVIDIA/CUDA-5.5/lib -L /Developer/NVIDIA/CUDA-5.5/lib -lcudart -lcusparse" - ;; -#------------------------------- -GNU | gnu | g++ ) - # Turn on lots of warnings and ansi compliance. - # The Trilinos build system requires '-pedantic' - # - CXX="g++ -Wall -Wextra -ansi -pedantic" - LINK="g++" - CXX="${CXX} -rdynamic -DENABLE_TRACEBACK" - LIB="${LIB} -ldl" - ;; -#------------------------------- -GNU_OSX | gnu_osx | g++_osx ) - # Turn on lots of warnings and ansi compliance. 
- # The Trilinos build system requires '-pedantic' - # - CXX="g++ -Wall -Wextra -ansi -pedantic -m64" - LINK="g++" - CXX="${CXX} -DENABLE_TRACEBACK" - LIB="${LIB} -ldl" - ;; -#------------------------------- -INTEL | intel | icc | icpc ) - # -xW = use SSE and SSE2 instructions - CXX="icpc -Wall" - LINK="icpc" - LIB="${LIB} -lstdc++" - ;; -#------------------------------- -MPIINTEL | mpiintel | mpiicc | mpiicpc ) - # -xW = use SSE and SSE2 instructions - CXX="mpiicpc -Wall" - LINK="mpiicpc" - LIB="${LIB} -lstdc++" - KOKKOS_HAVE_MPI=1 -;; -#------------------------------- -MIC | mic ) - CXX="icpc -mmic -ansi-alias -Wall" - LINK="icpc -mmic" - CXX="${CXX} -mGLOB_default_function_attrs=knc_stream_store_controls=2" - # CXX="${CXX} -vec-report6" - # CXX="${CXX} -guide-vec" - LIB="${LIB} -lstdc++" - COMPILE_MIC="on" - ;; -#------------------------------- -MPIMIC | mpimic ) - CXX="mpiicpc -mmic -ansi-alias -Wall" - LINK="mpiicpc -mmic" - KOKKOS_HAVE_MPI=1 - CXX="${CXX} -mGLOB_default_function_attrs=knc_stream_store_controls=2" - # CXX="${CXX} -vec-report6" - # CXX="${CXX} -guide-vec" - LIB="${LIB} -lstdc++" - COMPILE_MIC="on" - ;; -#------------------------------- -curie ) - CXX="CC" - LINK="CC" - INC_PATH="${INC_PATH} -I/opt/cray/mpt/default/gni/mpich2-cray/74" - KOKKOS_HAVE_MPI=1 - ;; -#------------------------------- -MKL | mkl ) - HAVE_MKL=${1} ; shift 1 ; - CXX_FLAGS="${CXX_FLAGS} -DKOKKOS_USE_MKL -I${HAVE_MKL}/include/" - ARCH="intel64" - if [ -n "${COMPILE_MIC}" ] ; - then - ARCH="mic" - fi - LIB="${LIB} -L${HAVE_MKL}/lib/${ARCH}/ -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core" - NVCC_FLAGS="${NVCC_FLAGS} -DKOKKOS_USE_MKL" -;; -#------------------------------- -CUSPARSE | cusparse ) - CXX_FLAGS="${CXX_FLAGS} -DKOKKOS_USE_CUSPARSE" - NVCC_FLAGS="${NVCC_FLAGS} -DKOKKOS_USE_CUSPARSE" - LIB="${LIB} -lcusparse" -;; -#------------------------------- -AVX | avx ) - CXX_FLAGS="${CXX_FLAGS} -mavx" -;; -#------------------------------- -*) echo 'unknown option: ' ${ARG} ; exit -1 ;; -esac -done - -#----------------------------------------------------------------------------- - -if [ -z "${CXX}" ] ; -then - echo "No C++ compiler selected" - exit -1 -fi - -if [ -n "${KOKKOS_HAVE_OPENMP}" ] -then -CXX="${CXX} -fopenmp" -CXX_SOURCES="${CXX_SOURCES} ${KOKKOS}/src/OpenMP/*.cpp" -fi - -#----------------------------------------------------------------------------- -# Option for PTHREAD or WINTHREAD eventually - -KOKKOS_HAVE_PTHREAD=1 - -if [ -n "${KOKKOS_HAVE_PTHREAD}" ] ; -then - LIB="${LIB} -lpthread" -fi - -#----------------------------------------------------------------------------- -# Attach options to compile lines - -CXX="${CXX} ${OPTFLAGS}" - -if [ -n "${NVCC}" ] ; -then - NVCC="${NVCC} ${OPTFLAGS}" -fi - -#----------------------------------------------------------------------------- - -CXX_SOURCES="${CXX_SOURCES} ${KOKKOS}/src/impl/*.cpp" -CXX_SOURCES="${CXX_SOURCES} ${KOKKOS}/src/Threads/*.cpp" - -#----------------------------------------------------------------------------- -# - -if [ -n "${KOKKOS_HAVE_HWLOC}" ] ; -then - - if [ ! -d ${KOKKOS_HAVE_HWLOC} ] ; - then - echo "${KOKKOS_HAVE_HWLOC} does not exist" - exit 1 - fi - - echo "LD_LIBRARY_PATH must include ${KOKKOS_HAVE_HWLOC}/lib" - - LIB="${LIB} -L${KOKKOS_HAVE_HWLOC}/lib -lhwloc" - INC_PATH="${INC_PATH} -I${KOKKOS_HAVE_HWLOC}/include" -fi - -#----------------------------------------------------------------------------- - -INC_PATH="${INC_PATH} -I." 
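The remainder of the script (the hunk that follows) writes each selected option into KokkosCore_config.h as a KOKKOS_HAVE_* macro. A small sketch of how a translation unit typically guards on those macros once that header has been generated; KOKKOS_HAVE_WINTHREAD is included only for symmetry with Kokkos_ThreadsExec_base.cpp above, since this script always takes the pthread path:

#include "KokkosCore_config.h"
#include <cstdio>

int main()
{
#if defined( KOKKOS_HAVE_PTHREAD )
  std::printf( "host threading backend: pthreads\n" );
#elif defined( KOKKOS_HAVE_WINTHREAD )
  std::printf( "host threading backend: Windows threads\n" );
#else
  std::printf( "no host threading backend configured\n" );
#endif

#if defined( KOKKOS_HAVE_HWLOC )
  std::printf( "hwloc available for thread placement\n" );
#endif

#if defined( KOKKOS_HAVE_CUDA )
  std::printf( "CUDA backend enabled\n" );
#endif
  return 0 ;
}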
- -CONFIG="KokkosCore_config.h" - -rm -f ${CONFIG} - -echo "#ifndef KOKKOS_CORE_CONFIG_H" >> ${CONFIG} -echo "#define KOKKOS_CORE_CONFIG_H" >> ${CONFIG} - -if [ -n "${KOKKOS_HAVE_MPI}" ] ; -then - echo "#define KOKKOS_HAVE_MPI" >> ${CONFIG} -fi - -if [ -n "${NVCC}" ] ; -then - echo "#define KOKKOS_HAVE_CUDA" >> ${CONFIG} -fi - -if [ -n "${KOKKOS_HAVE_PTHREAD}" ] ; -then - echo "#define KOKKOS_HAVE_PTHREAD" >> ${CONFIG} -fi - -if [ -n "${KOKKOS_HAVE_HWLOC}" ] ; -then - echo "#define KOKKOS_HAVE_HWLOC" >> ${CONFIG} -fi - -if [ -n "${KOKKOS_HAVE_OPENMP}" ] ; -then - echo "#define KOKKOS_HAVE_OPENMP" >> ${CONFIG} -fi - -if [ -n "${KOKKOS_EXPRESSION_CHECK}" ] ; -then - echo "#define KOKKOS_EXPRESSION_CHECK" >> ${CONFIG} -fi - -echo "#endif" >> ${CONFIG} - -#----------------------------------------------------------------------------- - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp b/kokkos/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp deleted file mode 100644 index 51f4fbd..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp +++ /dev/null @@ -1,267 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_ANALYZESHAPE_HPP -#define KOKKOS_ANALYZESHAPE_HPP - -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -/** \brief Analyze the array shape defined by a Kokkos::View data type. - * - * It is presumed that the data type can be mapped down to a multidimensional - * array of an intrinsic scalar numerical type (double, float, int, ... ). - * The 'value_type' of an array may be an embedded aggregate type such - * as a fixed length array 'Array'. In this case the 'scalar_type' - * is 'T' and the 'value_type' is 'Array' to enable data layout - * according to shape and scalar_type AND data access by value_type. - * - * The embedded aggregate type must have an AnalyzeShape specialization - * to map it down to a shape and intrinsic scalar numerical type. - */ - -template< class T > -struct AnalyzeShape : public Shape< sizeof(T) , 0 > -{ - typedef Shape< sizeof(T), 0 > shape ; - - typedef T scalar_type ; - typedef T array_type ; - typedef T value_type ; - typedef T type ; - typedef const T const_scalar_type ; - typedef const T const_array_type ; - typedef const T const_value_type ; - typedef const T const_type ; - typedef T non_const_scalar_type ; - typedef T non_const_array_type ; - typedef T non_const_value_type ; - typedef T non_const_type ; -}; - -template<> -struct AnalyzeShape : public Shape< 0 , 0 > -{ - typedef Shape< 0 , 0 > shape ; - - typedef void scalar_type ; - typedef void array_type ; - typedef void value_type ; - typedef void type ; - typedef const void const_scalar_type ; - typedef const void const_array_type ; - typedef const void const_value_type ; - typedef const void const_type ; - typedef void non_const_scalar_type ; - typedef void non_const_array_type ; - typedef void non_const_value_type ; - typedef void non_const_type ; -}; - -template< class T > -struct AnalyzeShape< const T > : public AnalyzeShape::shape -{ -private: - typedef AnalyzeShape nested ; -public: - - typedef typename nested::shape shape ; - - typedef typename nested::const_scalar_type scalar_type ; - typedef typename nested::const_array_type array_type ; - typedef typename nested::const_value_type value_type ; - typedef typename nested::const_type type ; - - typedef typename nested::const_scalar_type const_scalar_type ; - typedef typename nested::const_array_type const_array_type ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type ; - - typedef typename nested::non_const_scalar_type non_const_scalar_type ; - typedef typename nested::non_const_array_type non_const_array_type ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type ; -}; - -template< class T > -struct AnalyzeShape< T * > - : public ShapeInsert< typename AnalyzeShape::shape , 0 >::type -{ -private: - typedef AnalyzeShape nested ; -public: - - typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ; - - typedef typename nested::scalar_type scalar_type ; - typedef typename nested::array_type * array_type ; - typedef typename nested::value_type value_type ; - typedef typename nested::type * type ; - - typedef typename 
nested::const_scalar_type const_scalar_type ; - typedef typename nested::const_array_type * const_array_type ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type * const_type ; - - typedef typename nested::non_const_scalar_type non_const_scalar_type ; - typedef typename nested::non_const_array_type * non_const_array_type ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type * non_const_type ; -}; - -template< class T > -struct AnalyzeShape< T[] > - : public ShapeInsert< typename AnalyzeShape::shape , 0 >::type -{ -private: - typedef AnalyzeShape nested ; -public: - - typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ; - - typedef typename nested::scalar_type scalar_type ; - typedef typename nested::array_type array_type [] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type [] ; - - typedef typename nested::const_scalar_type const_scalar_type ; - typedef typename nested::const_array_type const_array_type [] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [] ; - - typedef typename nested::non_const_scalar_type non_const_scalar_type ; - typedef typename nested::non_const_array_type non_const_array_type [] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [] ; -}; - -template< class T > -struct AnalyzeShape< const T[] > - : public ShapeInsert< typename AnalyzeShape< const T >::shape , 0 >::type -{ -private: - typedef AnalyzeShape< const T > nested ; -public: - - typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ; - - typedef typename nested::scalar_type scalar_type ; - typedef typename nested::array_type array_type [] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type [] ; - - typedef typename nested::const_scalar_type const_scalar_type ; - typedef typename nested::const_array_type const_array_type [] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [] ; - - typedef typename nested::non_const_scalar_type non_const_scalar_type ; - typedef typename nested::non_const_array_type non_const_array_type [] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [] ; -}; - -template< class T , unsigned N > -struct AnalyzeShape< T[N] > - : public ShapeInsert< typename AnalyzeShape::shape , N >::type -{ -private: - typedef AnalyzeShape nested ; -public: - - typedef typename ShapeInsert< typename nested::shape , N >::type shape ; - - typedef typename nested::scalar_type scalar_type ; - typedef typename nested::array_type array_type [N] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type [N] ; - - typedef typename nested::const_scalar_type const_scalar_type ; - typedef typename nested::const_array_type const_array_type [N] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [N] ; - - typedef typename nested::non_const_scalar_type non_const_scalar_type ; - typedef typename nested::non_const_array_type non_const_array_type [N] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [N] ; -}; - -template< class T , unsigned N > 
-struct AnalyzeShape< const T[N] > - : public ShapeInsert< typename AnalyzeShape< const T >::shape , N >::type -{ -private: - typedef AnalyzeShape< const T > nested ; -public: - - typedef typename ShapeInsert< typename nested::shape , N >::type shape ; - - typedef typename nested::scalar_type scalar_type ; - typedef typename nested::array_type array_type [N] ; - typedef typename nested::value_type value_type ; - typedef typename nested::type type [N] ; - - typedef typename nested::const_scalar_type const_scalar_type ; - typedef typename nested::const_array_type const_array_type [N] ; - typedef typename nested::const_value_type const_value_type ; - typedef typename nested::const_type const_type [N] ; - - typedef typename nested::non_const_scalar_type non_const_scalar_type ; - typedef typename nested::non_const_array_type non_const_array_type [N] ; - typedef typename nested::non_const_value_type non_const_value_type ; - typedef typename nested::non_const_type non_const_type [N] ; -}; - -} // namespace Impl -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_ANALYZESHAPE_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp deleted file mode 100644 index 3fbc728..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ /dev/null @@ -1,160 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP ) -#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP - -namespace Kokkos { - -//---------------------------------------------------------------------------- -// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type). -// Must cast-away 'volatile' for the CAS call. - -#if defined( KOKKOS_ATOMICS_USE_CUDA ) - -KOKKOS_INLINE_FUNCTION -int atomic_compare_exchange( volatile int * const dest, const int compare, const int val) -{ return atomicCAS((int*)dest,compare,val); } - -KOKKOS_INLINE_FUNCTION -unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val) -{ return atomicCAS((unsigned int*)dest,compare,val); } - -KOKKOS_INLINE_FUNCTION -unsigned long long int atomic_compare_exchange( volatile unsigned long long int * const dest , - const unsigned long long int compare , - const unsigned long long int val ) -{ return atomicCAS((unsigned long long int*)dest,compare,val); } - -template < typename T > -KOKKOS_INLINE_FUNCTION -typename Kokkos::Impl::UnionPair::first_type -atomic_compare_exchange( volatile T * const dest , const T compare , const T val ) -{ - typedef Kokkos::Impl::UnionPair union_type ; - typedef typename union_type::second_type int_type ; - - return union_type( atomicCAS( (int_type *) union_type::cast( dest ) , - union_type::cast( compare ) , - union_type::cast( val ) ) - ).first ; -} - -//---------------------------------------------------------------------------- -// GCC native CAS supports int, long, unsigned int, unsigned long. -// Intel native CAS support int and long with the same interface as GCC. - -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) - -KOKKOS_INLINE_FUNCTION -int atomic_compare_exchange( volatile int * const dest, const int compare, const int val) -{ return __sync_val_compare_and_swap(dest,compare,val); } - -KOKKOS_INLINE_FUNCTION -long atomic_compare_exchange( volatile long * const dest, const long compare, const long val ) -{ return __sync_val_compare_and_swap(dest,compare,val); } - -#if defined( KOKKOS_ATOMICS_USE_GCC ) - -// GCC supports unsigned - -KOKKOS_INLINE_FUNCTION -unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val ) -{ return __sync_val_compare_and_swap(dest,compare,val); } - -KOKKOS_INLINE_FUNCTION -unsigned long atomic_compare_exchange( volatile unsigned long * const dest , - const unsigned long compare , - const unsigned long val ) -{ return __sync_val_compare_and_swap(dest,compare,val); } - -#endif - -template < typename T > -KOKKOS_INLINE_FUNCTION -typename Kokkos::Impl::UnionPair::first_type -atomic_compare_exchange( volatile T * const dest, const T compare, const T val ) -{ - typedef Kokkos::Impl::UnionPair union_type ; - - return union_type( - __sync_val_compare_and_swap( union_type::cast( dest ) , - union_type::cast( compare ) , - union_type::cast( val ) ) - ).first ; -} - -//---------------------------------------------------------------------------- - -#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) - -template< typename T > -KOKKOS_INLINE_FUNCTION -T atomic_compare_exchange( volatile T * const dest, const T compare, const T val ) -{ - T retval; -#pragma omp critical - { - retval = dest[0]; - if ( retval == compare ) - dest[0] = val; - } - return retval; -} - -#endif - - -template -KOKKOS_INLINE_FUNCTION -bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val) 
-{ - return compare == atomic_compare_exchange(dest, compare, val); -} - -//---------------------------------------------------------------------------- - -} // namespace Kokkos - -#endif - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp deleted file mode 100644 index 8d4965e..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ /dev/null @@ -1,128 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_EXCHANGE_HPP ) -#define KOKKOS_ATOMIC_EXCHANGE_HPP - -namespace Kokkos { - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ATOMICS_USE_CUDA ) - -KOKKOS_INLINE_FUNCTION -int atomic_exchange( volatile int * const dest , const int val ) -{ return atomicExch( (int*) dest , val ); } - -KOKKOS_INLINE_FUNCTION -unsigned int atomic_exchange( volatile unsigned int * const dest , const unsigned int val ) -{ return atomicExch( (unsigned int*) dest , val ); } - -KOKKOS_INLINE_FUNCTION -unsigned long long atomic_exchange( volatile unsigned long long * const dest , const unsigned long long val ) -{ return atomicExch( (unsigned long long*) dest , val ); } - -template < typename T > -KOKKOS_INLINE_FUNCTION -typename Kokkos::Impl::UnionPair::first_type -atomic_exchange( volatile T * const dest , const T val ) -{ - typedef Kokkos::Impl::UnionPair union_type ; - typedef typename union_type::second_type type ; - - return union_type( atomicExch( (type *) union_type::cast( dest ) , - union_type::cast( val ) ) - ).first ; -} - -//---------------------------------------------------------------------------- - -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) - -template< typename T > -KOKKOS_INLINE_FUNCTION -typename Kokkos::Impl::UnionPair::first_type -atomic_exchange( volatile T * const dest , const T val ) -{ - typedef Kokkos::Impl::UnionPair union_type ; - - union_type assumed , old ; - - old.first = *dest ; - do { - assumed.second = old.second ; - old.second = __sync_val_compare_and_swap( union_type::cast( dest ), - assumed.second , - union_type::cast( val ) ); - } while ( assumed.second != old.second ); - - return old.first ; -} - -//---------------------------------------------------------------------------- - -#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) - -template < typename T > -KOKKOS_INLINE_FUNCTION -T atomic_exchange( volatile T * const dest , const T val ) -{ - T retval; -#pragma omp critical - { - retval = dest[0]; - dest[0] = val; - } - return retval; -} - -#endif - -//---------------------------------------------------------------------------- - -} // namespace Kokkos - -#endif - -//---------------------------------------------------------------------------- - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp deleted file mode 100644 index 7411c28..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ /dev/null @@ -1,162 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP ) -#define KOKKOS_ATOMIC_FETCH_ADD_HPP - -namespace Kokkos { - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ATOMICS_USE_CUDA ) - -// Support for int, unsigned int, unsigned long long int, and float - -KOKKOS_INLINE_FUNCTION -int atomic_fetch_add( volatile int * const dest , const int val ) -{ return atomicAdd((int*)dest,val); } - -KOKKOS_INLINE_FUNCTION -unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val ) -{ return atomicAdd((unsigned int*)dest,val); } - -KOKKOS_INLINE_FUNCTION -unsigned long long int atomic_fetch_add( volatile unsigned long long int * const dest , - const unsigned long long int val ) -{ return atomicAdd((unsigned long long int*)dest,val); } - -KOKKOS_INLINE_FUNCTION -float atomic_fetch_add( volatile float * const dest , const float val ) -{ return atomicAdd((float*)dest,val); } - -template < typename T > -KOKKOS_INLINE_FUNCTION -typename Kokkos::Impl::UnionPair::first_type -atomic_fetch_add( volatile T * const dest , const T val ) -{ - typedef Kokkos::Impl::UnionPair union_type ; - typedef typename union_type::second_type type ; - - union_type assumed , old , newval ; - - old.first = *dest ; - do { - assumed.second = old.second ; - newval.first = assumed.first + val ; - old.second = atomicCAS( (type *) union_type::cast( dest ), - assumed.second , - newval.second ); - } while ( assumed.second != old.second ); - - return old.first ; -} - -//---------------------------------------------------------------------------- - -#elif defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL) - -KOKKOS_INLINE_FUNCTION -int atomic_fetch_add( volatile int * const dest , const int val ) -{ return __sync_fetch_and_add(dest,val); } - -KOKKOS_INLINE_FUNCTION -long int atomic_fetch_add( volatile long int * const dest , const long int val ) -{ return __sync_fetch_and_add(dest,val); } - -#if defined( KOKKOS_ATOMICS_USE_GCC ) - -KOKKOS_INLINE_FUNCTION -unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val ) -{ return __sync_fetch_and_add(dest,val); } - -KOKKOS_INLINE_FUNCTION -unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val ) -{ return __sync_fetch_and_add(dest,val); } - -#endif - -template < 
typename T > -KOKKOS_INLINE_FUNCTION -typename Kokkos::Impl::UnionPair::first_type -atomic_fetch_add( volatile T * const dest , const T val ) -{ - typedef Kokkos::Impl::UnionPair union_type ; - - union_type assumed , old , newval ; - - old.first = *dest ; - do { - assumed.second = old.second ; - newval.first = assumed.first + val ; - old.second = __sync_val_compare_and_swap( union_type::cast( dest ), - assumed.second , - newval.second ); - } while ( assumed.second != old.second ); - - return old.first ; -} - -//---------------------------------------------------------------------------- - -#elif defined( KOKKOS_ATOMICS_USE_OMP31 ) - -template< typename T > -T atomic_fetch_add( volatile T * const dest , const T val ) -{ - T retval; -#pragma omp critical - { - retval = dest[0]; - dest[0] += val; - } - return retval; -} - -#endif - -//---------------------------------------------------------------------------- - -} - -#endif - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_CrsArray_factory.hpp b/kokkos/kokkos/core/src/impl/Kokkos_CrsArray_factory.hpp deleted file mode 100644 index 825e6cf..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_CrsArray_factory.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP -#define KOKKOS_IMPL_CRSARRAY_FACTORY_HPP - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< class DataType , class Arg1Type , class Arg2Type , typename SizeType > -inline -typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror( const CrsArray & view ) -{ - // Force copy: - typedef Impl::ViewAssignment< Impl::LayoutDefault > alloc ; - typedef CrsArray< DataType , Arg1Type , Arg2Type , SizeType > crsarray_type ; - - typename crsarray_type::HostMirror tmp ; - typename crsarray_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map ); - - tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const' - tmp.entries = create_mirror( view.entries ); - - // Deep copy: - deep_copy( tmp_row_map , view.row_map ); - deep_copy( tmp.entries , view.entries ); - - return tmp ; -} - -template< class DataType , class Arg1Type , class Arg2Type , typename SizeType > -inline -typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror_view( const CrsArray & view , - typename Impl::enable_if< ViewTraits::is_hostspace >::type * = 0 ) -{ - return view ; -} - -template< class DataType , class Arg1Type , class Arg2Type , typename SizeType > -inline -typename CrsArray< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror -create_mirror_view( const CrsArray & view , - typename Impl::enable_if< ! ViewTraits::is_hostspace >::type * = 0 ) -{ - return create_mirror( view ); -} - - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< class CrsArrayType , class InputSizeType > -inline -typename CrsArrayType::crsarray_type -create_crsarray( const std::string & label , - const std::vector< InputSizeType > & input ) -{ - typedef CrsArrayType output_type ; - typedef std::vector< InputSizeType > input_type ; - - typedef typename output_type::entries_type entries_type ; - - typedef View< typename output_type::size_type [] , - typename output_type::array_layout , - typename output_type::device_type > work_type ; - - output_type output ; - - // Create the row map: - - const size_t length = input.size(); - - { - work_type row_work( "tmp" , length + 1 ); - - typename work_type::HostMirror row_work_host = - create_mirror_view( row_work ); - - size_t sum = 0 ; - row_work_host[0] = 0 ; - for ( size_t i = 0 ; i < length ; ++i ) { - row_work_host[i+1] = sum += input[i]; - } - - deep_copy( row_work , row_work_host ); - - output.entries = entries_type( label , sum ); - output.row_map = row_work ; - } - - return output ; -} - -//---------------------------------------------------------------------------- - -template< class CrsArrayType , class InputSizeType > -inline -typename CrsArrayType::crsarray_type -create_crsarray( const std::string & label , - const std::vector< std::vector< InputSizeType > > & input ) -{ - typedef CrsArrayType output_type ; - typedef std::vector< std::vector< InputSizeType > > input_type ; - typedef typename output_type::entries_type entries_type ; - typedef typename output_type::size_type size_type ; - - typedef typename - 
Impl::assert_shape_is_rank_one< typename entries_type::shape_type >::type - ok_rank ; - - typedef View< typename output_type::size_type [] , - typename output_type::array_layout , - typename output_type::device_type > work_type ; - - output_type output ; - - // Create the row map: - - const size_t length = input.size(); - - { - work_type row_work( "tmp" , length + 1 ); - - typename work_type::HostMirror row_work_host = - create_mirror_view( row_work ); - - size_t sum = 0 ; - row_work_host[0] = 0 ; - for ( size_t i = 0 ; i < length ; ++i ) { - row_work_host[i+1] = sum += input[i].size(); - } - - deep_copy( row_work , row_work_host ); - - output.entries = entries_type( label , sum ); - output.row_map = row_work ; - } - - // Fill in the entries: - { - typename entries_type::HostMirror host_entries = - create_mirror_view( output.entries ); - - size_t sum = 0 ; - for ( size_t i = 0 ; i < length ; ++i ) { - for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) { - host_entries( sum ) = input[i][j] ; - } - } - - deep_copy( output.entries , host_entries ); - } - - return output ; -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Error.cpp b/kokkos/kokkos/core/src/impl/Kokkos_Error.cpp deleted file mode 100644 index cf762ae..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Error.cpp +++ /dev/null @@ -1,184 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos -// Manycore Performance-Portable Multidimensional Arrays -// -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void throw_runtime_exception( const std::string & msg ) -{ - std::ostringstream o ; - o << msg ; - traceback_callstack( o ); - throw std::runtime_error( o.str() ); -} - - -std::string human_memory_size(size_t arg_bytes) -{ - double bytes = arg_bytes; - const double K = 1024; - const double M = K*1024; - const double G = M*1024; - - std::ostringstream out; - if (bytes < K) { - out << std::setprecision(4) << bytes << " B"; - } else if (bytes < M) { - bytes /= K; - out << std::setprecision(4) << bytes << " K"; - } else if (bytes < G) { - bytes /= M; - out << std::setprecision(4) << bytes << " M"; - } else { - bytes /= G; - out << std::setprecision(4) << bytes << " G"; - } - return out.str(); -} - -} -} - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK ) - -/* This is only known to work with GNU C++ - * Must be compiled with '-rdynamic' - * Must be linked with '-ldl' - */ - -/* Print call stack into an error stream, - * so one knows in which function the error occured. - * - * Code copied from: - * http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html - * - * License on this site: - * This blog is licensed under a - * Creative Commons Attribution-Share Alike 3.0 Unported License. - * - * http://creativecommons.org/licenses/by-sa/3.0/ - * - * Modified to output to std::ostream. - */ -#include -#include -#include -#include -#include - -namespace Kokkos { -namespace Impl { - -void traceback_callstack( std::ostream & msg ) -{ - using namespace abi; - - enum { MAX_DEPTH = 32 }; - - void *trace[MAX_DEPTH]; - Dl_info dlinfo; - - int status; - - int trace_size = backtrace(trace, MAX_DEPTH); - - msg << std::endl << "Call stack {" << std::endl ; - - for (int i=1; i -#include - -namespace Kokkos { -namespace Impl { - -void throw_runtime_exception( const std::string & ); - -void traceback_callstack( std::ostream & ); - -std::string human_memory_size(size_t arg_bytes); - -} -} - -#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/kokkos/kokkos/core/src/impl/Kokkos_HostSpace.cpp deleted file mode 100644 index 487271c..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include -#include - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace { - -class HostMemoryTrackingEntry : public Impl::MemoryTrackingEntry -{ -public: - - void * const ptr_alloc ; - - HostMemoryTrackingEntry( const std::string & arg_label , - const std::type_info & arg_info , - void * const arg_ptr , - const unsigned arg_size ) - : Impl::MemoryTrackingEntry( arg_label , arg_info , arg_ptr , arg_size ) - , ptr_alloc( arg_ptr ) - {} - - ~HostMemoryTrackingEntry(); -}; - -HostMemoryTrackingEntry::~HostMemoryTrackingEntry() -{ -#if defined( __INTEL_COMPILER ) - _mm_free( ptr_alloc ); -#else - free( ptr_alloc ); -#endif -} - -Impl::MemoryTracking & host_space_singleton() -{ - static Impl::MemoryTracking self("Kokkos::HostSpace"); - return self ; -} - -} // namespace -} // namespade Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -void * host_allocate_not_thread_safe( - const std::string & label , - const std::type_info & scalar_type , - const size_t scalar_size , - const size_t scalar_count ) -{ - void * ptr = 0 ; - - if ( 0 < scalar_size && 0 < scalar_count ) { - void * ptr_alloc = 0 ; - size_t count_alloc = scalar_count ; - -#if defined( __INTEL_COMPILER ) - - ptr = ptr_alloc = _mm_malloc( scalar_size * count_alloc , MEMORY_ALIGNMENT ); - -#elif ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \ - ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 ) - - posix_memalign( & ptr_alloc , MEMORY_ALIGNMENT , scalar_size * count_alloc ); - ptr = ptr_alloc ; - -#else - - // Over-allocate to guarantee enough aligned space. 
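The fallback code that continues just below implements this comment: it pads the malloc request by one alignment's worth of space and then rounds the returned pointer up to the next MEMORY_ALIGNMENT boundary. As a reading aid, here is a small standalone C++ sketch of that same rounding arithmetic using plain std::malloc; the helper names (aligned_alloc_fallback, raw_out) and the 64-byte alignment in the driver are illustrative assumptions, not part of the deleted HostSpace code.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Over-allocate, then round the raw pointer up to the next multiple of 'alignment'.
// The caller must free the raw pointer returned through 'raw_out', not the aligned one.
static void * aligned_alloc_fallback( std::size_t alignment , std::size_t bytes , void ** raw_out )
{
  void * const raw = std::malloc( bytes + alignment );   // extra room guarantees an aligned address exists
  if ( 0 == raw ) { *raw_out = 0 ; return 0 ; }

  const std::uintptr_t p = reinterpret_cast<std::uintptr_t>( raw );

  // Advance to the next alignment boundary; like the code below, this steps
  // forward even when 'raw' happens to be aligned already, which the padding allows.
  const std::uintptr_t aligned = p + ( alignment - p % alignment );

  *raw_out = raw ;
  return reinterpret_cast<void*>( aligned );
}

int main()
{
  void * raw = 0 ;
  void * const ptr = aligned_alloc_fallback( 64 , 1000 , & raw );
  std::printf( "aligned to 64 bytes? %d\n" ,
               int( 0 == reinterpret_cast<std::uintptr_t>( ptr ) % 64 ) );
  std::free( raw );
  return 0 ;
}

The deleted code expresses the same padding in units of scalar_size (count_alloc += ( MEMORY_ALIGNMENT + scalar_size - 1 ) / scalar_size) rather than in raw bytes, but the pointer-rounding step is the same.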
- - count_alloc += ( MEMORY_ALIGNMENT + scalar_size - 1 ) / scalar_size ; - - ptr_alloc = malloc( scalar_size * count_alloc ); - - ptr = static_cast(ptr_alloc) + - ( MEMORY_ALIGNMENT - reinterpret_cast(ptr_alloc) % MEMORY_ALIGNMENT ); - -#endif - - if ( ptr_alloc && ptr_alloc <= ptr && - 0 == ( reinterpret_cast(ptr) % MEMORY_ALIGNMENT ) ) { - host_space_singleton().insert( - new HostMemoryTrackingEntry( label , scalar_type , ptr_alloc , scalar_size * count_alloc ) ); - } - else { - std::ostringstream msg ; - msg << "Kokkos::Impl::host_allocate_not_thread_safe( " - << label - << " , " << scalar_type.name() - << " , " << scalar_size - << " , " << scalar_count - << " ) FAILED aligned memory allocation" ; - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - } - - return ptr ; -} - -void host_decrement_not_thread_safe( const void * ptr ) -{ - host_space_singleton().decrement( ptr ); -} - -DeepCopy::DeepCopy( void * dst , const void * src , size_t n ) -{ - memcpy( dst , src , n ); -} - -} -} - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace { - -static const int QUERY_DEVICE_IN_PARALLEL_MAX = 16 ; - -typedef int (* QueryDeviceInParallelPtr )(); - -QueryDeviceInParallelPtr s_in_parallel_query[ QUERY_DEVICE_IN_PARALLEL_MAX ] ; -int s_in_parallel_query_count = 0 ; - -} // namespace - -void HostSpace::register_in_parallel( int (*device_in_parallel)() ) -{ - if ( 0 == device_in_parallel ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) ); - } - - int i = -1 ; - - if ( ! (device_in_parallel)() ) { - for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i ); - } - - if ( i < s_in_parallel_query_count ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) ); - - } - - if ( QUERY_DEVICE_IN_PARALLEL_MAX <= i ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) ); - - } - - for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i ); - - if ( i == s_in_parallel_query_count ) { - s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ; - } -} - -int HostSpace::in_parallel() -{ - const int n = s_in_parallel_query_count ; - - int i = 0 ; - - while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; } - - return i < n ; -} - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -void * HostSpace::allocate( - const std::string & label , - const std::type_info & scalar_type , - const size_t scalar_size , - const size_t scalar_count ) -{ - if ( HostSpace::in_parallel() ) { - Kokkos::Impl::throw_runtime_exception( "Kokkos::HostSpace::allocate ERROR : called in parallel" ); - } - - void * const ptr = - Impl::host_allocate_not_thread_safe( label , scalar_type , scalar_size , scalar_count ); - - return ptr ; -} - -void HostSpace::increment( const void * ptr ) -{ - if ( ! HostSpace::in_parallel() ) { - host_space_singleton().increment( ptr ); - } -} - -void HostSpace::decrement( const void * ptr ) -{ - if ( ! 
HostSpace::in_parallel() ) { - Impl::host_decrement_not_thread_safe( ptr ); - } -} - -void HostSpace::print_memory_view( std::ostream & o ) -{ - host_space_singleton().print( o , std::string(" ") ); -} - -std::string HostSpace::query_label( const void * p ) -{ - const Impl::MemoryTrackingEntry * const info = - host_space_singleton().query( p ); - - return 0 != info ? info->label : std::string("ERROR NOT DEFINED"); -} - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_MemoryTracking.cpp b/kokkos/kokkos/core/src/impl/Kokkos_MemoryTracking.cpp deleted file mode 100644 index 28aa65c..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_MemoryTracking.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include -#include - -#include -#include - -namespace Kokkos { -namespace Impl { -namespace { - -//---------------------------------------------------------------------------- -// Fast search for result[-1] <= val < result[0]. -// Requires result[max] == upper_bound. -// Start with a binary search until the search range is -// less than LINEAR_LIMIT, then switch to linear search. 
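The comment above summarizes the lookup used throughout this file: m_tracking_end is kept sorted and terminated by a sentinel equal to the largest ptrdiff_t, so a binary search can narrow the window and a short linear scan can finish it without a bounds check. Below is a compilable paraphrase of the routine defined immediately after this sketch, together with a tiny driver; the name upper_bound_sketch and the sample data are assumptions made for illustration and do not appear in the deleted file.

#include <cstddef>
#include <cstdio>
#include <limits>

// Return the index i with begin[i-1] <= val < begin[i].
// Precondition: begin[length-1] is a sentinel larger than any queried 'val',
// which is what lets the final linear scan run without a length check.
static int upper_bound_sketch( const std::ptrdiff_t * const begin , unsigned length ,
                               const std::ptrdiff_t val )
{
  enum { LINEAR_LIMIT = 32 };

  const std::ptrdiff_t * first = begin ;

  // Binary search while the window is still large ...
  while ( LINEAR_LIMIT < length ) {
    const unsigned half = length >> 1 ;
    const std::ptrdiff_t * const middle = first + half ;
    if ( val < *middle ) { length = half ; }
    else { first = middle + 1 ; length -= half + 1 ; }
  }

  // ... then scan linearly; the sentinel guarantees termination.
  for ( ; ! ( val < *first ) ; ++first ) {}

  return int( first - begin );
}

int main()
{
  const std::ptrdiff_t ends[] = { 10 , 20 , 30 , std::numeric_limits<std::ptrdiff_t>::max() };
  std::printf( "%d %d %d\n" ,                      // prints "0 1 2"
               upper_bound_sketch( ends , 4 , 5 ) ,
               upper_bound_sketch( ends , 4 , 10 ) ,
               upper_bound_sketch( ends , 4 , 25 ) );
  return 0 ;
}

MemoryTracking uses the returned index to locate the tracked allocation whose end address is the first one greater than a queried pointer, and then verifies that the entry's begin address is not past that pointer.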
- -int upper_bound( const ptrdiff_t * const begin , unsigned length , - const ptrdiff_t val ) -{ - enum { LINEAR_LIMIT = 32 }; - - // precondition: begin[length-1] == std::numeric_limits::max() - - const ptrdiff_t * first = begin ; - - while ( LINEAR_LIMIT < length ) { - unsigned half = length >> 1 ; - const ptrdiff_t * middle = first + half ; - - if ( val < *middle ) { - length = half ; - } - else { - first = ++middle ; - length -= ++half ; - } - } - - for ( ; ! ( val < *first ) ; ++first ) {} - - return first - begin ; -} - -} // namespace - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -MemoryTracking::MemoryTracking( const std::string & space ) - : m_space( space ), m_tracking(), m_tracking_end() -{ - ptrdiff_t max = std::numeric_limits::max(); - void * const ptr = reinterpret_cast( max ); - - m_tracking.reserve(64); - m_tracking_end.reserve(64); - - // Sentinal value of end - - m_tracking.push_back( new MemoryTrackingEntry( "sentinal" , typeid(void) , ptr , 0 ) ); - m_tracking_end.push_back( max ); -} - -MemoryTracking::~MemoryTracking() -{ - const ptrdiff_t max = std::numeric_limits::max(); - - try { - if ( 1 < m_tracking.size() ) { - std::cerr << m_space << " destroyed with memory leaks:" << std::endl ; - print( std::cerr , std::string(" ") ); - } - else if ( 1 != m_tracking_end.size() || m_tracking_end.back() != max ) { - std::cerr << m_space << " corrupted data structure" << std::endl ; - } - } catch( ... ) {} -} - -void MemoryTracking::insert( MemoryTrackingEntry * entry ) -{ - const ptrdiff_t max = std::numeric_limits::max(); - - const bool ok_range = entry && - 0 < entry->begin && - entry->begin < entry->end && - entry->end < max ; - - int i = -1 ; - - if ( ok_range ) { - - i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , entry->begin ); - - // Guaranteed: - // a) entry->begin < m_tracking_end[i] - // b) i == 0 || m_tracking_end[i-1] <= entry->begin - - if ( entry->end <= m_tracking[i]->begin ) { - - // Non-overlapping range: - // m_tracking[i-1].end <= entry->begin < entry->end <= m_tracking[i].begin - - entry->m_count = 1 ; - - m_tracking.insert( m_tracking.begin() + i , entry ); - m_tracking_end.insert( m_tracking_end.begin() + i , entry->end ); - } - } - - if ( ! ok_range || -1 == i ) { - std::ostringstream msg ; - msg << "MemoryTracking(" << m_space << ")::insert( " ; - entry->print( msg ); - msg << " ) ERROR: " ; - - if ( ! 
ok_range ) { - msg << "Invalid memory range" ; - } - else { - msg << "Overlapping memory range with " ; - m_tracking[i]->print( msg ); - } - msg << " )" ; - throw_runtime_exception( msg.str() ); - } -} - -void MemoryTracking::increment( const void * ptr ) -{ - if ( ptr ) { - const ptrdiff_t p = reinterpret_cast( ptr ); - const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p ); - - if ( m_tracking[i]->begin <= p ) { - ++( m_tracking[i]->m_count ); - } - else { - std::ostringstream msg ; - msg << "MemoryTracking(" << m_space - << ")::increment( " << p << " ) ERROR: Not being tracked" ; - throw_runtime_exception( msg.str() ); - } - } -} - -void MemoryTracking::decrement( const void * ptr ) -{ - if ( ptr ) { - const ptrdiff_t p = reinterpret_cast( ptr ); - const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p ); - - if ( m_tracking[i]->begin <= p ) { - if ( 0 == --( m_tracking[i]->m_count ) ) { - - delete m_tracking[i] ; - - m_tracking.erase( m_tracking.begin() + i ); - m_tracking_end.erase( m_tracking_end.begin() + i ); - } - } - else { - std::ostringstream msg ; - msg << "MemoryTracking(" << m_space - << ")::decrement( " << p << " ) ERROR: Not being tracked" ; - throw_runtime_exception( msg.str() ); - } - } -} - -MemoryTrackingEntry * -MemoryTracking::query( const void * ptr ) const -{ - MemoryTrackingEntry * result = 0 ; - - if ( ptr ) { - const ptrdiff_t p = reinterpret_cast( ptr ); - - const int i = upper_bound( & m_tracking_end[0] , m_tracking_end.size() , p ); - - if ( m_tracking[i]->begin <= p ) result = m_tracking[i] ; - } - - return result ; -} - -void MemoryTracking::print( std::ostream & s , const std::string & lead ) const -{ - // Don't print the sentinal value: - const size_t n = m_tracking.size() - 1 ; - - for ( size_t i = 0 ; i < n ; ++i ) { - s << lead ; - m_tracking[i]->print( s ); - s << std::endl ; - } -} - -MemoryTrackingEntry::~MemoryTrackingEntry() -{} - -void MemoryTrackingEntry::print( std::ostream & s ) const -{ - s << "{ " - << "label(" << label << ") " - << "typeid(" << type.name() << ") " - << "range[ " << ((void*)begin) << " : " << ((void*)end) << " ) " - << "count(" << m_count << ") }" ; -} - -//---------------------------------------------------------------------------- - -} /* namespace Impl */ -} /* namespace Kokkos */ - - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_MemoryTracking.hpp b/kokkos/kokkos/core/src/impl/Kokkos_MemoryTracking.hpp deleted file mode 100644 index 7e1bbfb..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_MemoryTracking.hpp +++ /dev/null @@ -1,147 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_MEMORY_TRACKING_HPP -#define KOKKOS_MEMORY_TRACKING_HPP - -#include -#include -#include -#include -#include -#include - -namespace Kokkos { -namespace Impl { - -class MemoryTracking ; - -class MemoryTrackingEntry { -public: - const std::string label ; - const std::type_info & type ; - const ptrdiff_t begin ; - const ptrdiff_t end ; -private: - unsigned m_count ; -protected: - - MemoryTrackingEntry( const std::string & arg_label , - const std::type_info & arg_type , - const void * const arg_begin , - const unsigned arg_bytes ) - : label( arg_label ) - , type( arg_type ) - , begin( reinterpret_cast( arg_begin ) ) - , end( reinterpret_cast( - reinterpret_cast( arg_begin ) + arg_bytes ) ) - , m_count( 0 ) - {} - -public: - - unsigned count() const { return m_count ; } - - virtual void print( std::ostream & ) const ; - - virtual ~MemoryTrackingEntry(); - -private: - - MemoryTrackingEntry(); - MemoryTrackingEntry( const MemoryTrackingEntry & rhs ); - MemoryTrackingEntry & operator = ( const MemoryTrackingEntry & rhs ); - - friend class MemoryTracking ; -}; - - -class MemoryTracking { -public: - - /** \brief Track a memory range defined by the entry. - * This entry must be allocated via 'new'. - */ - void insert( MemoryTrackingEntry * entry ); - - /** \brief Decrement the tracked memory range. - * If the count is zero then the entry is deleted - * via the 'delete' operator. - */ - void decrement( const void * ptr ); - - /** \brief Increment the tracking count. */ - void increment( const void * ptr ); - - /** \brief Query a tracked memory range. */ - MemoryTrackingEntry * query( const void * ptr ) const ; - - /** \brief Call the 'print' method on all entries. */ - void print( std::ostream & , const std::string & lead ) const ; - - size_t size() const { return m_tracking.size(); } - - template< typename iType > - MemoryTracking & operator[]( const iType & i ) const - { return *m_tracking[i]; } - - explicit MemoryTracking( const std::string & space ); - - /** \brief Print memory leak warning for all entries. 
*/ - ~MemoryTracking(); - -private: - MemoryTracking(); - MemoryTracking( const MemoryTracking & ); - MemoryTracking & operator = ( const MemoryTracking & ); - - std::string m_space ; - std::vector m_tracking ; - std::vector m_tracking_end ; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -#endif - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp b/kokkos/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp deleted file mode 100644 index 68254df..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_PHYSICAL_LAYOUT_HPP -#define KOKKOS_PHYSICAL_LAYOUT_HPP - - -#include -namespace Kokkos { -namespace Impl { - - - -struct PhysicalLayout { - enum LayoutType {Left,Right,Scalar,Error}; - LayoutType layout_type; - int rank; - long long int stride[8]; //distance between two neighboring elements in a given dimension - - template< class T , class L , class D , class M > - PhysicalLayout( const View & view ) - : layout_type( is_same< typename View::array_layout , LayoutLeft >::value ? Left : ( - is_same< typename View::array_layout , LayoutRight >::value ? Right : Error )) - , rank( view.Rank ) - { - for(int i=0;i<8;i++) stride[i] = 0; - view.stride( stride ); - } - template< class T , class L , class D , class M > - PhysicalLayout( const View & view ) - : layout_type( is_same< typename View::array_layout , LayoutLeft >::value ? Left : ( - is_same< typename View::array_layout , LayoutRight >::value ? 
Right : Error )) - , rank( view.Rank ) - { - for(int i=0;i<8;i++) stride[i] = 0; - view.stride( stride ); - } -}; - -} -} -#endif diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Serial.cpp b/kokkos/kokkos/core/src/impl/Kokkos_Serial.cpp deleted file mode 100644 index ba302f9..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Serial.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace { - -struct Sentinel { - - void * m_reduce ; - unsigned m_reduce_size ; - - Sentinel() : m_reduce(0), m_reduce_size(0) {} - - ~Sentinel() { if ( m_reduce ) { free( m_reduce ); } } -}; - -} - -void * Serial::resize_reduce_scratch( unsigned size ) -{ - static Sentinel s ; - - const unsigned rem = size % Impl::MEMORY_ALIGNMENT ; - - if ( rem ) size += Impl::MEMORY_ALIGNMENT - rem ; - - if ( ( 0 == size ) || ( s.m_reduce_size < size ) ) { - - if ( s.m_reduce ) { free( s.m_reduce ); } - - s.m_reduce_size = size ; - - s.m_reduce = malloc( size ); - } - - return s.m_reduce ; -} - -} // namespace Kokkos - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Shape.cpp b/kokkos/kokkos/core/src/impl/Kokkos_Shape.cpp deleted file mode 100644 index e3bf5d3..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Shape.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - - -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void assert_counts_are_equal_throw( - const unsigned x_count , - const unsigned y_count ) -{ - std::ostringstream msg ; - - msg << "Kokkos::Impl::assert_counts_are_equal_throw( " - << x_count << " != " << y_count << " )" ; - - throw_runtime_exception( msg.str() ); -} - -void assert_shapes_are_equal_throw( - const unsigned x_scalar_size , - const unsigned x_rank , - const unsigned x_N0 , const unsigned x_N1 , - const unsigned x_N2 , const unsigned x_N3 , - const unsigned x_N4 , const unsigned x_N5 , - const unsigned x_N6 , const unsigned x_N7 , - - const unsigned y_scalar_size , - const unsigned y_rank , - const unsigned y_N0 , const unsigned y_N1 , - const unsigned y_N2 , const unsigned y_N3 , - const unsigned y_N4 , const unsigned y_N5 , - const unsigned y_N6 , const unsigned y_N7 ) -{ - std::ostringstream msg ; - - msg << "Kokkos::Impl::assert_shape_are_equal_throw( {" - << " scalar_size(" << x_scalar_size - << ") rank(" << x_rank - << ") dimension(" ; - if ( 0 < x_rank ) { msg << " " << x_N0 ; } - if ( 1 < x_rank ) { msg << " " << x_N1 ; } - if ( 2 < x_rank ) { msg << " " << x_N2 ; } - if ( 3 < x_rank ) { msg << " " << x_N3 ; } - if ( 4 < x_rank ) { msg << " " << x_N4 ; } - if ( 5 < x_rank ) { msg << " " << x_N5 ; } - if ( 6 < x_rank ) { msg << " " << x_N6 ; } - if ( 7 < x_rank ) { msg << " " << x_N7 ; } - msg << " ) } != { " - << " scalar_size(" << y_scalar_size - << ") rank(" << y_rank - << ") dimension(" ; - if ( 0 < y_rank ) { msg << " " << y_N0 ; } - if ( 1 < y_rank ) { msg << " " << y_N1 ; } - if ( 2 < y_rank ) { msg << " " << y_N2 ; } - if ( 3 < y_rank ) { msg << " " << y_N3 ; } - if ( 4 < y_rank ) { msg << " " << y_N4 ; } - if ( 5 < y_rank ) { msg << " " << y_N5 ; } - if ( 6 < y_rank ) { msg << " " << y_N6 ; } - if ( 7 < y_rank ) { msg << " " << y_N7 ; } - msg << " ) } )" ; - - throw_runtime_exception( msg.str() ); -} - -void AssertShapeBoundsAbort< Kokkos::HostSpace >::apply( - const size_t rank , - const size_t n0 , const size_t n1 , - const size_t n2 , const size_t n3 , - const size_t n4 , const size_t n5 , - const size_t n6 , const size_t n7 , - - const size_t arg_rank , - const size_t i0 , const size_t i1 , - const size_t i2 , const size_t i3 , - const size_t i4 , const size_t i5 , - const size_t i6 , const size_t i7 ) -{ - std::ostringstream msg ; - msg << "Kokkos::Impl::AssertShapeBoundsAbort( shape = {" ; - if ( 0 < rank ) { msg << " " << n0 ; } - if ( 1 < rank ) { msg << " " << n1 ; } - if ( 2 < rank ) { msg << " " << n2 ; } - if ( 3 < rank ) { msg << " " << n3 ; } - if ( 4 < rank ) { msg << " " << n4 ; } - if ( 5 < rank ) { msg << " " << n5 ; } - if ( 6 < rank ) { msg << " " << n6 ; } - if ( 7 < rank ) { msg << " " << n7 ; } - msg << " } index = {" ; - if ( 0 < arg_rank ) { msg << " " << i0 ; } - if ( 1 < arg_rank ) { msg << " " << i1 ; } - if ( 2 < arg_rank ) { msg << " " << i2 ; } - if ( 3 < arg_rank ) { msg << " " << i3 ; } - if ( 4 < arg_rank ) { msg << " " << i4 ; } - if ( 5 < arg_rank ) { msg << " " << i5 ; } - if ( 6 < arg_rank ) { msg << " " << i6 ; } - if ( 7 < arg_rank ) { msg << " " << i7 ; } - msg << " } )" ; - - throw_runtime_exception( msg.str() ); -} - -void 
assert_shape_effective_rank1_at_leastN_throw( - const size_t x_rank , const size_t x_N0 , - const size_t x_N1 , const size_t x_N2 , - const size_t x_N3 , const size_t x_N4 , - const size_t x_N5 , const size_t x_N6 , - const size_t x_N7 , - const size_t N0 ) -{ - std::ostringstream msg ; - - msg << "Kokkos::Impl::assert_shape_effective_rank1_at_leastN_throw( shape = {" ; - if ( 0 < x_rank ) { msg << " " << x_N0 ; } - if ( 1 < x_rank ) { msg << " " << x_N1 ; } - if ( 2 < x_rank ) { msg << " " << x_N2 ; } - if ( 3 < x_rank ) { msg << " " << x_N3 ; } - if ( 4 < x_rank ) { msg << " " << x_N4 ; } - if ( 5 < x_rank ) { msg << " " << x_N5 ; } - if ( 6 < x_rank ) { msg << " " << x_N6 ; } - if ( 7 < x_rank ) { msg << " " << x_N7 ; } - msg << " } N = " << N0 << " )" ; - - throw_runtime_exception( msg.str() ); -} - - - -} -} - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Shape.hpp b/kokkos/kokkos/core/src/impl/Kokkos_Shape.hpp deleted file mode 100644 index f77e4e7..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Shape.hpp +++ /dev/null @@ -1,894 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_SHAPE_HPP -#define KOKKOS_SHAPE_HPP - -#include -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -/** \brief The shape of a Kokkos with dynamic and static dimensions. 
- * Dynamic dimensions are member values and static dimensions are - * 'static const' values. - * - * The upper bound on the array rank is eight. - */ -template< unsigned ScalarSize , - unsigned Rank , - unsigned s0 = 1 , - unsigned s1 = 1 , - unsigned s2 = 1 , - unsigned s3 = 1 , - unsigned s4 = 1 , - unsigned s5 = 1 , - unsigned s6 = 1 , - unsigned s7 = 1 > -struct Shape ; - -template< class ShapeType , class Layout > -struct ShapeMap ; - -//---------------------------------------------------------------------------- -/** \brief Shape equality if the value type, layout, and dimensions - * are equal. - */ -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , - - unsigned ySize , unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -KOKKOS_INLINE_FUNCTION -bool operator == ( const Shape & x , - const Shape & y ) -{ - enum { same_size = xSize == ySize }; - enum { same_rank = xRank == yRank }; - - return same_size && same_rank && - unsigned( x.N0 ) == unsigned( y.N0 ) && - unsigned( x.N1 ) == unsigned( y.N1 ) && - unsigned( x.N2 ) == unsigned( y.N2 ) && - unsigned( x.N3 ) == unsigned( y.N3 ) && - unsigned( x.N4 ) == unsigned( y.N4 ) && - unsigned( x.N5 ) == unsigned( y.N5 ) && - unsigned( x.N6 ) == unsigned( y.N6 ) && - unsigned( x.N7 ) == unsigned( y.N7 ) ; -} - -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , - - unsigned ySize ,unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -KOKKOS_INLINE_FUNCTION -bool operator != ( const Shape & x , - const Shape & y ) -{ return ! 
operator == ( x , y ); } - -//---------------------------------------------------------------------------- - -void assert_counts_are_equal_throw( - const unsigned x_count , - const unsigned y_count ); - -inline -void assert_counts_are_equal( - const unsigned x_count , - const unsigned y_count ) -{ - if ( x_count != y_count ) { - assert_counts_are_equal_throw( x_count , y_count ); - } -} - -void assert_shapes_are_equal_throw( - const unsigned x_scalar_size , - const unsigned x_rank , - const unsigned x_N0 , const unsigned x_N1 , - const unsigned x_N2 , const unsigned x_N3 , - const unsigned x_N4 , const unsigned x_N5 , - const unsigned x_N6 , const unsigned x_N7 , - - const unsigned y_scalar_size , - const unsigned y_rank , - const unsigned y_N0 , const unsigned y_N1 , - const unsigned y_N2 , const unsigned y_N3 , - const unsigned y_N4 , const unsigned y_N5 , - const unsigned y_N6 , const unsigned y_N7 ); - -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , - - unsigned ySize , unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -inline -void assert_shapes_are_equal( - const Shape & x , - const Shape & y ) -{ - typedef Shape x_type ; - typedef Shape y_type ; - - if ( x != y ) { - assert_shapes_are_equal_throw( - x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7, - y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 ); - } -} - -template< unsigned xSize , unsigned xRank , - unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 , - unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 , - - unsigned ySize , unsigned yRank , - unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 , - unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 > -void assert_shapes_equal_dimension( - const Shape & x , - const Shape & y ) -{ - typedef Shape x_type ; - typedef Shape y_type ; - - // Omit comparison of scalar_size. - if ( unsigned( x.rank ) != unsigned( y.rank ) || - unsigned( x.N0 ) != unsigned( y.N0 ) || - unsigned( x.N1 ) != unsigned( y.N1 ) || - unsigned( x.N2 ) != unsigned( y.N2 ) || - unsigned( x.N3 ) != unsigned( y.N3 ) || - unsigned( x.N4 ) != unsigned( y.N4 ) || - unsigned( x.N5 ) != unsigned( y.N5 ) || - unsigned( x.N6 ) != unsigned( y.N6 ) || - unsigned( x.N7 ) != unsigned( y.N7 ) ) { - assert_shapes_are_equal_throw( - x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7, - y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 ); - } -} - -//---------------------------------------------------------------------------- - -template< class ShapeType > struct assert_shape_is_rank_zero ; -template< class ShapeType > struct assert_shape_is_rank_one ; - -template< unsigned Size > -struct assert_shape_is_rank_zero< Shape > - : public true_type {}; - -template< unsigned Size , unsigned s0 > -struct assert_shape_is_rank_one< Shape > - : public true_type {}; - -//---------------------------------------------------------------------------- - -/** \brief Array bounds assertion templated on the execution space - * to allow device-specific abort code. 
- */ -template< class ExecutionSpace > -struct AssertShapeBoundsAbort ; - -template<> -struct AssertShapeBoundsAbort< Kokkos::HostSpace > -{ - static void apply( const size_t rank , - const size_t n0 , const size_t n1 , - const size_t n2 , const size_t n3 , - const size_t n4 , const size_t n5 , - const size_t n6 , const size_t n7 , - const size_t arg_rank , - const size_t i0 , const size_t i1 , - const size_t i2 , const size_t i3 , - const size_t i4 , const size_t i5 , - const size_t i6 , const size_t i7 ); -}; - -template< class ExecutionDevice > -struct AssertShapeBoundsAbort -{ - KOKKOS_INLINE_FUNCTION - static void apply( const size_t rank , - const size_t n0 , const size_t n1 , - const size_t n2 , const size_t n3 , - const size_t n4 , const size_t n5 , - const size_t n6 , const size_t n7 , - const size_t arg_rank , - const size_t i0 , const size_t i1 , - const size_t i2 , const size_t i3 , - const size_t i4 , const size_t i5 , - const size_t i6 , const size_t i7 ) - { - AssertShapeBoundsAbort< Kokkos::HostSpace > - ::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 , - arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); - } -}; - -template< class ShapeType > -KOKKOS_INLINE_FUNCTION -void assert_shape_bounds( const ShapeType & shape , - const size_t arg_rank , - const size_t i0 , - const size_t i1 = 0 , - const size_t i2 = 0 , - const size_t i3 = 0 , - const size_t i4 = 0 , - const size_t i5 = 0 , - const size_t i6 = 0 , - const size_t i7 = 0 ) -{ - // Must supply at least as many indices as ranks. - // Every index must be within bounds. - const bool ok = ShapeType::rank <= arg_rank && - i0 < shape.N0 && - i1 < shape.N1 && - i2 < shape.N2 && - i3 < shape.N3 && - i4 < shape.N4 && - i5 < shape.N5 && - i6 < shape.N6 && - i7 < shape.N7 ; - - if ( ! 
ok ) { - AssertShapeBoundsAbort< ExecutionSpace > - ::apply( ShapeType::rank , - shape.N0 , shape.N1 , shape.N2 , shape.N3 , - shape.N4 , shape.N5 , shape.N6 , shape.N7 , - arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); - } -} - -#if defined( KOKKOS_EXPRESSION_CHECK ) -#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6); -#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7); -#else -#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */ -#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */ -#endif - - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -// Specialization and optimization for the Rank 0 shape. 
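[Editor's note] The KOKKOS_ASSERT_SHAPE_BOUNDS_* macros deleted just above expand to a call of assert_shape_bounds() only when KOKKOS_EXPRESSION_CHECK is defined, so index checking can be compiled out entirely in optimized builds. A minimal stand-alone sketch of that pattern follows; every name in it (ExampleShape, EXAMPLE_EXPRESSION_CHECK, etc.) is illustrative and is not part of the deleted header.

// Sketch of a compile-time-toggled bounds check, mirroring the
// KOKKOS_ASSERT_SHAPE_BOUNDS_* / KOKKOS_EXPRESSION_CHECK idiom above.
#include <sstream>
#include <stdexcept>
#include <cstddef>

struct ExampleShape { std::size_t N0, N1; };   // two dynamic extents

inline void example_assert_bounds(const ExampleShape& s,
                                  std::size_t i0, std::size_t i1)
{
  if (i0 >= s.N0 || i1 >= s.N1) {
    std::ostringstream msg;
    msg << "index (" << i0 << "," << i1 << ") out of bounds ("
        << s.N0 << "," << s.N1 << ")";
    throw std::runtime_error(msg.str());
  }
}

#if defined(EXAMPLE_EXPRESSION_CHECK)
#define EXAMPLE_ASSERT_BOUNDS_2(S, I0, I1) example_assert_bounds(S, I0, I1);
#else
#define EXAMPLE_ASSERT_BOUNDS_2(S, I0, I1) /* no-op when checking is disabled */
#endif

int main()
{
  ExampleShape s = { 4, 5 };
  EXAMPLE_ASSERT_BOUNDS_2(s, 3, 4)   // checked only if EXAMPLE_EXPRESSION_CHECK is defined
  return 0;
}

As in the deleted macros, the call site is written once and the cost of the check is decided at compile time.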
- -template < unsigned ScalarSize > -struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 0 }; - enum { rank = 0 }; - - enum { N0 = 1 }; - enum { N1 = 1 }; - enum { N2 = 1 }; - enum { N3 = 1 }; - enum { N4 = 1 }; - enum { N5 = 1 }; - enum { N6 = 1 }; - enum { N7 = 1 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - {} -}; - -//---------------------------------------------------------------------------- -// All-static dimension array - -template < unsigned ScalarSize , - unsigned Rank , - unsigned s0 , - unsigned s1 , - unsigned s2 , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape { - - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 0 }; - enum { rank = Rank }; - - enum { N0 = s0 }; - enum { N1 = s1 }; - enum { N2 = s2 }; - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - {} -}; - -// 1 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , - unsigned Rank , - unsigned s1 , - unsigned s2 , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 1 }; - enum { rank = Rank }; - - unsigned N0 ; - - enum { N1 = s1 }; - enum { N2 = s2 }; - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; } -}; - -// 2 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s2 , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 2 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - - enum { N2 = s2 }; - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; } -}; - -// 3 == dynamic_rank <= rank <= 8 -template < unsigned Rank , unsigned ScalarSize , - unsigned s3 , - unsigned s4 , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7> -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 3 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - - enum { N3 = s3 }; - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; } -}; - -// 4 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s4 , - unsigned s5 , - unsigned s6 , - 
unsigned s7 > -struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 4 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - - enum { N4 = s4 }; - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; } -}; - -// 5 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s5 , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 5 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - - enum { N5 = s5 }; - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 ) - { s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; } -}; - -// 6 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s6 , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 6 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - unsigned N5 ; - - enum { N6 = s6 }; - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 ) - { - s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; - s.N4 = n4 ; s.N5 = n5 ; - } -}; - -// 7 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize , unsigned Rank , - unsigned s7 > -struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 7 }; - enum { rank = Rank }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - unsigned N5 ; - unsigned N6 ; - - enum { N7 = s7 }; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 ) - { - s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; - s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; - } -}; - -// 8 == dynamic_rank <= rank <= 8 -template < unsigned ScalarSize > -struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 > -{ - enum { scalar_size = ScalarSize }; - enum { rank_dynamic = 8 }; - enum { rank = 8 }; - - unsigned N0 ; - unsigned N1 ; - unsigned N2 ; - unsigned N3 ; - unsigned N4 ; - unsigned N5 ; - unsigned N6 ; - unsigned N7 ; - - KOKKOS_INLINE_FUNCTION - static - void assign( Shape & s , - unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 , - unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 ) - { - s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; - s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ; - } -}; - -//---------------------------------------------------------------------------- - -template< class ShapeType , unsigned N , - unsigned R = ShapeType::rank_dynamic > -struct ShapeInsert ; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 0 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 
1 , - N , - ShapeType::N0 , - ShapeType::N1 , - ShapeType::N2 , - ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 1 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - N , - ShapeType::N1 , - ShapeType::N2 , - ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 2 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - N , - ShapeType::N2 , - ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 3 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - N , - ShapeType::N3 , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 4 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - N , - ShapeType::N4 , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 5 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - 0 , - N , - ShapeType::N5 , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 6 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - 0 , - 0 , - N , - ShapeType::N6 > type ; -}; - -template< class ShapeType , unsigned N > -struct ShapeInsert< ShapeType , N , 7 > -{ - typedef Shape< ShapeType::scalar_size , - ShapeType::rank + 1 , - 0 , - 0 , - 0 , - 0 , - 0 , - 0 , - 0 , - N > type ; -}; - -//---------------------------------------------------------------------------- - -template< class DstShape , class SrcShape , - unsigned DstRankDynamic = DstShape::rank_dynamic , - bool DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) > -struct ShapeCompatible { enum { value = false }; }; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 8 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 7 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 6 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 5 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 4 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - 
unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 3 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 2 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N2) == unsigned(SrcShape::N2) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 1 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N1) == unsigned(SrcShape::N1) && - unsigned(DstShape::N2) == unsigned(SrcShape::N2) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -template< class DstShape , class SrcShape > -struct ShapeCompatible< DstShape , SrcShape , 0 , true > -{ - enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) && - unsigned(DstShape::N0) == unsigned(SrcShape::N0) && - unsigned(DstShape::N1) == unsigned(SrcShape::N1) && - unsigned(DstShape::N2) == unsigned(SrcShape::N2) && - unsigned(DstShape::N3) == unsigned(SrcShape::N3) && - unsigned(DstShape::N4) == unsigned(SrcShape::N4) && - unsigned(DstShape::N5) == unsigned(SrcShape::N5) && - unsigned(DstShape::N6) == unsigned(SrcShape::N6) && - unsigned(DstShape::N7) == unsigned(SrcShape::N7) }; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< unsigned ScalarSize , unsigned Rank , - unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 , - unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 , - typename iType > -KOKKOS_INLINE_FUNCTION -size_t dimension( - const Shape & shape , - const iType & r ) -{ - return 0 == r ? shape.N0 : ( - 1 == r ? shape.N1 : ( - 2 == r ? shape.N2 : ( - 3 == r ? shape.N3 : ( - 4 == r ? shape.N4 : ( - 5 == r ? shape.N5 : ( - 6 == r ? shape.N6 : ( - 7 == r ? 
shape.N7 : 1 ))))))); -} - -template< unsigned ScalarSize , unsigned Rank , - unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 , - unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 > -size_t cardinality_count( - const Shape & shape ) -{ - return shape.N0 * shape.N1 * shape.N2 * shape.N3 * - shape.N4 * shape.N5 * shape.N6 * shape.N7 ; -} - -//---------------------------------------------------------------------------- - -} /* namespace Impl */ -} /* namespace Kokkos */ - -#endif /* #ifndef KOKKOS_CORESHAPE_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_StaticAssert.hpp b/kokkos/kokkos/core/src/impl/Kokkos_StaticAssert.hpp deleted file mode 100644 index f1017c3..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_StaticAssert.hpp +++ /dev/null @@ -1,79 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STATICASSERT_HPP -#define KOKKOS_STATICASSERT_HPP - -namespace Kokkos { -namespace Impl { - -template < bool , class T = void > -struct StaticAssert ; - -template< class T > -struct StaticAssert< true , T > { - typedef T type ; - static const bool value = true ; -}; - -template < class A , class B > -struct StaticAssertSame ; - -template < class A > -struct StaticAssertSame { typedef A type ; }; - -template < class A , class B > -struct StaticAssertAssignable ; - -template < class A > -struct StaticAssertAssignable { typedef A type ; }; - -template < class A > -struct StaticAssertAssignable< const A , A > { typedef const A type ; }; - -} // namespace Impl -} // namespace Kokkos - -#endif /* KOKKOS_STATICASSERT_HPP */ - - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Timer.hpp b/kokkos/kokkos/core/src/impl/Kokkos_Timer.hpp deleted file mode 100644 index 700653b..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Timer.hpp +++ /dev/null @@ -1,115 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_IMPLWALLTIME_HPP -#define KOKKOS_IMPLWALLTIME_HPP - -#include - -#ifdef _MSC_VER -#undef KOKKOS_USE_LIBRT -#include -#else -#ifdef KOKKOS_USE_LIBRT -#include -#else -#include -#endif -#endif - -namespace Kokkos { -namespace Impl { - -/** \brief Time since construction */ - -class Timer { -private: - #ifdef KOKKOS_USE_LIBRT - struct timespec m_old; - #else - struct timeval m_old ; - #endif - Timer( const Timer & ); - Timer & operator = ( const Timer & ); -public: - - inline - void reset() { - #ifdef KOKKOS_USE_LIBRT - clock_gettime(&m_old); - #else - gettimeofday( & m_old , ((struct timezone *) NULL ) ); - #endif - } - - inline - ~Timer() {} - - inline - Timer() { reset(); } - - inline - double seconds() const - { - #ifdef KOKKOS_USE_LIBRT - struct timespec m_new; - clock_gettime(&m_new); - - return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) + - ( (double) ( m_new.tv_nsec - m_old.tv_nsec ) * 1.0e-9 ); - #else - struct timeval m_new ; - - ::gettimeofday( & m_new , ((struct timezone *) NULL ) ); - - return ( (double) ( m_new.tv_sec - m_old.tv_sec ) ) + - ( (double) ( m_new.tv_usec - m_old.tv_usec ) * 1.0e-6 ); - #endif - } -}; - -} // namespace Impl -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_IMPLWALLTIME_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Traits.hpp b/kokkos/kokkos/core/src/impl/Kokkos_Traits.hpp deleted file mode 100644 index 459a769..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Traits.hpp +++ /dev/null @@ -1,274 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSTRAITS_HPP -#define KOKKOSTRAITS_HPP - -#include -#include - -namespace Kokkos { -namespace Impl { - -/* C++11 conformal compile-time type traits utilities. - * Prefer to use C++11 when portably available. - */ -//---------------------------------------------------------------------------- -// C++11 Helpers: - -template < class T , T v > -struct integral_constant -{ - static const T value = v ; - typedef T value_type; - typedef integral_constant type; - KOKKOS_INLINE_FUNCTION operator T() { return v ; } -}; - -typedef integral_constant false_type ; -typedef integral_constant true_type ; - -//---------------------------------------------------------------------------- -// C++11 Type relationships: - -template< class X , class Y > struct is_same : public false_type {}; -template< class X > struct is_same : public true_type {}; - -//---------------------------------------------------------------------------- -// C++11 Type properties: - -template struct is_const : public false_type {}; -template struct is_const : public true_type {}; -template struct is_const : public true_type {}; - -//---------------------------------------------------------------------------- -// C++11 Type transformations: - -template struct remove_const { typedef T type; }; -template struct remove_const { typedef T type; }; -template struct remove_const { typedef T & type; }; - -template struct add_const { typedef const T type; }; -template struct add_const { typedef const T & type; }; -template struct add_const { typedef const T type; }; -template struct add_const { typedef const T & type; }; - -template struct remove_reference { typedef T type ; }; -template struct remove_reference< T & > { typedef T type ; }; -template struct remove_reference< const T & > { typedef const T type ; }; - -//---------------------------------------------------------------------------- -// C++11 Other type generators: - -template< bool , class T , class F > -struct condition { typedef F type ; }; - -template< class T , class F > -struct condition { typedef T type ; }; - -template< bool , class = void > -struct enable_if ; - -template< class T > -struct enable_if< true , T > { typedef T type ; }; - -//---------------------------------------------------------------------------- - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -// Other traits - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -template< class , class T = void > -struct enable_if_type { typedef T type ; }; - -//---------------------------------------------------------------------------- - -template< bool B > -struct bool_ : public integral_constant {}; - -template< unsigned I > -struct unsigned_ : public integral_constant {}; - -template< int I > -struct int_ : public integral_constant {}; - -//---------------------------------------------------------------------------- -// if_ - -template < bool Cond , typename TrueType , typename FalseType> -struct if_c -{ - enum { value = Cond }; - - typedef FalseType type; - - - typedef typename remove_const< - typename remove_reference::type >::type value_type ; - - typedef typename add_const::type const_value_type ; - - static KOKKOS_INLINE_FUNCTION - 
const_value_type & select( const_value_type & v ) { return v ; } - - static KOKKOS_INLINE_FUNCTION - value_type & select( value_type & v ) { return v ; } - - template< class T > - static KOKKOS_INLINE_FUNCTION - value_type & select( const T & ) { value_type * ptr(0); return *ptr ; } - - - template< class T > - static KOKKOS_INLINE_FUNCTION - const_value_type & select( const T & , const_value_type & v ) { return v ; } - - template< class T > - static KOKKOS_INLINE_FUNCTION - value_type & select( const T & , value_type & v ) { return v ; } -}; - -template -struct if_c< true , TrueType , FalseType > -{ - enum { value = true }; - - typedef TrueType type; - - - typedef typename remove_const< - typename remove_reference::type >::type value_type ; - - typedef typename add_const::type const_value_type ; - - static KOKKOS_INLINE_FUNCTION - const_value_type & select( const_value_type & v ) { return v ; } - - static KOKKOS_INLINE_FUNCTION - value_type & select( value_type & v ) { return v ; } - - template< class T > - static KOKKOS_INLINE_FUNCTION - value_type & select( const T & ) { value_type * ptr(0); return *ptr ; } - - - template< class F > - static KOKKOS_INLINE_FUNCTION - const_value_type & select( const_value_type & v , const F & ) { return v ; } - - template< class F > - static KOKKOS_INLINE_FUNCTION - value_type & select( value_type & v , const F & ) { return v ; } -}; - - -template -struct if_ : public if_c {}; - -//---------------------------------------------------------------------------- - -template -struct is_power_of_two -{ - enum type { value = (N > 0) && !(N & (N-1)) }; -}; - -template < size_t N , bool OK = is_power_of_two::value > -struct power_of_two ; - -template < size_t N > -struct power_of_two -{ - enum type { value = 1+ power_of_two<(N>>1),true>::value }; -}; - -template <> -struct power_of_two<2,true> -{ - enum type { value = 1 }; -}; - -template <> -struct power_of_two<1,true> -{ - enum type { value = 0 }; -}; - -//---------------------------------------------------------------------------- - -template< typename T , T v , bool NonZero = ( v != T(0) ) > -struct integral_nonzero_constant -{ - static const T value = v ; - typedef T value_type ; - typedef integral_nonzero_constant type ; - KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & ) {} -}; - -template< typename T , T zero > -struct integral_nonzero_constant -{ - const T value ; - typedef T value_type ; - typedef integral_nonzero_constant type ; - KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & v ) : value(v) {} -}; - -//---------------------------------------------------------------------------- - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOSTRAITS_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_Utility.hpp b/kokkos/kokkos/core/src/impl/Kokkos_Utility.hpp deleted file mode 100644 index d80324c..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_Utility.hpp +++ /dev/null @@ -1,246 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
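[Editor's note] The deleted Kokkos_Traits.hpp above supplies pre-C++11 stand-ins for the standard type traits (integral_constant, is_same, enable_if, if_c, is_power_of_two, integral_nonzero_constant). A short usage sketch of that style of compile-time dispatch follows, under the assumption that simplified re-implementations are acceptable for illustration; it is not the deleted header's exact interface.

// Minimal sketch of how traits like the deleted enable_if / is_power_of_two
// are typically used.  Simplified re-implementations, illustration only.
#include <cstdio>

template <bool, class T = void> struct enable_if {};
template <class T> struct enable_if<true, T> { typedef T type; };

template <unsigned N>
struct is_power_of_two { enum { value = (N > 0) && !(N & (N - 1)) }; };

// Only instantiable when BlockSize is a power of two:
template <unsigned BlockSize>
typename enable_if<is_power_of_two<BlockSize>::value, unsigned>::type
round_up(unsigned n) { return (n + BlockSize - 1) & ~(BlockSize - 1); }

int main()
{
  std::printf("%u\n", round_up<8>(13));   // prints 16
  // round_up<6>(13);  // would not compile: 6 is not a power of two
  return 0;
}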
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_UTILITY_HPP -#define KOKKOS_UTILITY_HPP - -#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template < bool , class T , class F > struct or_ ; - -template < class T , class F > struct or_ { typedef T type ; }; -template < class T , class F > struct or_ { typedef F type ; }; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< typename T , typename TS1 , typename TS2 = TS1 > -union UnionPair -{ -private: - typedef typename or_< sizeof(T) == sizeof(TS2) , TS2 , void >::type ts2_type ; - typedef typename or_< sizeof(T) == sizeof(TS1) , TS1 , ts2_type >::type ts_type ; -public: - - typedef T first_type ; - typedef ts_type second_type ; - - first_type first ; - second_type second ; - - KOKKOS_INLINE_FUNCTION - UnionPair() {} - - KOKKOS_INLINE_FUNCTION - UnionPair( const second_type & rhs ) : second(rhs) {} - - KOKKOS_INLINE_FUNCTION - static - second_type * cast( first_type * const ptr ) - { return reinterpret_cast( ptr ); } - - KOKKOS_INLINE_FUNCTION - static - const second_type * cast( const first_type * const ptr ) - { return reinterpret_cast( ptr ); } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type * const ptr ) - { return reinterpret_cast( ptr ); } - - KOKKOS_INLINE_FUNCTION - static - second_type & cast( first_type & ptr ) - { return reinterpret_cast( ptr ); } - - KOKKOS_INLINE_FUNCTION - static - const second_type & cast( const first_type & ptr ) - { return reinterpret_cast( ptr ); } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type & ptr ) - { return reinterpret_cast( ptr ); } -}; - - -template< typename T > -union UnionPair -{ - typedef T 
first_type ; - typedef T second_type ; - - first_type first ; - second_type second ; - - KOKKOS_INLINE_FUNCTION - UnionPair() {} - - KOKKOS_INLINE_FUNCTION - UnionPair( const first_type & rhs ) : first(rhs) {} - - KOKKOS_INLINE_FUNCTION - static - second_type * cast( first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - const second_type * cast( const first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - second_type & cast( first_type & ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - const second_type & cast( const first_type & ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type & ptr ) { return ptr ; } -}; - -template< typename T , typename TS2 > -union UnionPair -{ - typedef T first_type ; - typedef T second_type ; - - first_type first ; - second_type second ; - - KOKKOS_INLINE_FUNCTION - UnionPair() {} - - KOKKOS_INLINE_FUNCTION - UnionPair( const first_type & rhs ) : first(rhs) {} - - KOKKOS_INLINE_FUNCTION - static - second_type * cast( first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - const second_type * cast( const first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - second_type & cast( first_type & ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - const second_type & cast( const first_type & ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type & ptr ) { return ptr ; } -}; - - -template< typename T , typename TS1 > -union UnionPair -{ - typedef T first_type ; - typedef T second_type ; - - first_type first ; - second_type second ; - - KOKKOS_INLINE_FUNCTION - UnionPair() {} - - KOKKOS_INLINE_FUNCTION - UnionPair( const first_type & rhs ) : first(rhs) {} - - KOKKOS_INLINE_FUNCTION - static - second_type * cast( first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - const second_type * cast( const first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type * const ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - second_type & cast( first_type & ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - const second_type & cast( const first_type & ptr ) { return ptr ; } - - KOKKOS_INLINE_FUNCTION - static - volatile second_type * cast( volatile first_type & ptr ) { return ptr ; } -}; - -} -} - -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_UTILITY_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_ViewDefault.hpp b/kokkos/kokkos/core/src/impl/Kokkos_ViewDefault.hpp deleted file mode 100644 index c8279e4..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_ViewDefault.hpp +++ /dev/null @@ -1,656 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
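[Editor's note] The UnionPair template deleted above pairs a value type with a size-matched "shadow" type so the same storage can be addressed through either member (for example, viewing a floating-point value through an integer of the same width). A simplified sketch of that idea follows; the names are illustrative, and like the original it relies on the type-punning-through-a-union idiom.

// Sketch of the UnionPair idea: one storage location, two views of it.
// Reading the inactive member is type punning; the deleted code depends
// on the same idiom.
#include <cstdio>
#include <stdint.h>

union DoubleBits {
  double   first;    // the value type
  uint64_t second;   // a size-matched integral alias of the same storage
};

int main()
{
  DoubleBits u;
  u.first = 1.0;
  std::printf("bits of 1.0: 0x%016llx\n",
              (unsigned long long) u.second);  // 0x3ff0000000000000
  return 0;
}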
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VIEWDEFAULT_HPP -#define KOKKOS_VIEWDEFAULT_HPP - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template<> -struct ViewAssignment< LayoutDefault , LayoutDefault , void > -{ - typedef LayoutDefault Specialize ; - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-1 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 1 ) - ), unsigned >::type i0 ) - { - typedef ViewTraits dst_traits ; - - assert_shape_bounds( src.m_shape , 1 , i0 ); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - dst.m_ptr_on_device = src.m_ptr_on_device + i0 ; - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-2 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 2 ) - ), unsigned >::type i0 , - const unsigned i1 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - enum { is_left = is_same< typename src_traits::array_layout , LayoutLeft >::value }; - - assert_shape_bounds( src.m_shape , 2 , i0 , i1 ); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - if ( is_left ) 
{ - dst.m_ptr_on_device = src.m_ptr_on_device + i0 + src.m_stride.value * i1 ; - } - else { - dst.m_ptr_on_device = src.m_ptr_on_device + i1 + i0 * src.m_stride.value ; - } - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-3 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 3 ) - ), unsigned >::type i0 , - const unsigned i1 , - const unsigned i2 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - enum { is_left = is_same< typename src_traits::array_layout , LayoutLeft >::value }; - - assert_shape_bounds( src.m_shape, 3, i0, i1, i2 ); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - if ( is_left ) { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i0 + src.m_stride.value * ( - i1 + src.m_shape.N1 * ( - i2 )); - } - else { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i2 + src.m_shape.N2 * ( - i1 ) + i0 * src.m_stride.value ; - } - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-4 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 4 ) - ), unsigned >::type i0 , - const unsigned i1 , - const unsigned i2 , - const unsigned i3 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - enum { is_left = is_same< typename src_traits::array_layout , LayoutLeft >::value }; - - assert_shape_bounds( src.m_shape, 4, i0, i1, i2, i3 ); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - if ( is_left ) { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i0 + src.m_stride.value * ( - i1 + src.m_shape.N1 * ( - i2 + src.m_shape.N2 * ( - i3 ))); - } - else { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i3 + src.m_shape.N3 * ( - i2 + src.m_shape.N2 * ( - i1 )) + i0 * src.m_stride.value ; - } - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-5 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 5 ) - ), unsigned >::type i0 , - const unsigned i1 , - const unsigned i2 , - const unsigned i3 , - const unsigned i4 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - enum { is_left = is_same< typename src_traits::array_layout , LayoutLeft >::value }; - - assert_shape_bounds( src.m_shape, 5, i0, i1, i2, i3, i4); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - if ( is_left ) { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i0 + src.m_stride.value * ( - i1 + src.m_shape.N1 * ( - i2 + src.m_shape.N2 * ( - i3 + src.m_shape.N3 * ( - i4 )))); - } 
- else { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i4 + src.m_shape.N4 * ( - i3 + src.m_shape.N3 * ( - i2 + src.m_shape.N2 * ( - i1 ))) + i0 * src.m_stride.value ; - } - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-6 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 6 ) - ), unsigned >::type i0 , - const unsigned i1 , - const unsigned i2 , - const unsigned i3 , - const unsigned i4 , - const unsigned i5 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - enum { is_left = is_same< typename src_traits::array_layout , LayoutLeft >::value }; - - assert_shape_bounds( src.m_shape, 6, i0, i1, i2, i3, i4, i5); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - if ( is_left ) { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i0 + src.m_stride.value * ( - i1 + src.m_shape.N1 * ( - i2 + src.m_shape.N2 * ( - i3 + src.m_shape.N3 * ( - i4 + src.m_shape.N4 * ( - i5 ))))); - } - else { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i5 + src.m_shape.N5 * ( - i4 + src.m_shape.N4 * ( - i3 + src.m_shape.N3 * ( - i2 + src.m_shape.N2 * ( - i1 )))) + i0 * src.m_stride.value ; - } - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-7 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 7 ) - ), unsigned >::type i0 , - const unsigned i1 , - const unsigned i2 , - const unsigned i3 , - const unsigned i4 , - const unsigned i5 , - const unsigned i6 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - enum { is_left = is_same< typename src_traits::array_layout , LayoutLeft >::value }; - - assert_shape_bounds( src.m_shape, 7, i0, i1, i2, i3, i4, i5, i6 ); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - if ( is_left ) { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i0 + src.m_stride.value * ( - i1 + src.m_shape.N1 * ( - i2 + src.m_shape.N2 * ( - i3 + src.m_shape.N3 * ( - i4 + src.m_shape.N4 * ( - i5 + src.m_shape.N5 * ( - i6 )))))); - } - else { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i6 + src.m_shape.N6 * ( - i5 + src.m_shape.N5 * ( - i4 + src.m_shape.N4 * ( - i3 + src.m_shape.N3 * ( - i2 + src.m_shape.N2 * ( - i1 ))))) + i0 * src.m_stride.value ; - } - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-0 from Rank-8 */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if< ( - ViewAssignable< ViewTraits , - ViewTraits >::assignable_value && - ( ViewTraits::rank == 0 ) && - ( ViewTraits::rank == 8 ) - ), unsigned >::type i0 , - const unsigned i1 , - const unsigned i2 , - const unsigned i3 , - const unsigned i4 , 
- const unsigned i5 , - const unsigned i6 , - const unsigned i7 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - enum { is_left = is_same< typename src_traits::array_layout , LayoutLeft >::value }; - - assert_shape_bounds( src.m_shape, 8, i0, i1, i2, i3, i4, i5, i6, i7 ); - - ViewTracking< dst_traits >::decrement( dst.m_ptr_on_device ); - - if ( is_left ) { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i0 + src.m_stride.value * ( - i1 + src.m_shape.N1 * ( - i2 + src.m_shape.N2 * ( - i3 + src.m_shape.N3 * ( - i4 + src.m_shape.N4 * ( - i5 + src.m_shape.N5 * ( - i6 + src.m_shape.N6 * i7 )))))); - } - else { - dst.m_ptr_on_device = - src.m_ptr_on_device + - i7 + src.m_shape.N7 * ( - i6 + src.m_shape.N6 * ( - i5 + src.m_shape.N5 * ( - i4 + src.m_shape.N4 * ( - i3 + src.m_shape.N3 * ( - i2 + src.m_shape.N2 * ( - i1 )))))) + i0 * src.m_stride.value ; - } - - ViewTracking< dst_traits >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-1 array from range of Rank-1 array, either layout */ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM , - typename iType > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const std::pair & range , - typename enable_if< ( - ViewAssignable< ViewTraits , ViewTraits >::assignable_value - && - ( ViewTraits::rank == 1 ) - && - ( ViewTraits::rank == 1 ) - && - ( ViewTraits::rank_dynamic == 1 ) - ) >::type * = 0 ) - { - typedef ViewTraits traits_type ; - typedef typename traits_type::shape_type shape_type ; - - ViewTracking< traits_type >::decrement( dst.m_ptr_on_device ); - - dst.m_shape.N0 = 0 ; - dst.m_ptr_on_device = 0 ; - - if ( range.first < range.second ) { - assert_shape_bounds( src.m_shape , 1 , range.first ); - assert_shape_bounds( src.m_shape , 1 , range.second - 1 ); - - dst.m_shape.N0 = range.second - range.first ; - dst.m_ptr_on_device = src.m_ptr_on_device + range.first ; - - ViewTracking< traits_type >::increment( dst.m_ptr_on_device ); - } - } - - //------------------------------------ - /** \brief Extract Rank-1 array from LayoutLeft Rank-2 array. */ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const ALL & , - const typename enable_if< ( - ViewAssignable< ViewTraits , ViewTraits >::assignable_value - && - is_same< typename ViewTraits::array_layout , LayoutLeft >::value - && - ( ViewTraits::rank == 2 ) - && - ( ViewTraits::rank == 1 ) - && - ( ViewTraits::rank_dynamic == 1 ) - ), unsigned >::type i1 ) - { - typedef ViewTraits traits_type ; - - ViewTracking< traits_type >::decrement( dst.m_ptr_on_device ); - - dst.m_shape.N0 = src.m_shape.N0 ; - dst.m_ptr_on_device = src.m_ptr_on_device + src.m_stride.value * i1 ; - - ViewTracking< traits_type >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract Rank-1 array from LayoutRight Rank-2 array. 
*/ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const unsigned i0 , - const typename enable_if< ( - ViewAssignable< ViewTraits , ViewTraits >::assignable_value - && - is_same< typename ViewTraits::array_layout , LayoutRight >::value - && - ( ViewTraits::rank == 2 ) - && - ( ViewTraits::rank == 1 ) - && - ( ViewTraits::rank_dynamic == 1 ) - ), ALL >::type & ) - { - typedef ViewTraits traits_type ; - - ViewTracking< traits_type >::decrement( dst.m_ptr_on_device ); - - dst.m_shape.N0 = src.m_shape.N1 ; - dst.m_ptr_on_device = src.m_ptr_on_device + src.m_stride.value * i0 ; - - ViewTracking< traits_type >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Extract LayoutRight Rank-N array from range of LayoutRight Rank-N array */ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM , - typename iType > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const std::pair & range , - typename enable_if< ( - ViewAssignable< ViewTraits , ViewTraits >::value - && - Impl::is_same< typename ViewTraits::array_layout , LayoutRight >::value - && - ( ViewTraits::rank > 1 ) - && - ( ViewTraits::rank_dynamic > 0 ) - )>::type * = 0 ) - { - typedef ViewTraits traits_type ; - typedef typename traits_type::shape_type shape_type ; - typedef typename View::stride_type stride_type ; - - ViewTracking< traits_type >::decrement( dst.m_ptr_on_device ); - - shape_type ::assign( dst.m_shape, 0, 0, 0, 0, 0, 0, 0, 0 ); - stride_type::assign( dst.m_stride , 0 ); - dst.m_ptr_on_device = 0 ; - - if ( range.first < range.second ) { - assert_shape_bounds( src.m_shape , 8 , range.first , 0,0,0,0,0,0,0); - assert_shape_bounds( src.m_shape , 8 , range.second - 1 , 0,0,0,0,0,0,0); - - shape_type::assign( dst.m_shape, range.second - range.first , - src.m_shape.N1 , src.m_shape.N2 , src.m_shape.N3 , - src.m_shape.N4 , src.m_shape.N5 , src.m_shape.N6 , src.m_shape.N7 ); - - stride_type::assign( dst.m_stride , src.m_stride.value ); - - dst.m_ptr_on_device = src.m_ptr_on_device + range.first * src.m_stride.value ; - - ViewTracking< traits_type >::increment( dst.m_ptr_on_device ); - } - } - - //------------------------------------ - /** \brief Extract rank-2 from rank-2 array */ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM , - typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const std::pair & range0 , - const std::pair & range1 , - typename enable_if< ( - ViewAssignable< ViewTraits , ViewTraits >::value - && - ViewTraits::rank == 2 - && - ViewTraits::rank_dynamic == 2 - ) >::type * = 0 ) - { - typedef ViewTraits traits_type ; - typedef typename traits_type::shape_type shape_type ; - enum { left = is_same< typename traits_type::array_layout , LayoutLeft >::value }; - - ViewTracking< traits_type >::decrement( dst.m_ptr_on_device ); - - dst.m_shape.N0 = 0 ; - dst.m_shape.N1 = 0 ; - dst.m_stride.value = 0 ; - dst.m_ptr_on_device = 0 ; - - if ( range0.first < range0.second && range1.first < range1.second ) { - assert_shape_bounds( src.m_shape , 2 , range0.first , range1.first ); - assert_shape_bounds( src.m_shape , 2 , range0.second - 1 , range1.second - 1 ); - - dst.m_shape.N0 = range0.second - range0.first ; - dst.m_shape.N1 = range1.second - range1.first ; - dst.m_stride 
= src.m_stride ; - - if ( left ) { - // operator: dst.m_ptr_on_device[ i0 + dst.m_stride * i1 ] - dst.m_ptr_on_device = src.m_ptr_on_device + range0.first + dst.m_stride.value * range1.first ; - } - else { - // operator: dst.m_ptr_on_device[ i0 * dst.m_stride + i1 ] - dst.m_ptr_on_device = src.m_ptr_on_device + range0.first * dst.m_stride.value + range1.first ; - } - - ViewTracking< traits_type >::increment( dst.m_ptr_on_device ); - } - } - - //------------------------------------ - /** \brief Deep copy data from compatible value type, layout, rank, and specialization. - * Check the dimensions and allocation lengths at runtime. - */ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - inline static - void deep_copy( const View & dst , - const View & src , - const typename Impl::enable_if<( - Impl::is_same< typename ViewTraits::scalar_type , - typename ViewTraits::non_const_scalar_type >::value - && - Impl::is_same< typename ViewTraits::array_layout , - typename ViewTraits::array_layout >::value - && - ( unsigned(ViewTraits::rank) == unsigned(ViewTraits::rank) ) - )>::type * = 0 ) - { - typedef typename ViewTraits::memory_space dst_memory_space ; - typedef typename ViewTraits::memory_space src_memory_space ; - - if ( dst.m_ptr_on_device != src.m_ptr_on_device ) { - - Impl::assert_shapes_are_equal( dst.m_shape , src.m_shape ); - - const size_t nbytes = dst.m_shape.scalar_size * capacity( dst.m_shape , dst.m_stride ); - - DeepCopy< dst_memory_space , src_memory_space >( dst.m_ptr_on_device , src.m_ptr_on_device , nbytes ); - } - } -}; - -//---------------------------------------------------------------------------- - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_ViewSupport.hpp b/kokkos/kokkos/core/src/impl/Kokkos_ViewSupport.hpp deleted file mode 100644 index 29d195b..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_ViewSupport.hpp +++ /dev/null @@ -1,510 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VIEWSUPPORT_HPP -#define KOKKOS_VIEWSUPPORT_HPP - -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief Evaluate if LHS = RHS view assignment is allowed. */ -template< class ViewLHS , class ViewRHS > -struct ViewAssignable -{ - // Same memory space. - // Same value type. - // Compatible 'const' qualifier - // Cannot assign managed = unmannaged - enum { assignable_value = - ( is_same< typename ViewLHS::value_type , - typename ViewRHS::value_type >::value - || - is_same< typename ViewLHS::value_type , - typename ViewRHS::const_value_type >::value ) - && - is_same< typename ViewLHS::memory_space , - typename ViewRHS::memory_space >::value - && - ( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) ) - }; - - enum { assignable_shape = - // Compatible shape and matching layout: - ( ShapeCompatible< typename ViewLHS::shape_type , - typename ViewRHS::shape_type >::value - && - is_same< typename ViewLHS::array_layout , - typename ViewRHS::array_layout >::value ) - || - // Matching layout, same rank, and LHS dynamic rank - ( is_same< typename ViewLHS::array_layout , - typename ViewRHS::array_layout >::value - && - int(ViewLHS::rank) == int(ViewRHS::rank) - && - int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) ) - || - // Both rank-0, any shape and layout - ( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 ) - || - // Both rank-1 and LHS is dynamic rank-1, any shape and layout - ( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 && - int(ViewLHS::rank_dynamic) == 1 ) - }; - - enum { value = assignable_value && assignable_shape }; -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class ShapeType , class LayoutType , class Enable = void > -class LayoutStride ; - -/* Arrays with rank <= 1 have no stride */ -template< class ShapeType , class LayoutType > -class LayoutStride< ShapeType , LayoutType , - typename enable_if< ShapeType::rank <= 1 >::type > -{ -public: - - enum { dynamic = false }; - enum { value = 0 }; - - KOKKOS_INLINE_FUNCTION static - void assign( LayoutStride & , const unsigned ) {} - - KOKKOS_INLINE_FUNCTION static - void assign_no_padding( LayoutStride & , const ShapeType & ) {} - - KOKKOS_INLINE_FUNCTION static - void assign_with_padding( LayoutStride & , const ShapeType & ) {} -}; - -/* Array with LayoutLeft and 0 == rank_dynamic have static stride that are is not padded. 
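[Editor's note] The ViewAssignable trait above is a compile-time gate: an enum that is non-zero only when the value types, memory spaces, and managedness of the two views are compatible, which the assignment machinery then consumes through enable_if. A reduced sketch of that pattern using the standard type traits follows; the trait, space tag, and member names are hypothetical stand-ins, not the Kokkos implementation.

```cpp
// Compile-time assignability gate in the style of ViewAssignable (sketch).
#include <type_traits>

struct HostSpace {};  // stand-in memory-space tag

struct HostViewInt      { typedef int       value_type; typedef HostSpace memory_space; int*       ptr; };
struct HostViewConstInt { typedef const int value_type; typedef HostSpace memory_space; const int* ptr; };

template <class LHS, class RHS>
struct assignable {
  enum {
    value =
      // same value type, or LHS is the const-qualified version of RHS
      ( std::is_same<typename LHS::value_type, typename RHS::value_type>::value ||
        std::is_same<typename LHS::value_type, const typename RHS::value_type>::value )
      &&
      // same memory space
      std::is_same<typename LHS::memory_space, typename RHS::memory_space>::value
  };
};

// Participates in overload resolution only when the trait accepts the pair,
// e.g. assign(const_view, nonconst_view) compiles, the reverse does not.
template <class LHS, class RHS>
typename std::enable_if<assignable<LHS, RHS>::value>::type
assign(LHS& dst, const RHS& src) { dst.ptr = src.ptr; }  // hypothetical shallow copy
```

The LayoutStride specializations that follow use the same enable_if dispatch, selecting a compile-time or runtime stride from the shape's rank and rank_dynamic.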
*/ -template< class ShapeType > -class LayoutStride< ShapeType , LayoutLeft , - typename enable_if<( - ( 1 < ShapeType::rank ) && - ( 0 == ShapeType::rank_dynamic ) - )>::type > -{ -public: - - enum { dynamic = false }; - enum { value = ShapeType::N0 }; - - KOKKOS_INLINE_FUNCTION static - void assign( LayoutStride & , const unsigned ) {} - - KOKKOS_INLINE_FUNCTION static - void assign_no_padding( LayoutStride & , const ShapeType & ) {} - - KOKKOS_INLINE_FUNCTION static - void assign_with_padding( LayoutStride & , const ShapeType & ) {} -}; - -/* Array with LayoutRight and 1 >= rank_dynamic have static stride that is not padded */ -template< class ShapeType > -class LayoutStride< ShapeType , LayoutRight , - typename enable_if<( - ( 1 < ShapeType::rank ) && - ( 1 >= ShapeType::rank_dynamic ) - )>::type > -{ -public: - - enum { dynamic = false }; - enum { value = ShapeType::N1 * ShapeType::N2 * ShapeType::N3 * - ShapeType::N4 * ShapeType::N5 * ShapeType::N6 * ShapeType::N7 }; - - KOKKOS_INLINE_FUNCTION static - void assign( LayoutStride & , const unsigned ) {} - - KOKKOS_INLINE_FUNCTION static - void assign_no_padding( LayoutStride & , const ShapeType & ) {} - - KOKKOS_INLINE_FUNCTION static - void assign_with_padding( LayoutStride & , const ShapeType & ) {} -}; - - -/* Otherwise array has runtime stride that is padded. */ -template< class ShapeType , class LayoutType , class Enable > -class LayoutStride -{ -public: - - enum { dynamic = true }; - - unsigned value ; - - KOKKOS_INLINE_FUNCTION static - void assign( LayoutStride & stride , const unsigned n ) { stride.value = n ; } - - KOKKOS_INLINE_FUNCTION static - void assign_no_padding( LayoutStride & vs , const ShapeType & sh ) - { - enum { left = is_same< LayoutType , LayoutLeft >::value }; - - // Left layout arrays are aligned on the first dimension. - // Right layout arrays are aligned on blocks of the 2-8th dimensions. - vs.value = ShapeType::rank <= 1 ? 0 : ( - left ? sh.N0 - : sh.N1 * sh.N2 * sh.N3 * sh.N4 * sh.N5 * sh.N6 * sh.N7 ); - } - - KOKKOS_INLINE_FUNCTION static - void assign_with_padding( LayoutStride & vs , const ShapeType & sh ) - { - enum { div = MEMORY_ALIGNMENT / ShapeType::scalar_size }; - enum { mod = MEMORY_ALIGNMENT % ShapeType::scalar_size }; - enum { align = 0 == mod ? div : 0 }; - - assign_no_padding( vs , sh ); - - if ( align && MEMORY_ALIGNMENT_THRESHOLD * align < vs.value ) { - - const unsigned count_mod = vs.value % ( div ? div : 1 ); - - if ( count_mod ) { vs.value += align - count_mod ; } - } - } -}; - -template< class ShapeType , class LayoutType > -KOKKOS_INLINE_FUNCTION -size_t capacity( const ShapeType & shape , - const LayoutStride< ShapeType , LayoutType > & stride ) -{ - enum { left = is_same< LayoutType , LayoutLeft >::value }; - - return ShapeType::rank <= 1 ? size_t(shape.N0) : ( - left ? 
size_t( stride.value * shape.N1 * shape.N2 * shape.N3 * shape.N4 * shape.N5 * shape.N6 * shape.N7 ) - : size_t( stride.value * shape.N0 )); -} - -template< typename iType , class ShapeType , class LayoutType > -KOKKOS_INLINE_FUNCTION -void stride( iType * const s , const ShapeType & shape , - const LayoutStride< ShapeType , LayoutType > & stride ) -{ - enum { rank = ShapeType::rank }; - enum { left = is_same< LayoutType , LayoutLeft >::value }; - - if ( 0 < rank ) { - if ( 1 == rank ) { - s[0] = 1 ; - } - else if ( left ) { - s[0] = 1 ; - s[1] = stride.value ; - if ( 2 < rank ) { s[2] = s[1] * shape.N1 ; } - if ( 3 < rank ) { s[3] = s[2] * shape.N2 ; } - if ( 4 < rank ) { s[4] = s[3] * shape.N3 ; } - if ( 5 < rank ) { s[5] = s[4] * shape.N4 ; } - if ( 6 < rank ) { s[6] = s[5] * shape.N5 ; } - if ( 7 < rank ) { s[7] = s[6] * shape.N6 ; } - } - else { - s[rank-1] = 1 ; - if ( 7 < rank ) { s[6] = s[7] * shape.N7 ; } - if ( 6 < rank ) { s[5] = s[6] * shape.N6 ; } - if ( 5 < rank ) { s[4] = s[5] * shape.N5 ; } - if ( 4 < rank ) { s[3] = s[4] * shape.N4 ; } - if ( 3 < rank ) { s[2] = s[3] * shape.N3 ; } - if ( 2 < rank ) { s[1] = s[2] * shape.N2 ; } - s[0] = stride.value ; - } - } -} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief View tracking increment/decrement only happens when - * view memory is managed and executing in the host space. - */ -template< class ViewTraits , class Enable = void > -struct ViewTracking { - KOKKOS_INLINE_FUNCTION static void increment( const void * ) {} - KOKKOS_INLINE_FUNCTION static void decrement( const void * ) {} -}; - -template< class ViewTraits > -struct ViewTracking< ViewTraits , - typename enable_if<( - ViewTraits::is_managed && - Impl::is_same< HostSpace , ExecutionSpace >::value - )>::type > -{ - typedef typename ViewTraits::memory_space memory_space ; - - KOKKOS_INLINE_FUNCTION static void increment( const void * ptr ) - { memory_space::increment( ptr ); } - - KOKKOS_INLINE_FUNCTION static void decrement( const void * ptr ) - { memory_space::decrement( ptr ); } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class DstMemorySpace , class SrcMemorySpace > -struct DeepCopy ; - -template< class OutputView , unsigned Rank = OutputView::Rank > -struct ViewInit -{ - typedef typename OutputView::device_type device_type ; - typedef typename OutputView::scalar_type scalar_type ; - typedef typename device_type::size_type size_type ; - - const OutputView output ; - - explicit ViewInit( const OutputView & arg_out ) : output( arg_out ) - { parallel_for( output.dimension_0() , *this ); } - - KOKKOS_INLINE_FUNCTION - void operator()( const size_type i0 ) const - { - const scalar_type default_value = scalar_type(); - - for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) { - for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) { - for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) { - for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) { - for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) { - for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) { - for ( size_type i7 = 0 ; i7 < output.dimension_7() ; 
++i7 ) { - new (&output.at(i0,i1,i2,i3,i4,i5,i6,i7)) scalar_type(default_value) ; - }}}}}}} - } -}; - -template< class OutputView > -struct ViewInit< OutputView , 1 > -{ - typedef typename OutputView::device_type device_type ; - typedef typename OutputView::value_type value_type ; - typedef typename device_type::size_type size_type ; - - const OutputView output ; - - explicit ViewInit( const OutputView & arg_out ) : output( arg_out ) - { parallel_for( output.dimension_0() , *this ); } - - KOKKOS_INLINE_FUNCTION - void operator()( const size_type i0 ) const - { - value_type default_value = value_type(); - new (&output(i0)) value_type(default_value) ; - } -}; - -template< class OutputView > -struct ViewInit< OutputView , 0 > -{ - typedef typename OutputView::device_type device_type ; - typedef typename OutputView::value_type value_type ; - typedef typename device_type::size_type size_type ; - - const OutputView output ; - - explicit ViewInit( const OutputView & arg_out ) : output( arg_out ) - { parallel_for( 1 , *this ); } - - KOKKOS_INLINE_FUNCTION - void operator()( const size_type /*i0*/ ) const - { - value_type default_value = value_type(); - new (&(*output)) value_type(default_value) ; - } -}; - -template< class Device > -struct ViewInitialize -{ - template< class ViewType > - inline explicit ViewInitialize( const ViewType & view ) - { ViewInit init( view ); } -}; - -template< class OutputView , class InputView , unsigned Rank = OutputView::Rank > -struct ViewRemap -{ - typedef typename OutputView::device_type device_type ; - typedef typename device_type::size_type size_type ; - - const OutputView output ; - const InputView input ; - const size_type n0 ; - const size_type n1 ; - const size_type n2 ; - const size_type n3 ; - const size_type n4 ; - const size_type n5 ; - const size_type n6 ; - const size_type n7 ; - - ViewRemap( const OutputView & arg_out , const InputView & arg_in ) - : output( arg_out ), input( arg_in ) - , n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) ) - , n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) ) - , n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) ) - , n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) ) - , n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) ) - , n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) ) - , n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) ) - , n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) ) - { - parallel_for( n0 , *this ); - } - - KOKKOS_INLINE_FUNCTION - void operator()( const size_type i0 ) const - { - for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) { - for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) { - for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) { - for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) { - for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) { - for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) { - for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) { - output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7); - }}}}}}} - } -}; - -template< class OutputView , class InputView > -struct ViewRemap< OutputView , InputView , 0 > -{ - typedef typename OutputView::value_type value_type ; - typedef typename OutputView::memory_space dst_space ; - typedef typename InputView ::memory_space src_space ; - - ViewRemap( const OutputView & arg_out , const InputView & arg_in ) - { - DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() 
, - arg_in.ptr_on_device() , - sizeof(value_type) ); - } -}; - -template< class OutputView , unsigned Rank = OutputView::Rank > -struct ViewFill -{ - typedef typename OutputView::device_type device_type ; - typedef typename OutputView::const_value_type const_value_type ; - typedef typename device_type::size_type size_type ; - - const OutputView output ; - const_value_type input ; - - ViewFill( const OutputView & arg_out , const_value_type & arg_in ) - : output( arg_out ), input( arg_in ) - { - parallel_for( output.dimension_0() , *this ); - } - - KOKKOS_INLINE_FUNCTION - void operator()( const size_type i0 ) const - { - for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) { - for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) { - for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) { - for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) { - for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) { - for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) { - for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) { - output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ; - }}}}}}} - } -}; - -template< class OutputView > -struct ViewFill< OutputView , 0 > -{ - typedef typename OutputView::device_type device_type ; - typedef typename OutputView::const_value_type const_value_type ; - typedef typename OutputView::memory_space dst_space ; - - ViewFill( const OutputView & arg_out , const_value_type & arg_in ) - { - DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in , - sizeof(const_value_type) ); - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */ - - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp b/kokkos/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp deleted file mode 100644 index 71bc244..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp +++ /dev/null @@ -1,414 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_VIEWTILELEFT_HPP -#define KOKKOS_VIEWTILELEFT_HPP - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -struct LayoutTileLeftFast ; -struct LayoutTileLeftSlow ; - -template< typename ScalarType , unsigned N0 , unsigned N1 , - class RankDynamic , class MemorySpace , class MemoryTraits > -struct ViewSpecialize< ScalarType , ScalarType , - LayoutTileLeft , unsigned_<2> , RankDynamic , - MemorySpace , MemoryTraits > -{ typedef LayoutTileLeftFast type ; }; - -template< typename ScalarType , unsigned N0 , unsigned N1 , - class RankDynamic , class MemorySpace , class MemoryTraits > -struct ViewSpecialize< ScalarType , ScalarType , - LayoutTileLeft , unsigned_<2> , RankDynamic , - MemorySpace , MemoryTraits > -{ typedef LayoutTileLeftSlow type ; }; - -//---------------------------------------------------------------------------- - -template<> -struct ViewAssignment< LayoutTileLeftFast , void , void > -{ -private: - - template< class DT , class DL , class DD , class DM > - inline - void allocate( View & dst , const std::string label ) - { - typedef View DstViewType ; - typedef typename DstViewType::memory_space memory_space ; - - ViewTracking< DstViewType >::decrement( dst.m_ptr_on_device ); - - dst.m_ptr_on_device = (typename DstViewType::value_type *) - memory_space::allocate( label , - typeid(typename DstViewType::value_type) , - sizeof(typename DstViewType::value_type) , - dst.capacity() ); - - ViewInitialize< typename DstViewType::device_type > init( dst ); - } - -public: - - template< class DT , class DL , class DD , class DM > - inline - ViewAssignment( View & dst , - const typename enable_if< ViewTraits::is_managed , std::string >::type & label , - const size_t n0 , - const size_t n1 , - const size_t = 0 , - const size_t = 0 , - const size_t = 0 , - const size_t = 0 , - const size_t = 0 , - const size_t = 0 ) - { - typedef View DstViewType ; - - dst.m_shape.N0 = n0 ; - dst.m_shape.N1 = n1 ; - dst.m_tile_N0 = ( n0 + DstViewType::MASK_0 ) >> DstViewType::SHIFT_0 ; - - allocate( dst , label ); - } - - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - ViewAssignment( View & dst , - const View & src , - typename enable_if< - is_same< View , - typename View::HostMirror >::value - >::type * = 0 ) - { - dst.m_shape = src.m_shape ; - dst.m_tile_N0 = src.m_tile_N0 ; - allocate( dst , "mirror" ); - } -}; - -//---------------------------------------------------------------------------- - -template<> -struct ViewAssignment< LayoutTileLeftFast , LayoutTileLeftFast, void > -{ - /** \brief Assign compatible views */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL 
, class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const typename enable_if<( - ViewAssignable< ViewTraits , ViewTraits >::value - )>::type * = 0 ) - { - typedef View DstViewType ; - typedef typename DstViewType::shape_type shape_type ; - typedef typename DstViewType::memory_space memory_space ; - typedef typename DstViewType::memory_traits memory_traits ; - - ViewTracking< DstViewType >::decrement( dst.m_ptr_on_device ); - - shape_type::assign( dst.m_shape, src.m_shape.N0 , src.m_shape.N1 ); - - dst.m_tile_N0 = src.m_tile_N0 ; - dst.m_ptr_on_device = src.m_ptr_on_device ; - - ViewTracking< DstViewType >::increment( dst.m_ptr_on_device ); - } - - //------------------------------------ - /** \brief Deep copy data from compatible value type, layout, rank, and specialization. - * Check the dimensions and allocation lengths at runtime. - */ - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - inline static - void deep_copy( const View & dst , - const View & src , - const typename Impl::enable_if<( - Impl::is_same< typename ViewTraits::value_type , - typename ViewTraits::non_const_value_type >::value - && - Impl::is_same< typename ViewTraits::array_layout , - typename ViewTraits::array_layout >::value - && - ( unsigned(ViewTraits::rank) == unsigned(ViewTraits::rank) ) - )>::type * = 0 ) - { - typedef ViewTraits dst_traits ; - typedef ViewTraits src_traits ; - - if ( dst.m_ptr_on_device != src.m_ptr_on_device ) { - - Impl::assert_shapes_are_equal( dst.m_shape , src.m_shape ); - - const size_t n_dst = sizeof(typename dst_traits::scalar_type) * dst.capacity(); - const size_t n_src = sizeof(typename src_traits::scalar_type) * src.capacity(); - - Impl::assert_counts_are_equal( n_dst , n_src ); - - DeepCopy< typename dst_traits::memory_space , - typename src_traits::memory_space >( dst.m_ptr_on_device , src.m_ptr_on_device , n_dst ); - } - } -}; - -//---------------------------------------------------------------------------- - -template<> -struct ViewAssignment< LayoutDefault , LayoutTileLeftFast, void > -{ - /** \brief Extracting a single tile from a tiled view */ - - template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > - KOKKOS_INLINE_FUNCTION - ViewAssignment( View & dst , - const View & src , - const unsigned i0 , - const typename enable_if<( - is_same< View , - typename View::tile_type >::value - ), unsigned >::type i1 ) - { - typedef View DstViewType ; - typedef typename DstViewType::shape_type shape_type ; - typedef typename DstViewType::memory_space memory_space ; - typedef typename DstViewType::memory_traits memory_traits ; - - ViewTracking< DstViewType >::decrement( dst.m_ptr_on_device ); - - enum { N0 = SL::N0 }; - enum { N1 = SL::N1 }; - enum { SHIFT_0 = power_of_two::value }; - enum { MASK_0 = N0 - 1 }; - enum { SHIFT_1 = power_of_two::value }; - - const unsigned NT0 = ( src.dimension_0() + MASK_0 ) >> SHIFT_0 ; - - dst.m_ptr_on_device = src.m_ptr_on_device + (( i0 + i1 * NT0 ) << ( SHIFT_0 + SHIFT_1 )); - - ViewTracking< DstViewType >::increment( dst.m_ptr_on_device ); - } -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< class DataType , class Arg1Type , class Arg2Type , class Arg3Type > -class View< DataType , Arg1Type , Arg2Type , 
Arg3Type , Impl::LayoutTileLeftFast > - : public ViewTraits< DataType , Arg1Type , Arg2Type , Arg3Type > -{ -private: - template< class , class , class > friend struct Impl::ViewAssignment ; - - typedef ViewTraits< DataType , Arg1Type , Arg2Type , Arg3Type > traits ; - - typedef Impl::ViewAssignment alloc ; - - typedef Impl::ViewAssignment assign ; - - typename traits::value_type * m_ptr_on_device ; - typename traits::shape_type m_shape ; - unsigned m_tile_N0 ; - - typedef typename traits::array_layout layout ; - - enum { SHIFT_0 = Impl::power_of_two::value }; - enum { SHIFT_1 = Impl::power_of_two::value }; - enum { MASK_0 = layout::N0 - 1 }; - enum { MASK_1 = layout::N1 - 1 }; - -public: - - typedef Impl::LayoutTileLeftFast specialize ; - - typedef View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::device_type , - typename traits::memory_traits > const_type ; - - typedef View< typename traits::non_const_data_type , - typename traits::array_layout , - typename traits::device_type::host_mirror_device_type , - void > HostMirror ; - - enum { Rank = 2 }; - - KOKKOS_INLINE_FUNCTION typename traits::shape_type shape() const { return m_shape ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { return m_shape.N0 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_shape.N1 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return 1 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return 1 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return 1 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return 1 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return 1 ; } - KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return 1 ; } - - KOKKOS_INLINE_FUNCTION - View() : m_ptr_on_device(0) {} - - KOKKOS_INLINE_FUNCTION - ~View() { Impl::ViewTracking< traits >::decrement( m_ptr_on_device ); } - - KOKKOS_INLINE_FUNCTION - View( const View & rhs ) : m_ptr_on_device(0) { (void)assign( *this , rhs ); } - - KOKKOS_INLINE_FUNCTION - View & operator = ( const View & rhs ) { (void)assign( *this , rhs ); return *this ; } - - //------------------------------------ - // Array allocator and member access operator: - - View( const std::string & label , const size_t n0 , const size_t n1 ) - : m_ptr_on_device(0) { (void)alloc( *this , label , n0 , n1 ); } - - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename traits::value_type & operator()( const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - - // Use care to insert necessary parentheses as the - // shift operators have lower precedence than the arithmatic operators. 
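[Editor's note] As the comment notes, the shift operators bind more loosely than + and *, so the indexing expression that follows is fully parenthesized: a tile offset scaled by the tile size, plus an intra-tile offset. Written out as a standalone sketch for a hypothetical 8x8 tile (the constants and function name are illustrative; the class above derives SHIFT_0/SHIFT_1 and the masks from the LayoutTileLeft template arguments):

```cpp
// Tile-offset arithmetic for power-of-two tiles (illustrative constants).
#include <cstddef>

enum { SHIFT_0 = 3, SHIFT_1 = 3 };                              // 8 x 8 tile
enum { MASK_0 = (1 << SHIFT_0) - 1, MASK_1 = (1 << SHIFT_1) - 1 };

// tiles_n0: number of tiles along dimension 0 (the m_tile_N0 member above).
inline std::size_t tiled_offset(std::size_t tiles_n0, std::size_t i0, std::size_t i1)
{
  const std::size_t tile_index  = (i0 >> SHIFT_0) + tiles_n0 * (i1 >> SHIFT_1);
  const std::size_t tile_size   = std::size_t(1) << (SHIFT_0 + SHIFT_1);
  const std::size_t within_tile = (i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0);
  return tile_index * tile_size + within_tile;
}
```

The capacity() member above is the same quantity taken over whole tiles: the total number of tiles shifted left by (SHIFT_0 + SHIFT_1).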
- - return m_ptr_on_device[ - // ( ( Tile offset ) * ( Tile size ) ) - + ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) ) - // ( Offset within tile ) - + ( (i0 & MASK_0) + ((i1 & MASK_1)< - KOKKOS_INLINE_FUNCTION - typename traits::value_type & - at( const iType0 & i0 , const iType1 & i1 , const int , const int , - const int , const int , const int , const int ) const - { - KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device ); - KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_shape, i0,i1 ); - - // Use care to insert necessary parentheses as the - // shift operators have lower precedence than the arithmatic operators. - - return m_ptr_on_device[ - // ( ( Tile offset ) * ( Tile size ) ) - + ( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) ) - // ( Offset within tile ) - + ( (i0 & MASK_0) + ((i1 & MASK_1)< - tile_type ; - - KOKKOS_INLINE_FUNCTION - typename traits::value_type * ptr_on_device() const { return m_ptr_on_device ; } - - KOKKOS_INLINE_FUNCTION - size_t tiles_in_dimension_0() const { return m_tile_N0 ; } - - KOKKOS_INLINE_FUNCTION - size_t tiles_in_dimension_1() const { return ( m_shape.N1 + MASK_1 ) >> SHIFT_1 ; } - - - template< typename iType > - KOKKOS_INLINE_FUNCTION - size_t global_to_tile_index_0( const iType & global_i0 ) const - { return global_i0 >> SHIFT_0 ; } - - template< typename iType > - KOKKOS_INLINE_FUNCTION - size_t global_to_tile_index_1( const iType & global_i1 ) const - { return global_i1 >> SHIFT_1 ; } - - - template< typename iType > - KOKKOS_INLINE_FUNCTION - size_t global_to_local_tile_index_0( const iType & global_i0 ) const - { return global_i0 & MASK_0 ; } - - template< typename iType > - KOKKOS_INLINE_FUNCTION - size_t global_to_local_tile_index_1( const iType & global_i1 ) const - { return global_i1 & MASK_1 ; } - - - //------------------------------------ - - KOKKOS_INLINE_FUNCTION - typename traits::size_type capacity() const - { - return ( m_tile_N0 * ( ( m_shape.N1 + MASK_1 ) >> SHIFT_1 ) ) << ( SHIFT_0 + SHIFT_1 ); - } -}; - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_hwloc.cpp b/kokkos/kokkos/core/src/impl/Kokkos_hwloc.cpp deleted file mode 100644 index 0399f43..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_hwloc.cpp +++ /dev/null @@ -1,897 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#define DEBUG_PRINT 0 - -#include -#include - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -int host_thread_binding( const std::pair team_topo , - std::pair thread_coord[] ) -{ - const std::pair current = hwloc::get_this_thread_coordinate(); - const int thread_count = team_topo.first * team_topo.second ; - - int i = 0 ; - - // Match one of the requests: - for ( i = 0 ; i < thread_count && current != thread_coord[i] ; ++i ); - - if ( thread_count == i ) { - // Match the NUMA request: - for ( i = 0 ; i < thread_count && current.first != thread_coord[i].first ; ++i ); - } - - if ( thread_count == i ) { - // Match any unclaimed request: - for ( i = 0 ; i < thread_count && ~0u == thread_coord[i].first ; ++i ); - } - - if ( i < thread_count ) { - if ( ! hwloc::bind_this_thread( thread_coord[i] ) ) i = thread_count ; - } - - if ( i < thread_count ) { - -#if DEBUG_PRINT - if ( current != thread_coord[i] ) { - std::cout << " host_thread_binding(" - << team_topo.first << "x" << team_topo.second - << ") rebinding from (" - << current.first << "," - << current.second - << ") to (" - << thread_coord[i].first << "," - << thread_coord[i].second - << ")" << std::endl ; - } -#endif - - thread_coord[i].first = ~0u ; - thread_coord[i].second = ~0u ; - } - - return i < thread_count ? 
i : -1 ; -} - - -void host_thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - std::pair thread_coord[] ) -{ - const std::pair base( core_topo.first - core_use.first , - core_topo.second - core_use.second ); - - for ( unsigned thread_rank = 0 , team_rank = 0 ; team_rank < team_topo.first ; ++team_rank ) { - for ( unsigned worker_rank = 0 ; worker_rank < team_topo.second ; ++worker_rank , ++thread_rank ) { - - unsigned team_in_numa_count = 0 ; - unsigned team_in_numa_rank = 0 ; - - { // Distribute teams among NUMA regions: - // team_count = k * bin + ( #NUMA - k ) * ( bin + 1 ) - const unsigned bin = team_topo.first / core_use.first ; - const unsigned bin1 = bin + 1 ; - const unsigned k = core_use.first * bin1 - team_topo.first ; - const unsigned part = k * bin ; - - if ( team_rank < part ) { - thread_coord[ thread_rank ].first = base.first + team_rank / bin ; - team_in_numa_rank = team_rank % bin ; - team_in_numa_count = bin ; - } - else { - thread_coord[ thread_rank ].first = base.first + k + ( team_rank - part ) / bin1 ; - team_in_numa_rank = ( team_rank - part ) % bin1 ; - team_in_numa_count = bin1 ; - } - } - - { // Distribute workers to cores within this NUMA region: - // worker_in_numa_count = k * bin + ( (#CORE/NUMA) - k ) * ( bin + 1 ) - const unsigned worker_in_numa_count = team_in_numa_count * team_topo.second ; - const unsigned worker_in_numa_rank = team_in_numa_rank * team_topo.second + worker_rank ; - - const unsigned bin = worker_in_numa_count / core_use.second ; - const unsigned bin1 = bin + 1 ; - const unsigned k = core_use.second * bin1 - worker_in_numa_count ; - const unsigned part = k * bin ; - - thread_coord[ thread_rank ].second = base.second + - ( ( worker_in_numa_rank < part ) - ? ( worker_in_numa_rank / bin ) - : ( k + ( worker_in_numa_rank - part ) / bin1 ) ); - } - }} - -#if DEBUG_PRINT - - std::cout << "Kokkos::host_thread_mapping (unrotated)" << std::endl ; - - for ( unsigned g = 0 , t = 0 ; g < team_topo.first ; ++g ) { - std::cout << " team[" << g - << "] on numa[" << thread_coord[t].first - << "] cores(" ; - for ( unsigned w = 0 ; w < team_topo.second ; ++w , ++t ) { - std::cout << " " << thread_coord[t].second ; - } - std::cout << " )" << std::endl ; - } - -#endif - -} - -void host_thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - const std::pair master_coord , - std::pair thread_coord[] ) -{ - const unsigned thread_count = team_topo.first * team_topo.second ; - const unsigned core_base = core_topo.second - core_use.second ; - - host_thread_mapping( team_topo , core_use , core_topo , thread_coord ); - - // The master core should be thread #0 so rotate all coordinates accordingly ... - - const std::pair offset - ( ( thread_coord[0].first < master_coord.first ? master_coord.first - thread_coord[0].first : 0 ) , - ( thread_coord[0].second < master_coord.second ? 
master_coord.second - thread_coord[0].second : 0 ) ); - - for ( unsigned i = 0 ; i < thread_count ; ++i ) { - thread_coord[i].first = ( thread_coord[i].first + offset.first ) % core_use.first ; - thread_coord[i].second = core_base + ( thread_coord[i].second + offset.second - core_base ) % core_use.second ; - } - -#if DEBUG_PRINT - - std::cout << "Kokkos::host_thread_mapping (rotated)" << std::endl ; - - for ( unsigned g = 0 , t = 0 ; g < team_topo.first ; ++g ) { - std::cout << " team[" << g - << "] on numa[" << thread_coord[t].first - << "] cores(" ; - for ( unsigned w = 0 ; w < team_topo.second ; ++w , ++t ) { - std::cout << " " << thread_coord[t].second ; - } - std::cout << " )" << std::endl ; - } - -#endif - -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace hwloc { - -std::pair use_core_topology( const unsigned thread_count ) -{ - const unsigned hwloc_numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned hwloc_cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned hwloc_threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - const unsigned hwloc_capacity = hwloc_numa_count * hwloc_cores_per_numa * hwloc_threads_per_core ; - - if ( hwloc_capacity < thread_count ) { - std::ostringstream msg ; - - msg << "Kokkos::hwloc::use_core_topology FAILED : Requested more cores or threads than HWLOC reports are available " - << " numa_count(" << hwloc_numa_count << ") , cores_per_numa(" << hwloc_cores_per_numa << ")" - << " capacity(" << hwloc_capacity << ")" ; - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - const std::pair core_topo( hwloc_numa_count , hwloc_cores_per_numa ); - - // Start by assuming use of all available cores - std::pair use_core_topo = core_topo ; - - if ( thread_count <= ( core_topo.first - 1 ) * core_topo.second ) { - // Can spawn all requested threads on their own core within fewer NUMA regions of cores. - use_core_topo.first = ( thread_count + core_topo.second - 1 ) / core_topo.second ; - } - - if ( thread_count <= core_topo.first * ( core_topo.second - 1 ) ) { - // Can spawn all requested threads on their own core and have excess core. - use_core_topo.second = ( thread_count + core_topo.first - 1 ) / core_topo.first ; - } - - if ( core_topo.first * core_topo.second < thread_count && - thread_count <= core_topo.first * ( core_topo.second - 1 ) * hwloc_threads_per_core ) { - // Will oversubscribe cores and can omit one core - --use_core_topo.second ; - } - - return use_core_topo ; -} - -int thread_binding( const std::pair team_topo , - std::pair thread_coord[] ) -{ - const std::pair current = hwloc::get_this_thread_coordinate(); - const int thread_count = team_topo.first * team_topo.second ; - - int i = 0 ; - - // Match one of the requests: - for ( i = 0 ; i < thread_count && current != thread_coord[i] ; ++i ); - - if ( thread_count == i ) { - // Match the NUMA request: - for ( i = 0 ; i < thread_count && current.first != thread_coord[i].first ; ++i ); - } - - if ( thread_count == i ) { - // Match any unclaimed request: - for ( i = 0 ; i < thread_count && ~0u == thread_coord[i].first ; ++i ); - } - - if ( i < thread_count ) { - if ( ! 
hwloc::bind_this_thread( thread_coord[i] ) ) i = thread_count ; - } - - if ( i < thread_count ) { - -#if DEBUG_PRINT - if ( current != thread_coord[i] ) { - std::cout << " host_thread_binding(" - << team_topo.first << "x" << team_topo.second - << ") rebinding from (" - << current.first << "," - << current.second - << ") to (" - << thread_coord[i].first << "," - << thread_coord[i].second - << ")" << std::endl ; - } -#endif - - thread_coord[i].first = ~0u ; - thread_coord[i].second = ~0u ; - } - - return i < thread_count ? i : -1 ; -} - - -void thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - std::pair thread_coord[] ) -{ - const std::pair base( core_topo.first - core_use.first , - core_topo.second - core_use.second ); - - for ( unsigned thread_rank = 0 , team_rank = 0 ; team_rank < team_topo.first ; ++team_rank ) { - for ( unsigned worker_rank = 0 ; worker_rank < team_topo.second ; ++worker_rank , ++thread_rank ) { - - unsigned team_in_numa_count = 0 ; - unsigned team_in_numa_rank = 0 ; - - { // Distribute teams among NUMA regions: - // team_count = k * bin + ( #NUMA - k ) * ( bin + 1 ) - const unsigned bin = team_topo.first / core_use.first ; - const unsigned bin1 = bin + 1 ; - const unsigned k = core_use.first * bin1 - team_topo.first ; - const unsigned part = k * bin ; - - if ( team_rank < part ) { - thread_coord[ thread_rank ].first = base.first + team_rank / bin ; - team_in_numa_rank = team_rank % bin ; - team_in_numa_count = bin ; - } - else { - thread_coord[ thread_rank ].first = base.first + k + ( team_rank - part ) / bin1 ; - team_in_numa_rank = ( team_rank - part ) % bin1 ; - team_in_numa_count = bin1 ; - } - } - - { // Distribute workers to cores within this NUMA region: - // worker_in_numa_count = k * bin + ( (#CORE/NUMA) - k ) * ( bin + 1 ) - const unsigned worker_in_numa_count = team_in_numa_count * team_topo.second ; - const unsigned worker_in_numa_rank = team_in_numa_rank * team_topo.second + worker_rank ; - - const unsigned bin = worker_in_numa_count / core_use.second ; - const unsigned bin1 = bin + 1 ; - const unsigned k = core_use.second * bin1 - worker_in_numa_count ; - const unsigned part = k * bin ; - - thread_coord[ thread_rank ].second = base.second + - ( ( worker_in_numa_rank < part ) - ? ( worker_in_numa_rank / bin ) - : ( k + ( worker_in_numa_rank - part ) / bin1 ) ); - } - }} - -#if DEBUG_PRINT - - std::cout << "Kokkos::hwloc::thread_mapping (unrotated)" << std::endl ; - - for ( unsigned g = 0 , t = 0 ; g < team_topo.first ; ++g ) { - std::cout << " team[" << g - << "] on numa[" << thread_coord[t].first - << "] cores(" ; - for ( unsigned w = 0 ; w < team_topo.second ; ++w , ++t ) { - std::cout << " " << thread_coord[t].second ; - } - std::cout << " )" << std::endl ; - } - -#endif - -} - -void thread_mapping( const std::pair team_topo , - const std::pair core_use , - const std::pair core_topo , - const std::pair master_coord , - std::pair thread_coord[] ) -{ - const unsigned thread_count = team_topo.first * team_topo.second ; - const unsigned core_base = core_topo.second - core_use.second ; - - thread_mapping( team_topo , core_use , core_topo , thread_coord ); - - // The master core should be thread #0 so rotate all coordinates accordingly ... - - const std::pair offset - ( ( thread_coord[0].first < master_coord.first ? master_coord.first - thread_coord[0].first : 0 ) , - ( thread_coord[0].second < master_coord.second ? 
master_coord.second - thread_coord[0].second : 0 ) ); - - for ( unsigned i = 0 ; i < thread_count ; ++i ) { - thread_coord[i].first = ( thread_coord[i].first + offset.first ) % core_use.first ; - thread_coord[i].second = core_base + ( thread_coord[i].second + offset.second - core_base ) % core_use.second ; - } - -#if DEBUG_PRINT - - std::cout << "Kokkos::hwloc::thread_mapping (rotated)" << std::endl ; - - for ( unsigned g = 0 , t = 0 ; g < team_topo.first ; ++g ) { - std::cout << " team[" << g - << "] on numa[" << thread_coord[t].first - << "] cores(" ; - for ( unsigned w = 0 ; w < team_topo.second ; ++w , ++t ) { - std::cout << " " << thread_coord[t].second ; - } - std::cout << " )" << std::endl ; - } - -#endif - -} - -} /* namespace hwloc */ -} /* namespace Kokkos */ - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -#if defined( KOKKOS_HAVE_HWLOC ) - -#include -#include -#include - -/*--------------------------------------------------------------------------*/ -/* Third Party Libraries */ - -/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */ -#include - -#define REQUIRED_HWLOC_API_VERSION 0x000010300 - -#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION -#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater" -#endif - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace hwloc { -namespace { - -enum { MAX_CORE = 1024 }; - -std::pair s_core_topology(0,0); -unsigned s_core_capacity(0); -hwloc_topology_t s_hwloc_topology(0); -hwloc_bitmap_t s_hwloc_location(0); -hwloc_bitmap_t s_process_binding(0); -hwloc_bitmap_t s_core[ MAX_CORE ]; - -struct Sentinel { - ~Sentinel(); - Sentinel(); -}; - -void sentinel() -{ static Sentinel self ; } - -Sentinel::~Sentinel() -{ - hwloc_topology_destroy( s_hwloc_topology ); - hwloc_bitmap_free( s_process_binding ); - hwloc_bitmap_free( s_hwloc_location ); -} - -Sentinel::Sentinel() -{ - s_core_topology = std::pair(0,0); - s_core_capacity = 0 ; - s_hwloc_topology = 0 ; - s_hwloc_location = 0 ; - s_process_binding = 0 ; - - for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ; - - hwloc_topology_init( & s_hwloc_topology ); - hwloc_topology_load( s_hwloc_topology ); - - s_hwloc_location = hwloc_bitmap_alloc(); - s_process_binding = hwloc_bitmap_alloc(); - - hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS ); - - // Choose a hwloc object type for the NUMA level, which may not exist. - - hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ; - - { - // Object types to search, in order. - static const hwloc_obj_type_t candidate_root_type[] = - { HWLOC_OBJ_NODE /* NUMA region */ - , HWLOC_OBJ_SOCKET /* hardware socket */ - , HWLOC_OBJ_MACHINE /* local machine */ - }; - - enum { CANDIDATE_ROOT_TYPE_COUNT = - sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) }; - - for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) { - if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) { - root_type = candidate_root_type[k] ; - } - } - } - - // Determine which of these 'root' types are available to this process. - // The process may have been bound (e.g., by MPI) to a subset of these root types. 
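[Editor's note] Both mapping routines above rely on the same even-binning arithmetic: T items are split over B bins so that k bins hold 'bin' items and the remaining B - k bins hold 'bin + 1', i.e. T == k*bin + (B-k)*(bin+1). A minimal sketch of that computation; the function name is illustrative:

```cpp
// Which bin does item r land in when T items are spread over B bins as
// evenly as possible?  Mirrors the bin/bin1/k/part arithmetic used above.
#include <cassert>

inline unsigned which_bin(unsigned r, unsigned T, unsigned B)
{
  assert(B > 0 && r < T);
  const unsigned bin  = T / B;          // smaller bin size (rounded down)
  const unsigned bin1 = bin + 1;
  const unsigned k    = B * bin1 - T;   // number of bins of size 'bin'
  const unsigned part = k * bin;        // items covered by the small bins
  return (r < part) ? (r / bin) : (k + (r - part) / bin1);
}
```

thread_mapping applies this twice: once to spread teams over the NUMA regions in use, then again to spread each team's workers over that region's cores, before rotating the coordinates so the master thread keeps rank 0.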
- // Determine current location of the master (calling) process> - - hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc(); - - hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD ); - - const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type ); - - unsigned root_base = max_root ; - unsigned root_count = 0 ; - unsigned core_per_root = 0 ; - unsigned pu_per_core = 0 ; - bool symmetric = true ; - - for ( unsigned i = 0 ; i < max_root ; ++i ) { - - const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i ); - - if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) { - - ++root_count ; - - // Remember which root (NUMA) object the master thread is running on. - // This will be logical NUMA rank #0 for this process. - - if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) { - root_base = i ; - } - - // Count available cores: - - const unsigned max_core = - hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , - HWLOC_OBJ_CORE ); - - unsigned core_count = 0 ; - - for ( unsigned j = 0 ; j < max_core ; ++j ) { - - const hwloc_obj_t core = - hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , - HWLOC_OBJ_CORE , j ); - - // If process' cpuset intersects core's cpuset then process can access this core. - // Must use intersection instead of inclusion because the Intel-Phi - // MPI may bind the process to only one of the core's hyperthreads. - // - // Assumption: if the process can access any hyperthread of the core - // then it has ownership of the entire core. - // This assumes that it would be performance-detrimental - // to spawn more than one MPI process per core and use nested threading. - - if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { - - ++core_count ; - - const unsigned pu_count = - hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , - core->allowed_cpuset , - HWLOC_OBJ_PU ); - - if ( pu_per_core == 0 ) pu_per_core = pu_count ; - - // Enforce symmetry by taking the minimum: - - pu_per_core = std::min( pu_per_core , pu_count ); - - if ( pu_count != pu_per_core ) symmetric = false ; - } - } - - if ( 0 == core_per_root ) core_per_root = core_count ; - - // Enforce symmetry by taking the minimum: - - core_per_root = std::min( core_per_root , core_count ); - - if ( core_count != core_per_root ) symmetric = false ; - } - } - - s_core_topology.first = root_count ; - s_core_topology.second = core_per_root ; - s_core_capacity = pu_per_core ; - - // Fill the 's_core' array for fast mapping from a core coordinate to the - // hwloc cpuset object required for thread location querying and binding. 
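[Editor's note] For reference, a minimal self-contained sketch of the hwloc calls this Sentinel builds on: load the topology, enumerate cores, and bind the calling thread to one of them. Error handling is trimmed, the function name is illustrative, and 'core->cpuset' is used here where the code above reads the hwloc-1.x 'allowed_cpuset' member.

```cpp
// Bind the calling thread to core number 'core_rank' (sketch, not Kokkos API).
#include <hwloc.h>

int bind_to_core(unsigned core_rank)
{
  hwloc_topology_t topo;
  hwloc_topology_init(&topo);
  hwloc_topology_load(topo);

  int result = -1;
  const int ncore = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);

  if ((int) core_rank < ncore) {
    const hwloc_obj_t core = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, core_rank);
    if (core) {
      // Bind only the calling thread, and fail rather than approximate.
      result = hwloc_set_cpubind(topo, core->cpuset,
                                 HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
    }
  }

  // The binding is applied to the thread by the OS, so it survives this.
  hwloc_topology_destroy(topo);
  return result;  // 0 on success, -1 otherwise
}
```

The code above avoids the per-call topology load by caching the topology and the core cpusets in the Sentinel's static state instead.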
- - for ( unsigned i = 0 ; i < max_root ; ++i ) { - - const unsigned root_rank = ( i + root_base ) % max_root ; - - const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank ); - - if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) { - - const unsigned max_core = - hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , - HWLOC_OBJ_CORE ); - - unsigned core_count = 0 ; - - for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) { - - const hwloc_obj_t core = - hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , - HWLOC_OBJ_CORE , j ); - - if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { - - s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ; - - ++core_count ; - } - } - } - } - - hwloc_bitmap_free( proc_cpuset_location ); - - if ( ! symmetric ) { - std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology." - << std::endl ; - } -} - - -inline -void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap ) -{ - s << "{" ; - for ( int i = hwloc_bitmap_first( bitmap ) ; - -1 != i ; i = hwloc_bitmap_next( bitmap , i ) ) { - s << " " << i ; - } - s << " }" ; -} - -} // namespace - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -bool available() -{ return true ; } - -unsigned get_available_numa_count() -{ sentinel(); return s_core_topology.first ; } - -unsigned get_available_cores_per_numa() -{ sentinel(); return s_core_topology.second ; } - -unsigned get_available_threads_per_core() -{ sentinel(); return s_core_capacity ; } - - -std::pair -get_core_topology() -{ sentinel(); return s_core_topology ; } - -unsigned -get_core_capacity() -{ sentinel(); return s_core_capacity ; } - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -unsigned bind_this_thread( - const unsigned coordinate_count , - std::pair coordinate[] ) -{ - unsigned i = 0 ; - - try { - const std::pair current = get_this_thread_coordinate(); - - // Match one of the requests: - for ( i = 0 ; i < coordinate_count && current != coordinate[i] ; ++i ); - - if ( coordinate_count == i ) { - // Match the first request (typically NUMA): - for ( i = 0 ; i < coordinate_count && current.first != coordinate[i].first ; ++i ); - } - - if ( coordinate_count == i ) { - // Match any unclaimed request: - for ( i = 0 ; i < coordinate_count && ~0u == coordinate[i].first ; ++i ); - } - - if ( coordinate_count == i || ! bind_this_thread( coordinate[i] ) ) { - // Failed to bind: - i = ~0u ; - } - - if ( i < coordinate_count ) { - -#if DEBUG_PRINT - if ( current != coordinate[i] ) { - std::cout << " host_thread_binding: rebinding from (" - << current.first << "," - << current.second - << ") to (" - << coordinate[i].first << "," - << coordinate[i].second - << ")" << std::endl ; - } -#endif - - coordinate[i].first = ~0u ; - coordinate[i].second = ~0u ; - } - } - catch( ... 
) { - i = ~0u ; - } - - return i ; -} - - -bool bind_this_thread( const std::pair coord ) -{ - sentinel(); - -#if DEBUG_PRINT - - std::cout << "Kokkos::bind_this_thread() at " ; - - hwloc_get_last_cpu_location( s_hwloc_topology , - s_hwloc_location , HWLOC_CPUBIND_THREAD ); - - print_bitmap( std::cout , s_hwloc_location ); - - std::cout << " to " ; - - print_bitmap( std::cout , s_core[ coord.second + coord.first * s_core_topology.second ] ); - - std::cout << std::endl ; - -#endif - - // As safe and fast as possible. - // Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'. - return coord.first < s_core_topology.first && - coord.second < s_core_topology.second && - 0 == hwloc_set_cpubind( s_hwloc_topology , - s_core[ coord.second + coord.first * s_core_topology.second ] , - HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ); -} - -bool unbind_this_thread() -{ - sentinel(); - -#define HWLOC_DEBUG_PRINT 0 - -#if HWLOC_DEBUG_PRINT - - std::cout << "Kokkos::unbind_this_thread() from " ; - - hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD ); - - print_bitmap( std::cout , s_hwloc_location ); - -#endif - - const bool result = - s_hwloc_topology && - 0 == hwloc_set_cpubind( s_hwloc_topology , - s_process_binding , - HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ); - -#if HWLOC_DEBUG_PRINT - - std::cout << " to " ; - - hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD ); - - print_bitmap( std::cout , s_hwloc_location ); - - std::cout << std::endl ; - -#endif - - return result ; - -#undef HWLOC_DEBUG_PRINT - -} - -//---------------------------------------------------------------------------- - -std::pair get_this_thread_coordinate() -{ - sentinel(); - - const unsigned n = s_core_topology.first * s_core_topology.second ; - - std::pair coord(0,0); - - // Using the pre-allocated 's_hwloc_location' to avoid memory - // allocation by this thread. This call is NOT thread-safe. - hwloc_get_last_cpu_location( s_hwloc_topology , - s_hwloc_location , HWLOC_CPUBIND_THREAD ); - - unsigned i = 0 ; - - while ( i < n && ! hwloc_bitmap_intersects( s_hwloc_location , s_core[ i ] ) ) ++i ; - - if ( i < n ) { - coord.first = i / s_core_topology.second ; - coord.second = i % s_core_topology.second ; - } - else { - std::ostringstream msg ; - msg << "Kokkos::get_this_thread_coordinate() FAILED :" ; - - if ( 0 != s_process_binding && 0 != s_hwloc_location ) { - msg << " cpu_location" ; - print_bitmap( msg , s_hwloc_location ); - msg << " is not a member of the process_cpu_set" ; - print_bitmap( msg , s_process_binding ); - } - else { - msg << " not initialized" ; - } - throw std::runtime_error( msg.str() ); - } - return coord ; -} - -//---------------------------------------------------------------------------- - -} /* namespace hwloc */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#else /* ! 
defined( KOKKOS_HAVE_HWLOC ) */ - -namespace Kokkos { -namespace hwloc { - -bool available() { return false ; } - -unsigned get_available_numa_count() { return 1 ; } -unsigned get_available_cores_per_numa() { return 1 ; } -unsigned get_available_threads_per_core() { return 1 ; } - -unsigned bind_this_thread( const unsigned , std::pair[] ) -{ return ~0 ; } - -bool bind_this_thread( const std::pair ) -{ return false ; } - -bool unbind_this_thread() -{ return true ; } - -std::pair get_this_thread_coordinate() -{ return std::pair(0,0); } - -std::pair get_core_topology() -{ return std::pair(1,1); } - -unsigned get_core_capacity() -{ return 1 ; } - -} // namespace hwloc -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif - - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_spinwait.cpp b/kokkos/kokkos/core/src/impl/Kokkos_spinwait.cpp deleted file mode 100644 index d9377cc..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_spinwait.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include - -/*--------------------------------------------------------------------------*/ - -#if ! 
defined( KOKKOS_DISABLE_ASM ) && \ - ( defined( __GNUC__ ) || \ - defined( __GNUG__ ) || \ - defined( __INTEL_COMPILER__ ) ) - -#ifndef __arm__ -/* Pause instruction to prevent excess processor bus usage */ -#define YIELD asm volatile("pause\n":::"memory") -#else -/* No-operation instruction to idle the thread. */ -#define YIELD asm volatile("nop") -#endif - -#elif ! defined( KOKKOS_HAVE_WINTHREAD ) - -#include - -#define YIELD sched_yield() - -#else - -#include - -#define YIELD Sleep(0) - -#endif - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -void spinwait( volatile int & flag , const int value ) -{ - while ( value == flag ) { - YIELD ; - } -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - diff --git a/kokkos/kokkos/core/src/impl/Kokkos_spinwait.hpp b/kokkos/kokkos/core/src/impl/Kokkos_spinwait.hpp deleted file mode 100644 index f2b42e9..0000000 --- a/kokkos/kokkos/core/src/impl/Kokkos_spinwait.hpp +++ /dev/null @@ -1,59 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos: Manycore Performance-Portable Multidimensional Arrays -// Copyright (2012) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - - -#ifndef KOKKOS_SPINWAIT_HPP -#define KOKKOS_SPINWAIT_HPP - -namespace Kokkos { -namespace Impl { - -void spinwait( volatile int & flag , const int value ); - -} /* namespace Impl */ -} /* namespace Kokkos */ - -#undef KOKKOS_YIELD - -#endif /* #ifndef KOKKOS_SPINWAIT_HPP */ - diff --git a/kokkos/kokkos/linalg/src/Kokkos_CrsMatrix.hpp b/kokkos/kokkos/linalg/src/Kokkos_CrsMatrix.hpp index 9a55fb4..ecf0187 100644 --- a/kokkos/kokkos/linalg/src/Kokkos_CrsMatrix.hpp +++ b/kokkos/kokkos/linalg/src/Kokkos_CrsMatrix.hpp @@ -53,18 +53,9 @@ #include #include -#include -#include -#ifdef KOKKOS_HAVE_CUDA -# include -#endif -#include +#include #include -#include - -#ifndef _OPENMP -#include -#endif // ! _OPENMP +#include #ifdef KOKKOS_USE_CUSPARSE # include @@ -95,19 +86,19 @@ namespace Kokkos { /// /// Here is an example loop over the entries in the row: /// \code -/// typedef typename SparseRowView::scalar_type scalar_type; +/// typedef typename SparseRowView::value_type value_type; /// typedef typename SparseRowView::ordinal_type ordinal_type; /// /// SparseRowView A_i = ...; /// const int numEntries = A_i.length; /// for (int k = 0; k < numEntries; ++k) { -/// scalar_type A_ij = A_i.value (k); +/// value_type A_ij = A_i.value (k); /// ordinal_type j = A_i.colidx (k); /// // ... do something with A_ij and j ... /// } /// \endcode /// -/// MatrixType must provide the \c scalar_type and \c ordinal_type +/// MatrixType must provide the \c value_type and \c ordinal_type /// typedefs. In addition, it must make sense to use SparseRowView to /// view a row of MatrixType. In particular, the values and column /// indices of a row must be accessible using the values @@ -119,13 +110,13 @@ namespace Kokkos { template struct SparseRowView { //! The type of the values in the row. - typedef typename MatrixType::scalar_type scalar_type; + typedef typename MatrixType::value_type value_type; //! The type of the column indices in the row. typedef typename MatrixType::ordinal_type ordinal_type; private: //! Array of values in the row. - scalar_type* values_; + value_type* values_; //! Array of (local) column indices in the row. ordinal_type* colidx_; //! Stride between successive entries in the row. @@ -140,7 +131,7 @@ struct SparseRowView { /// each of the above arrays. /// \param count [in] Number of entries in the row. KOKKOS_INLINE_FUNCTION - SparseRowView (scalar_type* const values, + SparseRowView (value_type* const values, ordinal_type* const colidx, const int stride, const int count) : @@ -159,7 +150,7 @@ struct SparseRowView { /// "Entry i" is not necessarily the entry with column index i, nor /// does i necessarily correspond to the (local) row index. KOKKOS_INLINE_FUNCTION - scalar_type& value (const int& i) const { + value_type& value (const int& i) const { return values_[i*stride_]; } @@ -184,13 +175,13 @@ struct SparseRowView { template struct SparseRowViewConst { //! The type of the values in the row. - typedef const typename MatrixType::nonconst_scalar_type scalar_type; + typedef const typename MatrixType::nonconst_value_type value_type; //! The type of the column indices in the row. typedef const typename MatrixType::nonconst_ordinal_type ordinal_type; private: //! Array of values in the row. - scalar_type* values_; + value_type* values_; //! Array of (local) column indices in the row. ordinal_type* colidx_; //! Stride between successive entries in the row. 
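The scalar_type -> value_type rename above is purely a typedef change; the strided access pattern of SparseRowView / SparseRowViewConst is untouched. As a minimal, self-contained sketch of what value(i) does with the stride and of the usage loop quoted in the class documentation (the struct, member, and function names here are simplified illustrative stand-ins, not the real class):

struct row_view_sketch {
  // Simplified stand-ins for the value_type* / ordinal_type* members.
  const double* values;
  const int*    colidx;
  int           stride;   // 1 when a row's entries are contiguous (CSR-like)
  int           length;   // number of entries in the row

  double value     (int i) const { return values[ i * stride ]; }
  int    column_of (int i) const { return colidx[ i * stride ]; }
};

// Row dot-product with a dense vector x, mirroring the documented loop:
inline double row_dot( const row_view_sketch& row , const double* x )
{
  double sum = 0.0;
  for ( int k = 0 ; k < row.length ; ++k )
    sum += row.value(k) * x[ row.column_of(k) ];
  return sum;
}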
@@ -205,7 +196,7 @@ struct SparseRowViewConst { /// each of the above arrays. /// \param count [in] Number of entries in the row. KOKKOS_INLINE_FUNCTION - SparseRowViewConst (scalar_type* const values, + SparseRowViewConst (value_type* const values, ordinal_type* const colidx, const int stride, const int count) : @@ -224,7 +215,7 @@ struct SparseRowViewConst { /// "Entry i" is not necessarily the entry with column index i, nor /// does i necessarily correspond to the (local) row index. KOKKOS_INLINE_FUNCTION - scalar_type& value (const int& i) const { + value_type& value (const int& i) const { return values_[i*stride_]; } @@ -260,24 +251,12 @@ template HostMirror; @@ -323,11 +302,11 @@ class CrsMatrix { //! Type of the "row map" (which contains the offset for each row's data). typedef typename StaticCrsGraphType::row_map_type row_map_type; //! Kokkos Array type of the entries (values) in the sparse matrix. - typedef Kokkos::View values_type; + typedef Kokkos::View values_type; //! Const version of the type of the entries in the sparse matrix. - typedef typename values_type::const_scalar_type const_scalar_type; + typedef typename values_type::const_value_type const_value_type; //! Nonconst version of the type of the entries in the sparse matrix. - typedef typename values_type::non_const_scalar_type non_const_scalar_type; + typedef typename values_type::non_const_value_type non_const_value_type; #ifdef KOKKOS_USE_CUSPARSE cusparseHandle_t cusparse_handle; @@ -845,87 +824,6 @@ generateHostGraph ( OrdinalType nrows, } -// FIXME (mfh 09 Aug 2013) These "shuffle" operations need to move -// into kokkos/core, because they are fundamental to Kokkos and not -// specific to sparse matrices. -// -// Shuffle only makes sense on >= Kepler GPUs; it doesn't work on CPUs -// or other GPUs. We provide a generic definition (which is trivial -// and doesn't do what it claims to do) because we don't actually use -// this function unless we are on a suitable GPU, with a suitable -// Scalar type. (For example, in the mat-vec, the "ThreadsPerRow" -// internal parameter depends both on the Device and the Scalar type, -// and it controls whether shfl_down() gets called.) 
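// A sketch of the warp-level tree reduction that shfl_down exists for
// (assumed usage, not code from this header; the helper name is
// illustrative): when ThreadsPerRow > 1 on a Kepler-or-newer GPU, each
// lane repeatedly adds the partial sum held by the lane 'delta' positions
// further along its width-sized group, halving delta until lane 0 holds
// the group's total.  The trivial generic definition below returns its
// input unchanged, so this path is only exercised where the shuffle
// intrinsic is real.
KOKKOS_INLINE_FUNCTION
double reduce_within_row( double my_partial , const int width )
{
  for ( int delta = width / 2 ; delta > 0 ; delta /= 2 ) {
    my_partial += shfl_down( my_partial , delta , width );
  }
  return my_partial ;  // the row's full sum, held by lane 0 of the group
}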
-template -KOKKOS_INLINE_FUNCTION -Scalar shfl_down(const Scalar &val, const int& delta, const int& width){ - return val; -} - -template<> -KOKKOS_INLINE_FUNCTION -unsigned int shfl_down(const unsigned int &val, const int& delta, const int& width){ -#ifdef __CUDA_ARCH__ - #if (__CUDA_ARCH__ >= 300) - unsigned int tmp1 = val; - int tmp = *reinterpret_cast(&tmp1); - tmp = __shfl_down(tmp,delta,width); - return *reinterpret_cast(&tmp); - #else - return val; - #endif -#else - return val; -#endif -} - -template<> -KOKKOS_INLINE_FUNCTION -int shfl_down(const int &val, const int& delta, const int& width){ -#ifdef __CUDA_ARCH__ - #if (__CUDA_ARCH__ >= 300) - return __shfl_down(val,delta,width); - #else - return val; - #endif -#else - return val; -#endif -} - -template<> -KOKKOS_INLINE_FUNCTION -float shfl_down(const float &val, const int& delta, const int& width){ -#ifdef __CUDA_ARCH__ - #if (__CUDA_ARCH__ >= 300) - return __shfl_down(val,delta,width); - #else - return val; - #endif -#else - return val; -#endif -} - -template<> -KOKKOS_INLINE_FUNCTION -double shfl_down(const double &val, const int& delta, const int& width){ -#ifdef __CUDA_ARCH__ - #if (__CUDA_ARCH__ >= 300) - int lo = __double2loint(val); - int hi = __double2hiint(val); - lo = __shfl_down(lo,delta,width); - hi = __shfl_down(hi,delta,width); - return __hiloint2double(hi,lo); - #else - return val; - #endif -#else - return val; -#endif -} - - template range_values; + typedef typename CrsMatrix::non_const_value_type value_type ; + typedef typename Kokkos::View range_values; CoeffVector1 beta; CoeffVector2 alpha; @@ -954,7 +852,7 @@ struct MV_MultiplyFunctor { void strip_mine (const size_type i, const size_type kk) const { const size_type iRow = i / ThreadsPerRow; const int lane = i % ThreadsPerRow; - scalar_type sum[UNROLL]; + value_type sum[UNROLL]; // FIXME (mfh 29 Sep 2013) These pragmas ("ivdep", "unroll", and // "loop count") should be protected by macros that identify the // compilers which support them. @@ -965,7 +863,7 @@ struct MV_MultiplyFunctor { #pragma unroll for (size_type k = 0 ; k < UNROLL ; ++k) { // NOTE (mfh 09 Aug 2013) This requires that assignment from int - // (in this case, 0) to scalar_type be defined. It's not for + // (in this case, 0) to value_type be defined. It's not for // types like arprec and dd_real. 
// // mfh 29 Sep 2013: On the other hand, arprec and dd_real won't @@ -984,7 +882,7 @@ struct MV_MultiplyFunctor { #pragma loop count (15) #pragma unroll for (size_type iEntry = lane; iEntry < row.length; iEntry += ThreadsPerRow) { - const scalar_type val = row.value(iEntry); + const value_type val = row.value(iEntry); const size_type ind = row.colidx(iEntry); #pragma unroll @@ -999,7 +897,7 @@ struct MV_MultiplyFunctor { #pragma loop count (15) #pragma unroll for(size_type iEntry = lane ; iEntry < row.length ; iEntry+=ThreadsPerRow) { - const scalar_type val = row.value(iEntry); + const value_type val = row.value(iEntry); const size_type ind = row.colidx(iEntry); #pragma unroll @@ -1061,7 +959,7 @@ struct MV_MultiplyFunctor { void strip_mine_1 (const size_type i) const { const size_type iRow = i/ThreadsPerRow; const int lane = i%ThreadsPerRow; - scalar_type sum = 0; + value_type sum = 0; if(doalpha != -1) { const SparseRowView row = m_A.row(iRow); @@ -1216,8 +1114,8 @@ struct MV_MultiplyFunctor { struct MV_MultiplySingleFunctor { typedef typename CrsMatrix::device_type device_type ; typedef typename CrsMatrix::ordinal_type size_type ; - typedef typename CrsMatrix::non_const_scalar_type scalar_type ; - typedef typename Kokkos::View range_values; + typedef typename CrsMatrix::non_const_value_type value_type ; + typedef typename Kokkos::View range_values; CoeffVector1 beta; CoeffVector2 alpha; @@ -1230,7 +1128,7 @@ struct MV_MultiplyFunctor { void operator()(const size_type i) const { const size_type iRow = i/ThreadsPerRow; const int lane = i%ThreadsPerRow; - scalar_type sum = 0; + value_type sum = 0; if (doalpha != -1) { const SparseRowView row = m_A.row(iRow); @@ -1301,33 +1199,33 @@ struct MV_MultiplyFunctor { if (y.dimension_1() != numVecs) { std::ostringstream msg; msg << "Error in CRSMatrix - Vector Multiply (y = by + aAx): 2nd dimensions of y and x do not match\n"; - msg << "\t Labels are: y(" << RangeVector::memory_space::query_label(y.ptr_on_device()) << ") b(" - << CoeffVector1::memory_space::query_label(betav.ptr_on_device()) << ") a(" - << CoeffVector2::memory_space::query_label(alphav.ptr_on_device()) << ") x(" - << CrsMatrix::values_type::memory_space::query_label(A.values.ptr_on_device()) << ") x(" - << DomainVector::memory_space::query_label(x.ptr_on_device()) << ")\n"; + msg << "\t Labels are: y(" << y.label() << ") b(" + << betav.label() << ") a(" + << alphav.label() << ") x(" + << A.values.label() << ") x(" + << x.label() << ")\n"; msg << "\t Dimensions are: y(" << y.dimension_0() << "," << y.dimension_1() << ") x(" << x.dimension_0() << "," << x.dimension_1() << ")\n"; Impl::throw_runtime_exception( msg.str() ); } if (numRows > y.dimension_0()) { std::ostringstream msg; msg << "Error in CRSMatrix - Vector Multiply (y = by + aAx): dimensions of y and A do not match\n"; - msg << "\t Labels are: y(" << RangeVector::memory_space::query_label(y.ptr_on_device()) << ") b(" - << CoeffVector1::memory_space::query_label(betav.ptr_on_device()) << ") a(" - << CoeffVector2::memory_space::query_label(alphav.ptr_on_device()) << ") x(" - << CrsMatrix::values_type::memory_space::query_label(A.values.ptr_on_device()) << ") x(" - << DomainVector::memory_space::query_label(x.ptr_on_device()) << ")\n"; + msg << "\t Labels are: y(" << y.label() << ") b(" + << betav.label() << ") a(" + << alphav.label() << ") x(" + << A.values.label() << ") x(" + << x.label() << ")\n"; msg << "\t Dimensions are: y(" << y.dimension_0() << "," << y.dimension_1() << ") A(" << A.numCols() << "," << A.numRows() << 
")\n"; Impl::throw_runtime_exception( msg.str() ); } if (numCols > x.dimension_0()) { std::ostringstream msg; msg << "Error in CRSMatrix - Vector Multiply (y = by + aAx): dimensions of x and A do not match\n"; - msg << "\t Labels are: y(" << RangeVector::memory_space::query_label(y.ptr_on_device()) << ") b(" - << CoeffVector1::memory_space::query_label(betav.ptr_on_device()) << ") a(" - << CoeffVector2::memory_space::query_label(alphav.ptr_on_device()) << ") x(" - << CrsMatrix::values_type::memory_space::query_label(A.values.ptr_on_device()) << ") x(" - << DomainVector::memory_space::query_label(x.ptr_on_device()) << ")\n"; + msg << "\t Labels are: y(" << y.label() << ") b(" + << betav.label() << ") a(" + << alphav.label() << ") x(" + << A.values.label() << ") x(" + << x.label() << ")\n"; msg << "\t Dimensions are: x(" << x.dimension_0() << "," << x.dimension_1() << ") A(" << A.numCols() << "," << A.numRows() << ")\n"; Impl::throw_runtime_exception( msg.str() ); } @@ -1335,11 +1233,11 @@ struct MV_MultiplyFunctor { if (betav.dimension_0()!=numVecs) { std::ostringstream msg; msg << "Error in CRSMatrix - Vector Multiply (y = by + aAx): 2nd dimensions of y and b do not match\n"; - msg << "\t Labels are: y(" << RangeVector::memory_space::query_label(y.ptr_on_device()) << ") b(" - << CoeffVector1::memory_space::query_label(betav.ptr_on_device()) << ") a(" - << CoeffVector2::memory_space::query_label(alphav.ptr_on_device()) << ") x(" - << CrsMatrix::values_type::memory_space::query_label(A.values.ptr_on_device()) << ") x(" - << DomainVector::memory_space::query_label(x.ptr_on_device()) << ")\n"; + msg << "\t Labels are: y(" << y.label() << ") b(" + << betav.label() << ") a(" + << alphav.label() << ") x(" + << A.values.label() << ") x(" + << x.label() << ")\n"; msg << "\t Dimensions are: y(" << y.dimension_0() << "," << y.dimension_1() << ") b(" << betav.dimension_0() << ")\n"; Impl::throw_runtime_exception( msg.str() ); } @@ -1348,11 +1246,11 @@ struct MV_MultiplyFunctor { if(alphav.dimension_0()!=numVecs) { std::ostringstream msg; msg << "Error in CRSMatrix - Vector Multiply (y = by + aAx): 2nd dimensions of x and b do not match\n"; - msg << "\t Labels are: y(" << RangeVector::memory_space::query_label(y.ptr_on_device()) << ") b(" - << CoeffVector1::memory_space::query_label(betav.ptr_on_device()) << ") a(" - << CoeffVector2::memory_space::query_label(alphav.ptr_on_device()) << ") x(" - << CrsMatrix::values_type::memory_space::query_label(A.values.ptr_on_device()) << ") x(" - << DomainVector::memory_space::query_label(x.ptr_on_device()) << ")\n"; + msg << "\t Labels are: y(" << y.label() << ") b(" + << betav.label() << ") a(" + << alphav.label() << ") x(" + << A.values.label() << ") x(" + << x.label() << ")\n"; msg << "\t Dimensions are: x(" << x.dimension_0() << "," << x.dimension_1() << ") b(" << betav.dimension_0() << ")\n"; Impl::throw_runtime_exception( msg.str() ); } @@ -1420,22 +1318,22 @@ struct MV_MultiplyFunctor { typedef View< typename DomainVector::const_data_type , typename DomainVector::array_layout , typename DomainVector::device_type , - Kokkos::MemoryRandomRead > + Kokkos::MemoryRandomAccess > DomainVectorType; typedef View< typename CoeffVector1::const_data_type , typename CoeffVector1::array_layout , typename CoeffVector1::device_type , - Kokkos::MemoryRandomRead > + Kokkos::MemoryRandomAccess > CoeffVector1Type; typedef View< typename CoeffVector2::const_data_type , typename CoeffVector2::array_layout , typename CoeffVector2::device_type , - Kokkos::MemoryRandomRead > + 
Kokkos::MemoryRandomAccess > CoeffVector2Type; - typedef CrsMatrix::value> op ; + ThreadsPerRow::value> op ; const typename CrsMatrixType::ordinal_type nrow = A.numRows(); op.m_A = A ; op.m_x = x ; @@ -1454,12 +1352,12 @@ struct MV_MultiplyFunctor { op.beta = betav; op.alpha = alphav; op.n = x.dimension(1); - Kokkos::parallel_for(nrow*ThreadsPerRow::value , op); + Kokkos::parallel_for("SPMV n-rhs",nrow*ThreadsPerRow::value , op); #else // NOT KOKKOS_FAST_COMPILE MV_MultiplyFunctor::value> op ; + ThreadsPerRow::value> op ; int numVecs = x.dimension_1(); CoeffVector1 beta = betav; @@ -1468,7 +1366,7 @@ struct MV_MultiplyFunctor { if (doalpha != 2) { alpha = CoeffVector2("CrsMatrix::auto_a", numVecs); typename CoeffVector2::HostMirror h_a = Kokkos::create_mirror_view(alpha); - typename CoeffVector2::scalar_type s_a = (typename CoeffVector2::scalar_type) doalpha; + typename CoeffVector2::value_type s_a = (typename CoeffVector2::value_type) doalpha; for (int i = 0; i < numVecs; ++i) h_a(i) = s_a; @@ -1479,7 +1377,7 @@ struct MV_MultiplyFunctor { if (dobeta != 2) { beta = CoeffVector1("CrsMatrix::auto_b", numVecs); typename CoeffVector1::HostMirror h_b = Kokkos::create_mirror_view(beta); - typename CoeffVector1::scalar_type s_b = (typename CoeffVector1::scalar_type) dobeta; + typename CoeffVector1::value_type s_b = (typename CoeffVector1::value_type) dobeta; for(int i = 0; i < numVecs; i++) h_b(i) = s_b; @@ -1494,8 +1392,8 @@ struct MV_MultiplyFunctor { op.beta = beta; op.alpha = alpha; op.n = x.dimension_1(); - Kokkos::parallel_for (nrow * ThreadsPerRow::value, op); + Kokkos::parallel_for ("SPMV n-rhs",nrow * ThreadsPerRow::value, op); #endif // KOKKOS_FAST_COMPILE } } @@ -1532,22 +1430,20 @@ struct MV_MultiplyFunctor { typedef View< typename DomainVector::const_data_type , typename DomainVector::array_layout , typename DomainVector::device_type , - Kokkos::MemoryRandomRead > + Kokkos::MemoryRandomAccess > DomainVectorType; typedef View< typename CoeffVector1::const_data_type , typename CoeffVector1::array_layout , - typename CoeffVector1::device_type , - Kokkos::MemoryRandomRead > + typename CoeffVector1::device_type> CoeffVector1Type; typedef View< typename CoeffVector2::const_data_type , typename CoeffVector2::array_layout , - typename CoeffVector2::device_type , - Kokkos::MemoryRandomRead > + typename CoeffVector2::device_type> CoeffVector2Type; - typedef CrsMatrix::value> op ; + ,ThreadsPerRow::value> op ; const typename CrsMatrixType::ordinal_type nrow = A.numRows(); op.m_A = A ; op.m_x = x ; @@ -1567,14 +1463,14 @@ struct MV_MultiplyFunctor { op.beta = betav; op.alpha = alphav; op.n = x.dimension(1); - Kokkos::parallel_for (nrow * ThreadsPerRow::value, op); + Kokkos::parallel_for ("SPMV",nrow * ThreadsPerRow::value, op); #else // NOT KOKKOS_FAST_COMPILE MV_MultiplySingleFunctor::value> op; + ThreadsPerRow::value> op; int numVecs = x.dimension_1(); CoeffVector1 beta = betav; @@ -1583,7 +1479,7 @@ struct MV_MultiplyFunctor { if(doalpha!=2) { alpha = CoeffVector2("CrsMatrix::auto_a", numVecs); typename CoeffVector2::HostMirror h_a = Kokkos::create_mirror_view(alpha); - typename CoeffVector2::scalar_type s_a = (typename CoeffVector2::scalar_type) doalpha; + typename CoeffVector2::value_type s_a = (typename CoeffVector2::value_type) doalpha; for(int i = 0; i < numVecs; i++) h_a(i) = s_a; @@ -1593,7 +1489,7 @@ struct MV_MultiplyFunctor { if(dobeta!=2) { beta = CoeffVector1("CrsMatrix::auto_b", numVecs); typename CoeffVector1::HostMirror h_b = Kokkos::create_mirror_view(beta); - typename 
CoeffVector1::scalar_type s_b = (typename CoeffVector1::scalar_type) dobeta; + typename CoeffVector1::value_type s_b = (typename CoeffVector1::value_type) dobeta; for(int i = 0; i < numVecs; i++) h_b(i) = s_b; @@ -1607,8 +1503,8 @@ struct MV_MultiplyFunctor { op.beta = beta; op.alpha = alpha; op.n = x.dimension_1(); - Kokkos::parallel_for (nrow * ThreadsPerRow::value, op); + Kokkos::parallel_for ("SPMV",nrow * ThreadsPerRow::value, op); #endif // KOKKOS_FAST_COMPILE } } @@ -1711,7 +1607,7 @@ struct MV_MultiplyFunctor { return; } #endif // KOKKOS_USE_MKL - typedef Kokkos::View aVector; + typedef Kokkos::View aVector; aVector a; return MV_Multiply (a, y, a, A, x, 0, 1); @@ -1720,7 +1616,7 @@ struct MV_MultiplyFunctor { template void MV_Multiply (const RangeVector& y, - typename DomainVector::const_scalar_type s_a, + typename DomainVector::const_value_type s_a, const CrsMatrix& A, const DomainVector& x) { @@ -1734,7 +1630,7 @@ struct MV_MultiplyFunctor { return; } #endif // KOKKOS_USE_MKL - typedef Kokkos::View aVector; + typedef Kokkos::View aVector; aVector a; const int numVecs = x.dimension_1(); @@ -1746,20 +1642,16 @@ struct MV_MultiplyFunctor { if (s_a != 0) { a = aVector("a", numVecs); - typename aVector::HostMirror h_a = Kokkos::create_mirror_view (a); - for (int i = 0; i < numVecs; ++i) { - h_a(i) = s_a; - } - Kokkos::deep_copy(a, h_a); + Kokkos::deep_copy(a, s_a); return MV_Multiply (a, y, a, A, x, 0, 2); } } template void - MV_Multiply (typename RangeVector::const_scalar_type s_b, + MV_Multiply (typename RangeVector::const_value_type s_b, const RangeVector& y, - typename DomainVector::const_scalar_type s_a, + typename DomainVector::const_value_type s_a, const CrsMatrix& A, const DomainVector& x) { @@ -1773,7 +1665,7 @@ struct MV_MultiplyFunctor { return; } #endif // KOKKOS_USE_MKL - typedef Kokkos::View aVector; + typedef Kokkos::View aVector; aVector a; aVector b; int numVecs = x.dimension_1(); diff --git a/kokkos/kokkos/linalg/src/Kokkos_MV.hpp b/kokkos/kokkos/linalg/src/Kokkos_MV.hpp index 7084dbd..9138a20 100644 --- a/kokkos/kokkos/linalg/src/Kokkos_MV.hpp +++ b/kokkos/kokkos/linalg/src/Kokkos_MV.hpp @@ -1,20 +1,8 @@ #ifndef KOKKOS_MULTIVECTOR_H_ #define KOKKOS_MULTIVECTOR_H_ -#include - -#include -#include - -#ifdef KOKKOS_HAVE_OPENMP -#include -#endif -#ifdef KOKKOS_HAVE_CUDA -#include -#endif -#include -#include #include +#include namespace Kokkos { @@ -32,7 +20,7 @@ struct MultiVectorDynamic{ #endif typedef typename Kokkos::View type ; typedef typename Kokkos::View const_type ; - typedef typename Kokkos::View random_read_type ; + typedef typename Kokkos::View random_read_type ; MultiVectorDynamic() {} ~MultiVectorDynamic() {} }; @@ -43,7 +31,7 @@ struct MultiVectorStatic{ typedef typename device::array_layout layout; typedef typename Kokkos::View type ; typedef typename Kokkos::View const_type ; - typedef typename Kokkos::View random_read_type ; + typedef typename Kokkos::View random_read_type ; MultiVectorStatic() {} ~MultiVectorStatic() {} }; @@ -56,7 +44,6 @@ struct MultiVectorStatic{ template struct MV_MulScalarFunctor { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; RVector m_r; @@ -78,7 +65,6 @@ struct MV_MulScalarFunctor template struct MV_MulScalarFunctorSelf { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; XVector m_x; @@ -95,16 +81,16 @@ struct MV_MulScalarFunctorSelf } }; -template -RVector MV_MulScalar( const RVector & r, const typename Kokkos::View & a, 
const XVector & x) +template +RVector MV_MulScalar( const RVector & r, const typename Kokkos::View & a, const XVector & x) { - typedef typename Kokkos::View aVector; + typedef typename Kokkos::View aVector; if(r==x) { MV_MulScalarFunctorSelf op ; op.m_x = x ; op.m_a = a ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_MulScalar",x.dimension(0) , op ); return r; } @@ -113,19 +99,19 @@ RVector MV_MulScalar( const RVector & r, const typename Kokkos::View -struct MV_MulScalarFunctor +struct MV_MulScalarFunctor { typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; RVector m_r; typename XVector::const_type m_x ; - typename XVector::scalar_type m_a ; + typename XVector::value_type m_a ; size_type n; MV_MulScalarFunctor() {n=1;} //-------------------------------------------------------------------------- @@ -140,13 +126,13 @@ struct MV_MulScalarFunctor }; template -struct MV_MulScalarFunctorSelf +struct MV_MulScalarFunctorSelf { typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; XVector m_x; - typename XVector::scalar_type m_a ; + typename XVector::value_type m_a ; size_type n; //-------------------------------------------------------------------------- @@ -160,23 +146,23 @@ struct MV_MulScalarFunctorSelf }; template -RVector MV_MulScalar( const RVector & r, const typename XVector::scalar_type &a, const XVector & x) +RVector MV_MulScalar( const RVector & r, const typename XVector::value_type &a, const XVector & x) { if(r==x) { - MV_MulScalarFunctorSelf op ; + MV_MulScalarFunctorSelf op ; op.m_x = x ; op.m_a = a ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_MulScalar",x.dimension(0) , op ); return r; } - MV_MulScalarFunctor op ; + MV_MulScalarFunctor op ; op.m_r = r ; op.m_x = x ; op.m_a = a ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_MulScalar",x.dimension(0) , op ); return r; } /*------------------------------------------------------------------------------------------ @@ -189,7 +175,6 @@ RVector MV_MulScalar( const RVector & r, const typename XVector::scalar_type &a, template struct MV_AddUnrollFunctor { - typedef typename RVector::device_type device_type; typedef typename RVector::size_type size_type; RVector m_r ; @@ -257,7 +242,6 @@ for(size_type k=0;k struct MV_AddVectorFunctor { - typedef typename RVector::device_type device_type; typedef typename RVector::size_type size_type; RVector m_r ; @@ -325,7 +309,7 @@ struct MV_AddVectorFunctor /* Variants of Functors with a and b being scalars. 
*/ template -struct MV_AddUnrollFunctor +struct MV_AddUnrollFunctor { typedef typename RVector::device_type device_type; typedef typename RVector::size_type size_type; @@ -333,8 +317,8 @@ struct MV_AddUnrollFunctor -struct MV_AddVectorFunctor +struct MV_AddVectorFunctor { typedef typename RVector::device_type device_type; typedef typename RVector::size_type size_type; @@ -401,8 +385,8 @@ struct MV_AddVectorFunctor", x.dimension(0) , op ); return r; } if(a==1&&b==-1) { @@ -484,7 +468,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<1,-1>", x.dimension(0) , op ); return r; } if(a==-1&&b==1) { @@ -495,7 +479,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<-1,1>", x.dimension(0) , op ); return r; } if(a==-1&&b==-1) { @@ -506,7 +490,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<-1,-1>", x.dimension(0) , op ); return r; } if(a*a!=1&&b==1) { @@ -517,7 +501,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<2,1>", x.dimension(0) , op ); return r; } if(a*a!=1&&b==-1) { @@ -528,7 +512,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<2,-1>", x.dimension(0) , op ); return r; } if(a==1&&b*b!=1) { @@ -539,7 +523,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<1,2>", x.dimension(0) , op ); return r; } if(a==-1&&b*b!=1) { @@ -550,7 +534,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<-1,2>", x.dimension(0) , op ); return r; } MV_AddUnrollFunctor op ; @@ -560,7 +544,7 @@ RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddUnroll<2,2>", x.dimension(0) , op ); return r; } @@ -621,7 +605,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<1,1>", x.dimension(0) , op ); return r; } if(a==1&&b==-1) { @@ -632,7 +616,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<1,-1>", x.dimension(0) , op ); return r; } if(a==-1&&b==1) { @@ -643,7 +627,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - 
Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<-1,1>", x.dimension(0) , op ); return r; } if(a==-1&&b==-1) { @@ -654,7 +638,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<-1,-1>", x.dimension(0) , op ); return r; } if(a*a!=1&&b==1) { @@ -665,7 +649,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<2,1>", x.dimension(0) , op ); return r; } if(a*a!=1&&b==-1) { @@ -676,7 +660,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<2,-1>", x.dimension(0) , op ); return r; } if(a==1&&b*b!=1) { @@ -687,7 +671,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<1,2>", x.dimension(0) , op ); return r; } if(a==-1&&b*b!=1) { @@ -698,7 +682,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<-1,2>", x.dimension(0) , op ); return r; } MV_AddVectorFunctor op ; @@ -708,7 +692,7 @@ RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, op.m_a = av ; op.m_b = bv ; op.n = x.dimension(1); - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_AddVector<2,2>", x.dimension(0) , op ); return r; } @@ -723,9 +707,9 @@ RVector MV_Add( const RVector & r,const aVector &av,const XVector & x, return MV_AddVector( r,av,x,bv,y,a,b); if(x.dimension_1()==1) { - typedef View RVector1D; - typedef View XVector1D; - typedef View YVector1D; + typedef View RVector1D; + typedef View XVector1D; + typedef View YVector1D; RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); @@ -741,9 +725,9 @@ template RVector MV_Add( const RVector & r, const XVector & x, const YVector & y) { if(x.dimension_1()==1) { - typedef View RVector1D; - typedef View XVector1D; - typedef View YVector1D; + typedef View RVector1D; + typedef View XVector1D; + typedef View YVector1D; RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); @@ -752,7 +736,7 @@ RVector MV_Add( const RVector & r, const XVector & x, const YVector & y) V_Add(r_1d,x_1d,y_1d); return r; } else { - typename XVector::scalar_type a = 1.0; + typename XVector::value_type a = 1.0; return MV_Add(r,a,x,a,y,1,1); } } @@ -761,9 +745,9 @@ template RVector MV_Add( const RVector & r, const XVector & x, const bVector & bv, const YVector & y ) { if(x.dimension_1()==1) { - typedef View RVector1D; - typedef View XVector1D; - typedef View YVector1D; + typedef View RVector1D; + typedef View XVector1D; + typedef View YVector1D; RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); @@ -779,7 +763,6 @@ RVector MV_Add( const RVector & r, const XVector & x, const bVector & bv, const template struct 
MV_DotProduct_Right_FunctorVector { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; typedef typename XVector::value_type value_type[]; size_type value_count; @@ -826,7 +809,6 @@ struct MV_DotProduct_Right_FunctorVector template struct MV_DotProduct_Right_FunctorUnroll { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; typedef typename XVector::value_type value_type[]; size_type value_count; @@ -875,7 +857,7 @@ rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y, int n = - op.m_y = y; op.value_count = numVecs; - Kokkos::parallel_reduce( n , op, r ); + Kokkos::parallel_reduce("MV_Dot(>16)", n , op, r ); return r; } else @@ -885,7 +867,7 @@ rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y, int n = - op.m_x = x; op.m_y = y; op.value_count = numVecs; - Kokkos::parallel_reduce( n , op, r ); + Kokkos::parallel_reduce("MV_Dot(16)", n , op, r ); break; } case 15: { @@ -1002,12 +984,12 @@ rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y, int n = - break; } case 1: { - typedef View XVector1D; - typedef View YVector1D; + typedef View XVector1D; + typedef View YVector1D; XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 ); - r[0] = V_Dot(x_1d,y_1d,n); + r[0] = V_Dot("V_Dot",x_1d,y_1d,n); break; } } @@ -1021,7 +1003,6 @@ rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y, int n = - template struct V_MulScalarFunctor { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; RVector m_r; @@ -1039,7 +1020,6 @@ struct V_MulScalarFunctor template struct V_MulScalarFunctorSelf { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; XVector m_x; @@ -1053,15 +1033,15 @@ struct V_MulScalarFunctorSelf } }; -template -RVector V_MulScalar( const RVector & r, const typename Kokkos::View & a, const XVector & x) +template +RVector V_MulScalar( const RVector & r, const typename Kokkos::View & a, const XVector & x) { - typedef typename Kokkos::View aVector; + typedef typename Kokkos::View aVector; if(r==x) { V_MulScalarFunctorSelf op ; op.m_x = x ; op.m_a = a ; - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_MulScalarSelf", x.dimension(0) , op ); return r; } @@ -1069,19 +1049,18 @@ RVector V_MulScalar( const RVector & r, const typename Kokkos::View -struct V_MulScalarFunctor +struct V_MulScalarFunctor { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; RVector m_r; typename XVector::const_type m_x ; - typename XVector::scalar_type m_a ; + typename XVector::value_type m_a ; //-------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION @@ -1092,13 +1071,12 @@ struct V_MulScalarFunctor }; template -struct V_MulScalarFunctorSelf +struct V_MulScalarFunctorSelf { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; XVector m_x; - typename XVector::scalar_type m_a ; + typename XVector::value_type m_a ; //-------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION @@ -1110,38 +1088,37 @@ struct V_MulScalarFunctorSelf template -RVector V_MulScalar( const RVector & r, const typename XVector::scalar_type &a, const XVector & x) +RVector V_MulScalar( const RVector & r, const typename 
XVector::value_type &a, const XVector & x) { if(r==x) { - V_MulScalarFunctorSelf op ; + V_MulScalarFunctorSelf op ; op.m_x = x ; op.m_a = a ; - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_MulScalarSelf", x.dimension(0) , op ); return r; } - V_MulScalarFunctor op ; + V_MulScalarFunctor op ; op.m_r = r ; op.m_x = x ; op.m_a = a ; - Kokkos::parallel_for( x.dimension(0) , op ); + Kokkos::parallel_for("MV_MulScalar", x.dimension(0) , op ); return r; } template struct V_AddVectorFunctor { - typedef typename RVector::device_type device_type; typedef typename RVector::size_type size_type; - typedef typename XVector::scalar_type scalar_type; + typedef typename XVector::value_type value_type; RVector m_r ; typename XVector::const_type m_x ; typename YVector::const_type m_y ; - const scalar_type m_a; - const scalar_type m_b; + const value_type m_a; + const value_type m_b; //-------------------------------------------------------------------------- - V_AddVectorFunctor(const RVector& r, const scalar_type& a,const XVector& x,const scalar_type& b,const YVector& y): + V_AddVectorFunctor(const RVector& r, const value_type& a,const XVector& x,const value_type& b,const YVector& y): m_r(r),m_x(x),m_y(y),m_a(a),m_b(b) { } @@ -1172,14 +1149,13 @@ struct V_AddVectorFunctor template struct V_AddVectorSelfFunctor { - typedef typename RVector::device_type device_type; typedef typename RVector::size_type size_type; - typedef typename XVector::scalar_type scalar_type; + typedef typename XVector::value_type value_type; RVector m_r ; typename XVector::const_type m_x ; - const scalar_type m_a; + const value_type m_a; - V_AddVectorSelfFunctor(const RVector& r, const scalar_type& a,const XVector& x): + V_AddVectorSelfFunctor(const RVector& r, const value_type& a,const XVector& x): m_r(r),m_x(x),m_a(a) { } @@ -1195,26 +1171,26 @@ struct V_AddVectorSelfFunctor } }; template -RVector V_AddVector( const RVector & r,const typename XVector::scalar_type &av,const XVector & x, - const typename XVector::scalar_type &bv, const YVector & y,int n=-1) +RVector V_AddVector( const RVector & r,const typename XVector::value_type &av,const XVector & x, + const typename XVector::value_type &bv, const YVector & y,int n=-1) { if(n == -1) n = x.dimension_0(); if(r.ptr_on_device()==x.ptr_on_device() && doalpha == 1) { V_AddVectorSelfFunctor f(r,bv,y); - parallel_for(n,f); + parallel_for("V_AddVectorSelf",n,f); } else if(r.ptr_on_device()==y.ptr_on_device() && dobeta == 1) { V_AddVectorSelfFunctor f(r,av,x); - parallel_for(n,f); + parallel_for("V_AddVectorSelf",n,f); } else { V_AddVectorFunctor f(r,av,x,bv,y); - parallel_for(n,f); + parallel_for("V_AddVector",n,f); } return r; } template -RVector V_AddVector( const RVector & r,const typename XVector::scalar_type &av,const XVector & x, - const typename YVector::scalar_type &bv, const YVector & y, int n = -1, +RVector V_AddVector( const RVector & r,const typename XVector::value_type &av,const XVector & x, + const typename YVector::value_type &bv, const YVector & y, int n = -1, int a=2,int b=2) { if(a==-1) { @@ -1264,7 +1240,7 @@ RVector V_Add( const RVector & r, const XVector & x, const YVector & y, int n=-1 } template -RVector V_Add( const RVector & r, const XVector & x, const typename XVector::scalar_type & bv, const YVector & y,int n=-1 ) +RVector V_Add( const RVector & r, const XVector & x, const typename XVector::value_type & bv, const YVector & y,int n=-1 ) { int b = 2; //if(bv == 0) b = 0; @@ -1274,7 +1250,7 @@ RVector V_Add( const RVector & r, const XVector & 
x, const typename XVector::sca } template -RVector V_Add( const RVector & r, const typename XVector::scalar_type & av, const XVector & x, const typename XVector::scalar_type & bv, const YVector & y,int n=-1 ) +RVector V_Add( const RVector & r, const typename XVector::value_type & av, const XVector & x, const typename XVector::value_type & bv, const YVector & y,int n=-1 ) { int a = 2; int b = 2; @@ -1291,9 +1267,8 @@ RVector V_Add( const RVector & r, const typename XVector::scalar_type & av, con template struct V_DotFunctor { - typedef typename XVector::device_type device_type; typedef typename XVector::size_type size_type; - typedef typename XVector::non_const_scalar_type value_type; + typedef typename XVector::non_const_value_type value_type; XVector m_x ; YVector m_y ; @@ -1307,28 +1282,15 @@ struct V_DotFunctor { sum+=m_x(i)*m_y(i); } - - KOKKOS_INLINE_FUNCTION - void init( volatile value_type &update) const - { - update = 0; - } - - KOKKOS_INLINE_FUNCTION - void join( volatile value_type &update , - const volatile value_type &source ) const - { - update += source ; - } }; template -typename XVector::scalar_type V_Dot( const XVector & x, const YVector & y, int n = -1) +typename XVector::value_type V_Dot( const XVector & x, const YVector & y, int n = -1) { V_DotFunctor f(x,y); if (n<0) n = x.dimension_0(); - typename XVector::non_const_scalar_type ret_val; - parallel_reduce(n,f,ret_val); + typename XVector::non_const_value_type ret_val; + parallel_reduce("V_Dot",n,f,ret_val); return ret_val; } }//end namespace Kokkos diff --git a/kokkos/src/Hex8_box_utils.hpp b/kokkos/src/Hex8_box_utils.hpp index f7dfb11..aec3c00 100644 --- a/kokkos/src/Hex8_box_utils.hpp +++ b/kokkos/src/Hex8_box_utils.hpp @@ -100,7 +100,7 @@ void get_hex8_node_coords_3d(Scalar x, Scalar y, Scalar z, } template -KOKKOS_INLINE_FUNCTION +inline void get_elem_nodes_and_coords(const simple_mesh_description& mesh, GlobalOrdinal elemID, @@ -161,7 +161,7 @@ get_elem_nodes_and_coords(const simple_mesh_description& mesh, } template -KOKKOS_INLINE_FUNCTION +inline void get_elem_nodes_and_coords(const simple_mesh_description& mesh, GlobalOrdinal elemID, diff --git a/kokkos/src/Kokkos_Types.hpp b/kokkos/src/Kokkos_Types.hpp index c7b3f16..232acf1 100644 --- a/kokkos/src/Kokkos_Types.hpp +++ b/kokkos/src/Kokkos_Types.hpp @@ -1,35 +1,9 @@ -#include -#ifdef KOKKOS_HAVE_PTHREAD - #include - typedef Kokkos::Threads host_device_type; - #ifndef KOKKOS_HAVE_CUDA - typedef Kokkos::Threads device_device_type; - #endif -#else - #ifdef KOKKOS_HAVE_OPENMP - #include - typedef Kokkos::OpenMP host_device_type; - #ifndef KOKKOS_HAVE_CUDA - typedef Kokkos::OpenMP device_device_type; - #endif - #else - #ifdef KOKKOS_HAVE_SERIAL - #include - typedef Kokkos::Serial host_device_type; - #ifndef KOKKOS_HAVE_CUDA - typedef Kokkos::Serial device_device_type; - #endif - #else - #error "No Kokkos Host Device defined" - #endif - #endif -#endif -#ifdef KOKKOS_HAVE_CUDA - #include - typedef Kokkos::Cuda device_device_type; -#endif +#include +#include +#include -#include +typedef Kokkos::DefaultHostExecutionSpace host_device_type; +typedef Kokkos::DefaultExecutionSpace device_device_type; typedef int GlobalOrdinal; typedef Kokkos::View v_global_ordinal; diff --git a/kokkos/src/Makefile b/kokkos/src/Makefile index bd28610..fd6e5b4 100644 --- a/kokkos/src/Makefile +++ b/kokkos/src/Makefile @@ -1,21 +1,16 @@ #----------------------------------------------------------------------- SHELL = /bin/sh -MPIPATH = /opt/mpi - - CXX = mpicxx CC = mpicc LINK = mpicxx -#Note: 
when using absolute paths KOKKOSPATH and KOKKOSPATH_INC should be the same -#KOKKOSPATH is using for copying source files into the Object directory -#KOKKOSPATH_INC is used during the actual compilation -KOKKOSPATH = ../kokkos -KOKKOSPATH_INC = ../../kokkos +# Kokkos Settings: -PWD = `pwd` +KOKKOS_DEVICES = OpenMP +KOKKOS_ARCH = SNB +# MiniFE Settings: MINIFE_TYPES = \ -DMINIFE_SCALAR=double \ -DMINIFE_LOCAL_ORDINAL=int \ @@ -23,101 +18,73 @@ MINIFE_TYPES = \ MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX # MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX - -CPPFLAGS = -O3 -I. -mavx -I../ -I../../utils -I../../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DMPICH_IGNORE_CXX_SEEK -I$(KOKKOSPATH_INC)/core/src -I$(KOKKOSPATH_INC)/containers/src -I$(KOKKOSPATH_INC)/linalg/src -fPIC -LINKFLAGS = -O3 -mavx - -#Use MPI -CPPFLAGS += -DHAVE_MPI -I$(MPIPATH)/include - -##Enable DEBUG -#CPPFLAGS += -g -G -DKOKKOSARRAY_EXPRESSION_CHECK -DENABLE_TRACEBACK -#LINKFLAGS += -g -#Enable Single Precision -#CPPFLAGS += -DPRECISION=1 - -SRC = $(shell ls *.cpp;) -CPY = $(PWD)/*.cpp +MINIFE_INFO = 1 +MINIFE_KERNELS = 0 -KOKKOS_SRC = $(shell cd $(KOKKOSPATH)/core/src/impl; ls *.cpp;) -KOKKOS_CPY = $(KOKKOSPATH)/core/src/impl/*.cpp -#Use OpenMP backend -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/OpenMP; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/OpenMP/*.cpp -CPPFLAGS += -DKOKKOS_HAVE_OPENMP -fopenmp -LINKFLAGS += -fopenmp +#PATHS -##Use PThreads Backend -#KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/Threads; ls *.cpp;) -#KOKKOS_CPY += $(KOKKOSPATH)/core/src/Threads/*.cpp -#CPPFLAGS += -DKOKKOS_HAVE_PTHREAD -#USRLIB += -lpthread +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -##Use HWLOC -#HWLOCPATH = ./ -#CPPFLAGS += -DKOKKOS_HAVE_HWLOC -I$(HWLOCPATH)/include -#USRLIB += -L$(HWLOCPATH)/lib -lhwloc +MINIFE_PATH = ${MAKEFILE_PATH}.. -#----------------------------------------------------------------------- -OBJ = $(KOKKOS_SRC:.cpp=.o) +KOKKOS_PATH ?= ../kokkos +KOKKOS_PATH_INC ?= ${KOKKOS_PATH} -SYSLIB = $(LIBMPI) $(INTELLIB) $(LIBIB) +all:generate_info miniFE.x +CXXFLAGS = -O3 -# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file -# from each proc containing various information. -# This macro will also enable a somewhat expensive range-check on indices in -# the exchange_externals function. +LDFLAGS = -O3 -LDFLAGS = $(LINKFLAGS) -LIBS= $(USRLIB) $(SYSLIB) -# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, -# such as the one on my cygwin machine. +MINIFE_INCLUDES = -I./ -I${MINIFE_PATH} -I${MINIFE_PATH}/src -I${MINIFE_PATH}/kokkos/linalg/src +MINIFE_INCLUDES += -I${MINIFE_PATH}/fem -I${MINIFE_PATH}/utils -I${MINIFE_PATH}/common +override CXXFLAGS += -DMPICH_IGNORE_CXX_SEEK -fPIC $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) ${MINIFE_INCLUDES} +override CXXFLAGS += -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -DUSE_MPI_WTIME +#Use MPI +override CXXFLAGS += -DHAVE_MPI -copy: - @if [ ! 
-d Obj ]; then mkdir Obj; fi - @echo '' > Obj/KokkosCore_config.h - @cp -p $(SRC) Obj - @cp -p $(KOKKOS_CPY) Obj - @cp Makefile Obj/Makefile - @cd Obj; ../get_common_files - @cd Obj; $(MAKE) all "OBJ = $(OBJ)" +include $(KOKKOS_PATH)/Makefile.kokkos +#Enable Single Precision +#override CXXFLAGS += -DPRECISION=1 -OBJ += BoxPartition.o YAML_Doc.o YAML_Element.o -OBJ += param_utils.o utils.o mytimer.o -OBJ += main.o +SRC = $(wildcard $(MINIFE_PATH)/src/*.cpp) +SRC += $(MINIFE_PATH)/common/YAML_Doc.cpp $(MINIFE_PATH)/common/YAML_Element.cpp +SRC += $(wildcard $(MINIFE_PATH)/utils/*.cpp) +HEADERS = $(wildcard $(MAKEFILE_PATH)/src/*.hpp) +HEADERS += $(MINIFE_PATH)/common/YAML_Doc.hpp $(MINIFE_PATH)/common/YAML_Element.hpp +HEADERS += $(wildcard $(MINIFE_PATH)/utils/*.hpp) -MINIFE_INFO = 1 -MINIFE_KERNELS = 0 +vpath %.cpp $(sort $(dir $(SRC))) -vpath %.cpp ../../utils +$(warning $(SRC)) +OBJ = $(notdir $(SRC:.cpp=.o)) +$(warning $(OBJ)) +$(warning $(HEADERS)) +$(warning $(KOKKOS_PATH)) +$(warning $(KOKKOS_CPPFLAGS)) -all:generate_info miniFE.x - -miniFE.x:$(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) ../*.hpp generate_info - $(INSTRUMENT) $(LINK) $(CXXFLAGS) $(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) -o ../miniFE.x $(LDFLAGS) $(OPTIONAL_LIBS) $(LIBS) +generate_info: + @${MINIFE_PATH}/common/generate_info_header "$(CXX)" "$(CXXFLAGS)" "miniFE" "MINIFE" + -generate_info: - ./generate_info_header "$(CXX)" "$(CXXFLAGS)" "miniFE" "MINIFE" +miniFE.x:$(OBJ) $(KOKKOS_LINK_DEPENDS) generate_info $(HEADERS) + $(INSTRUMENT) $(LINK) $(LDFLAGS) $(KOKKOS_LDFLAGS) $(OBJ) $(KOKKOS_LIBS) -o miniFE.x test: ./run_test x -%.o:%.cpp *.hpp - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< - -%.o:%.c *.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - clean: rm -rf *.o *.a miniFE.x *.linkinfo miniFE_info.hpp Obj realclean: clean rm -f gmon.out gprof.* *~ *.yaml *.TVD.* *.mtx* *.vec* minife_debug* +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $< -o $(notdir $@) + \ No newline at end of file diff --git a/kokkos/src/Makefile.cray b/kokkos/src/Makefile.cray deleted file mode 100644 index 3e475f5..0000000 --- a/kokkos/src/Makefile.cray +++ /dev/null @@ -1,143 +0,0 @@ -#----------------------------------------------------------------------- -SHELL = /bin/sh - -MPIPATH = /opt/mpi - - -CXX = `which CC` -CC = `which CC` -LINK = `which CC` - -MPI = yes -OMP = yes - -ifeq ($(SVN), yes) - KOKKOSPATH = /opt/Trilinos/packages/kokkos - KOKKOSPATH_INC = $(KOKKOSPATH) -else - KOKKOSPATH = ../kokkos - KOKKOSPATH_INC = ../../kokkos -endif - -HWLOCPATH = ./ - -PWD = `pwd` - -SRC = $(shell ls *.cpp;) -CPY = $(PWD)/*.cpp - -KOKKOS_SRC = $(shell cd $(KOKKOSPATH)/core/src/impl; ls *.cpp;) -KOKKOS_CPY = $(KOKKOSPATH)/core/src/impl/*.cpp - -ifeq ($(OMP),yes) -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/OpenMP; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/OpenMP/*.cpp -else -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/Threads; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/Threads/*.cpp -endif - -MINIFE_TYPES = \ - -DMINIFE_SCALAR=double \ - -DMINIFE_LOCAL_ORDINAL=int \ - -DMINIFE_GLOBAL_ORDINAL=int - -MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX -# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX - -#----------------------------------------------------------------------- -OBJ = $(KOKKOS_SRC:.cpp=.o) - -CPPFLAGS = -O3 -I. 
-I../ -I../../utils -I../../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DMPICH_IGNORE_CXX_SEEK -I$(KOKKOSPATH_INC)/core/src -I$(KOKKOSPATH_INC)/containers/src -I$(KOKKOSPATH_INC)/linalg/src -fPIC -LINKFLAGS = -O3 - -ifeq ($(MPI), yes) -CPPFLAGS += -DHAVE_MPI -endif - -ifeq ($(OMP),yes) -CPPFLAGS += -DKOKKOS_HAVE_OPENMP -LINKFLAGS += -else -CPPFLAGS += -DKOKKOS_HAVE_PTHREAD -h nopragma=omp -h noomp -USRLIB += -endif - -ifeq ($(HWLOC),yes) -CPPFLAGS += -DKOKKOS_HAVE_HWLOC -I$(HWLOCPATH)/include -USRLIB += -L$(HWLOCPATH)/lib -lhwloc -endif - -ifeq ($(ANSI_ALIAS), yes) -CPPFLAGS += -ansi-alias -LINKFLAGS += -ansi-alias -endif - -ifeq ($(DEBUG), yes) -CPPFLAGS += -g -G -DKOKKOSARRAY_EXPRESSION_CHECK -DENABLE_TRACEBACK -LINKFLAGS += -g -endif - -ifeq ($(LIBRT),yes) -CPPFLAGS += -DKOKKOS_USE_LIBRT -DPREC_TIMER -USRLIB += -lrt -endif - -SYSLIB = $(LIBMPI) $(INTELLIB) $(LIBIB) - - -# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file -# from each proc containing various information. -# This macro will also enable a somewhat expensive range-check on indices in -# the exchange_externals function. - -LDFLAGS = $(LINKFLAGS) -LIBS= $(USRLIB) $(SYSLIB) - -# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, -# such as the one on my cygwin machine. - -OBJ += BoxPartition.o YAML_Doc.o YAML_Element.o -OBJ += param_utils.o utils.o mytimer.o -OBJ += main.o - -copy: - @if [ ! -d Obj_cray ]; then mkdir Obj_cray; fi - @echo '' > Obj_cray/KokkosCore_config.h - @cp -p $(SRC) Obj_cray - @cp -p $(KOKKOS_CPY) Obj_cray - @cp Makefile.cray Obj_cray/Makefile - @cd Obj_cray; ../get_common_files - @cd Obj_cray; $(MAKE) all "OBJ = $(OBJ)" - - - - -MINIFE_INFO = 1 -MINIFE_KERNELS = 0 - -vpath %.cpp ../../utils - -all:generate_info miniFE.cray - -miniFE.cray:$(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) ../*.hpp generate_info - $(INSTRUMENT) $(LINK) $(CXXFLAGS) $(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) -o ../miniFE.cray $(LDFLAGS) $(OPTIONAL_LIBS) $(LIBS) - -generate_info: - ./generate_info_header "$(CXX)" "$(CXXFLAGS)" "miniFE" "MINIFE" - -test: - ./run_test cray - -%.o:%.cpp *.hpp - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< - -%.o:%.c *.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - -clean: - rm -rf *.o *.a miniFE.cray *.linkinfo miniFE_info.hpp Obj_cray - -realclean: clean - rm -f gmon.out gprof.* *~ *.yaml *.TVD.* *.mtx* *.vec* minife_debug* - diff --git a/kokkos/src/Makefile.cuda b/kokkos/src/Makefile.cuda deleted file mode 100644 index be9a303..0000000 --- a/kokkos/src/Makefile.cuda +++ /dev/null @@ -1,176 +0,0 @@ -#----------------------------------------------------------------------- -SHELL = /bin/sh - -MPIPATH = /opt/mpi - -CXX=nvcc -CC=nvcc -LINK = $(MPIPATH)/bin/mpicxx - -CUDA = yes -MPI = yes -CUDA_ARCH = sm_35 - -ifeq ($(SVN), yes) - KOKKOSPATH = /opt/Trilinos/packages/kokkos - KOKKOSPATH_INC = $(KOKKOSPATH) -else - #when taking relative paths the include path must be one level further down - #because it starts off in the Obj directory - KOKKOSPATH = ../kokkos - KOKKOSPATH_INC = ../../kokkos -endif - -HWLOCPATH = ./ -CUDAPATH = /usr/local/cuda - -PWD = `pwd` - -SRC = $(shell ls *.cpp;) -CPY = $(PWD)/*.cpp - -KOKKOS_SRC = $(shell cd $(KOKKOSPATH)/core/src/impl; ls *.cpp;) -KOKKOS_CPY = $(KOKKOSPATH)/core/src/impl/*.cpp - -ifeq ($(OMP),yes) -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/OpenMP; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/OpenMP/*.cpp -else -KOKKOS_SRC += $(shell cd 
$(KOKKOSPATH)/core/src/Threads; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/Threads/*.cpp -endif - -ifeq ($(CUDA),yes) -KOKKOS_CUDASRC += $(shell cd $(KOKKOSPATH)/core/src/Cuda; ls *.cu;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/Cuda/*.cu -endif - -MINIFE_TYPES = \ - -DMINIFE_SCALAR=double \ - -DMINIFE_LOCAL_ORDINAL=int \ - -DMINIFE_GLOBAL_ORDINAL=int - -MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX -# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX - -#----------------------------------------------------------------------- -OBJ = $(KOKKOS_SRC:.cpp=.o) $(KOKKOS_CUDASRC:.cu=.o) - -CPPFLAGS = -m64 -O3 -I. -I../ -I../../utils -I../../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DMPICH_IGNORE_CXX_SEEK -I$(KOKKOSPATH_INC)/core/src/ -I$(KOKKOSPATH_INC)/containers/src -I$(KOKKOSPATH_INC)/linalg/src -I$(MPIPATH)/include -arch=$(CUDA_ARCH) -maxrregcount=64 -x cu -Xcompiler -fPIC -restrict -LINKFLAGS = -m64 -O3 -L$(MPIPATH)/lib -L$(INTELPATH)/lib/intel64 -L$(CUDAPATH)/lib64 - -ifeq ($(MPI), yes) -CPPFLAGS += -DHAVE_MPI -I$(MPIPATH)/include -DGPU_MPI -endif - -ifeq ($(CUDA), yes) -CPPFLAGS += -DDEVICE=2 -DKOKKOS_HAVE_CUDA -endif - -ifeq ($(CUSPARSE), yes) -CPPFLAGS += -DKOKKOS_USE_CUSPARSE -USRLIB += -lcusparse -endif - -ifeq ($(CUBLAS), yes) -CPPFLAGS += -DKOKKOS_USE_CUBLAS -USRLIB += -lcublas -endif - -ifeq ($(AVX), yes) -CPPFLAGS += -Xcompiler -mavx -LINKFLAGS += -mavx -endif - -ifeq ($(OMP),yes) -CPPFLAGS += -DKOKKOS_HAVE_OPENMP -Xcompiler -fopenmp -LINKFLAGS += -fopenmp -else -CPPFLAGS += -DKOKKOS_HAVE_PTHREAD -USRLIB += -lpthread -endif - -ifeq ($(HWLOC),yes) -CPPFLAGS += -DKOKKOS_HAVE_HWLOC -I$(HWLOCPATH)/include -USRLIB += -L$(HWLOCPATH)/lib -lhwloc -endif - -ifeq ($(RED_PREC), yes) -CPPFLAGS += --use_fast_math -endif - -ifeq ($(DEBUG), yes) -CPPFLAGS += -g -G -DKOKKOSARRAY_EXPRESSION_CHECK -DENABLE_TRACEBACK -LINKFLAGS += -g -endif - -ifeq ($(LIBRT),yes) -CPPFLAGS += -DKOKKOS_USE_LIBRT -DPREC_TIMER -USRLIB += -lrt -endif - -ifeq ($(CUDALDG), yes) -CPPFLAGS += -DKOKKOS_USE_LDG_INTRINSIC -endif - -SYSLIB = -lcuda -lcudart $(LIBMPI) $(INTELLIB) $(LIBIB) - - -# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file -# from each proc containing various information. -# This macro will also enable a somewhat expensive range-check on indices in -# the exchange_externals function. - -LDFLAGS = $(LINKFLAGS) -LIBS= $(USRLIB) $(SYSLIB) - -# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, -# such as the one on my cygwin machine. - -OBJ += BoxPartition.o YAML_Doc.o YAML_Element.o -OBJ += param_utils.o utils.o mytimer.o -OBJ += main.o - -copy: - @if [ ! 
-d Obj_cuda ]; then mkdir Obj_cuda; fi - @echo '' > Obj_cuda/KokkosCore_config.h - @cp -p $(SRC) Obj_cuda - @cp -p $(KOKKOS_CPY) Obj_cuda - @cp Makefile.cuda Obj_cuda/Makefile - @cd Obj_cuda; ../get_common_files - @cd Obj_cuda; $(MAKE) all "OBJ = $(OBJ)" - - - - -MINIFE_INFO = 1 -MINIFE_KERNELS = 0 - -vpath %.cpp ../../utils - -all:generate_info miniFE.cuda - -miniFE.cuda:$(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) ../*.hpp generate_info - $(INSTRUMENT) $(LINK) $(CXXFLAGS) $(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) -o ../miniFE.cuda $(LDFLAGS) $(OPTIONAL_LIBS) $(LIBS) - -generate_info: - ./generate_info_header "$(CXX)" "$(CXXFLAGS)" "miniFE" "MINIFE" - -test: - ./run_test cuda -.SUFFIXES: .cu - -%.o:%.cpp *.hpp - $(CXX) $(CUDA_SWITCH) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< -.cu.o: - $(CXX) $(CUDA_SWITCH) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< - -%.o:%.c *.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - -clean: - rm -rf *.o *.a miniFE.cuda *.linkinfo miniFE_info.hpp Obj_cuda - -realclean: clean - rm -f gmon.out gprof.* *~ *.yaml *.TVD.* *.mtx* *.vec* minife_debug* - diff --git a/kokkos/src/Makefile.intel b/kokkos/src/Makefile.intel deleted file mode 100644 index ed83848..0000000 --- a/kokkos/src/Makefile.intel +++ /dev/null @@ -1,176 +0,0 @@ -#----------------------------------------------------------------------- -SHELL = /bin/sh - -MPIPATH = /opt/mpi - - -CXX = mpiicpc -CC = mpiicc -LINK = mpiicpc - -AVX = yes -MPI = yes -OMP = yes -KNC = yes - -ifeq ($(SVN), yes) - KOKKOSPATH = /opt/Trilinos/packages/kokkos - KOKKOSPATH_INC = $(KOKKOSPATH) -else - #when taking relative paths the include path must be one level further down - #because it starts off in the Obj directory - KOKKOSPATH = ../kokkos - KOKKOSPATH_INC = ../../kokkos -endif - -HWLOCPATH = ./ - -PWD = `pwd` - -SRC = $(shell ls *.cpp;) -CPY = $(PWD)/*.cpp - -KOKKOS_SRC = $(shell cd $(KOKKOSPATH)/core/src/impl; ls *.cpp;) -KOKKOS_CPY = $(KOKKOSPATH)/core/src/impl/*.cpp - -ifeq ($(OMP),yes) -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/OpenMP; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/OpenMP/*.cpp -else -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/Threads; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/Threads/*.cpp -endif - -MINIFE_TYPES = \ - -DMINIFE_SCALAR=double \ - -DMINIFE_LOCAL_ORDINAL=int \ - -DMINIFE_GLOBAL_ORDINAL=int - -MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX -# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX - -#----------------------------------------------------------------------- -OBJ = $(KOKKOS_SRC:.cpp=.o) - -CPPFLAGS = -O3 -I. 
-I../ -I../../utils -I../../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DMPICH_IGNORE_CXX_SEEK -I$(KOKKOSPATH_INC)/core/src -I$(KOKKOSPATH_INC)/containers/src -I$(KOKKOSPATH_INC)/linalg/src -fPIC -restrict -LINKFLAGS = -O3 - -ifeq ($(MPI), yes) -CPPFLAGS += -DHAVE_MPI -endif - -#Check for KNC compile -ifeq ($(KNC), yes) -CPPFLAGS += -mmic -LINKFLAGS += -mmic -override AVX = -endif - -ifeq ($(AVX), yes) -CPPFLAGS += -mavx -LINKFLAGS += -mavx -endif - -ifeq ($(OMP),yes) -CPPFLAGS += -DKOKKOS_HAVE_OPENMP -fopenmp -LINKFLAGS += -fopenmp -else -CPPFLAGS += -DKOKKOS_HAVE_PTHREAD -USRLIB += -lpthread -endif - -ifeq ($(HWLOC),yes) -CPPFLAGS += -DKOKKOS_HAVE_HWLOC -I$(HWLOCPATH)/include -USRLIB += -L$(HWLOCPATH)/lib -lhwloc -endif - -ifeq ($(ANSI_ALIAS), yes) -CPPFLAGS += -ansi-alias -LINKFLAGS += -ansi-alias -endif - -ifeq ($(DEBUG), yes) -CPPFLAGS += -g -G -DKOKKOSARRAY_EXPRESSION_CHECK -DENABLE_TRACEBACK -LINKFLAGS += -g -endif - -ifeq ($(RED_PREC), yes) -ifeq ($(KNC), yes) -CPPFLAGS += -fimf-precision=low -fimf-domain-exclusion=15 -LINKFLAGS += -fimf-precision=low -fimf-domain-exclusion=15 -else -CCFLAGS += -mGLOB_default_function_attrs="use_approx_f64_divide=true" -LINKFLAGS += -mGLOB_default_function_attrs="use_approx_f64_divide=true" -endif -endif - -ifeq ($(GSUNROLL), yes) -ifeq ($(KNC), yes) -CPPFLAGS += -mGLOB_default_function_attrs="gather_scatter_loop_unroll=7; use_gather_scatter_hint=on" -LINKFLAGS += -mGLOB_default_function_attrs="gather_scatter_loop_unroll=7; use_gather_scatter_hint=on" -endif -endif - -ifeq ($(LIBRT),yes) -CPPFLAGS += -DKOKKOS_USE_LIBRT -DPREC_TIMER -USRLIB += -lrt -endif - -SYSLIB = $(LIBMPI) $(INTELLIB) $(LIBIB) - - -# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file -# from each proc containing various information. -# This macro will also enable a somewhat expensive range-check on indices in -# the exchange_externals function. - -LDFLAGS = $(LINKFLAGS) -LIBS= $(USRLIB) $(SYSLIB) - -# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, -# such as the one on my cygwin machine. - -OBJ += BoxPartition.o YAML_Doc.o YAML_Element.o -OBJ += param_utils.o utils.o mytimer.o -OBJ += main.o - -copy: - @if [ ! 
-d Obj_intel ]; then mkdir Obj_intel; fi - @echo '' > Obj_intel/KokkosCore_config.h - @cp -p $(SRC) Obj_intel - @cp -p $(KOKKOS_CPY) Obj_intel - @cp Makefile.intel Obj_intel/Makefile - @cd Obj_intel; ../get_common_files - @cd Obj_intel; $(MAKE) all "OBJ = $(OBJ)" - - - - -MINIFE_INFO = 1 -MINIFE_KERNELS = 0 - -vpath %.cpp ../../utils - -all:generate_info miniFE.intel - -miniFE.intel:$(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) ../*.hpp generate_info - $(INSTRUMENT) $(LINK) $(CXXFLAGS) $(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) -o ../miniFE.intel $(LDFLAGS) $(OPTIONAL_LIBS) $(LIBS) - -generate_info: - ./generate_info_header "$(CXX)" "$(CXXFLAGS)" "miniFE" "MINIFE" - -test: - ./run_test intel - -%.o:%.cpp *.hpp - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< - -%.o:%.c *.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - -clean: - rm -rf *.o *.a miniFE.intel *.linkinfo miniFE_info.hpp Obj_intel - -realclean: clean - rm -f gmon.out gprof.* *~ *.yaml *.TVD.* *.mtx* *.vec* minife_debug* - diff --git a/kokkos/src/Makefile.openmpi b/kokkos/src/Makefile.openmpi deleted file mode 100644 index 27b9988..0000000 --- a/kokkos/src/Makefile.openmpi +++ /dev/null @@ -1,151 +0,0 @@ -#----------------------------------------------------------------------- -SHELL = /bin/sh - -MPIPATH = /opt/mpi - - -CXX = mpicxx -CC = mpicc -LINK = mpicxx - -AVX = yes -MPI = yes -OMP = yes - -ifeq ($(SVN), yes) - KOKKOSPATH = /opt/Trilinos/packages/kokkos - KOKKOSPATH_INC = $(KOKKOSPATH) -else - #when taking relative paths the include path must be one level further down - #because it starts off in the Obj directory - KOKKOSPATH = ../kokkos - KOKKOSPATH_INC = ../../kokkos -endif - -HWLOCPATH = ./ - -PWD = `pwd` - -SRC = $(shell ls *.cpp;) -CPY = $(PWD)/*.cpp - -KOKKOS_SRC = $(shell cd $(KOKKOSPATH)/core/src/impl; ls *.cpp;) -KOKKOS_CPY = $(KOKKOSPATH)/core/src/impl/*.cpp - -ifeq ($(OMP),yes) -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/OpenMP; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/OpenMP/*.cpp -else -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/Threads; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/Threads/*.cpp -endif - -MINIFE_TYPES = \ - -DMINIFE_SCALAR=double \ - -DMINIFE_LOCAL_ORDINAL=int \ - -DMINIFE_GLOBAL_ORDINAL=int - -MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX -# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX - -#----------------------------------------------------------------------- -OBJ = $(KOKKOS_SRC:.cpp=.o) - -CPPFLAGS = -O3 -I. 
-I../ -I../../utils -I../../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DMPICH_IGNORE_CXX_SEEK -I$(KOKKOSPATH_INC)/core/src -I$(KOKKOSPATH_INC)/containers/src -I$(KOKKOSPATH_INC)/linalg/src -fPIC -LINKFLAGS = -O3 - -ifeq ($(MPI), yes) -CPPFLAGS += -DHAVE_MPI -endif - -ifeq ($(AVX), yes) -CPPFLAGS += -mavx -LINKFLAGS += -mavx -endif - -ifeq ($(OMP),yes) -CPPFLAGS += -DKOKKOS_HAVE_OPENMP -fopenmp -LINKFLAGS += -fopenmp -else -CPPFLAGS += -DKOKKOS_HAVE_PTHREAD -USRLIB += -lpthread -endif - -ifeq ($(HWLOC),yes) -CPPFLAGS += -DKOKKOS_HAVE_HWLOC -I$(HWLOCPATH)/include -USRLIB += -L$(HWLOCPATH)/lib -lhwloc -endif - -ifeq ($(ANSI_ALIAS), yes) -CPPFLAGS += -ansi-alias -LINKFLAGS += -ansi-alias -endif - -ifeq ($(DEBUG), yes) -CPPFLAGS += -g -G -DKOKKOSARRAY_EXPRESSION_CHECK -DENABLE_TRACEBACK -LINKFLAGS += -g -endif - -ifeq ($(LIBRT),yes) -CPPFLAGS += -DKOKKOS_USE_LIBRT -DPREC_TIMER -USRLIB += -lrt -endif - -SYSLIB = $(LIBMPI) $(INTELLIB) $(LIBIB) - - -# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file -# from each proc containing various information. -# This macro will also enable a somewhat expensive range-check on indices in -# the exchange_externals function. - -LDFLAGS = $(LINKFLAGS) -LIBS= $(USRLIB) $(SYSLIB) - -# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, -# such as the one on my cygwin machine. - -OBJ += BoxPartition.o YAML_Doc.o YAML_Element.o -OBJ += param_utils.o utils.o mytimer.o -OBJ += main.o - -copy: - @if [ ! -d Obj_openmpi ]; then mkdir Obj_openmpi; fi - @echo '' > Obj_openmpi/KokkosCore_config.h - @cp -p $(SRC) Obj_openmpi - @cp -p $(KOKKOS_CPY) Obj_openmpi - @cp Makefile.openmpi Obj_openmpi/Makefile - @cd Obj_openmpi; ../get_common_files - @cd Obj_openmpi; $(MAKE) all "OBJ = $(OBJ)" - - - - -MINIFE_INFO = 1 -MINIFE_KERNELS = 0 - -vpath %.cpp ../../utils - -all:generate_info miniFE.openmpi - -miniFE.openmpi:$(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) ../*.hpp generate_info - $(INSTRUMENT) $(LINK) $(CXXFLAGS) $(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) -o ../miniFE.openmpi $(LDFLAGS) $(OPTIONAL_LIBS) $(LIBS) - -generate_info: - ./generate_info_header "$(CXX)" "$(CXXFLAGS)" "miniFE" "MINIFE" - -test: - ./run_test openmpi - -%.o:%.cpp *.hpp - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< - -%.o:%.c *.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - -clean: - rm -rf *.o *.a miniFE.openmpi *.linkinfo miniFE_info.hpp Obj_openmpi - -realclean: clean - rm -f gmon.out gprof.* *~ *.yaml *.TVD.* *.mtx* *.vec* minife_debug* - diff --git a/kokkos/src/Makefile.titan b/kokkos/src/Makefile.titan deleted file mode 100644 index 230b277..0000000 --- a/kokkos/src/Makefile.titan +++ /dev/null @@ -1,180 +0,0 @@ -#----------------------------------------------------------------------- -SHELL = /bin/sh - -MPIPATH = /opt/mpi - -# CXX = nvcc --compiler-bindir `which CC` -# CC = nvcc --compiler-bindir `which CC` -# LINK = nvcc --compiler-bindir `which CC` - -CC_HOST = `which CC` -CXX = nvcc -CC = nvcc -LINK = nvcc - -CUDA = yes -MPI = yes -CUDA_ARCH = sm_35 - -ifeq ($(SVN), yes) - KOKKOSPATH = /opt/Trilinos/kokkos - KOKKOSPATH_INC = $(KOKKOSPATH) -else - #when taking relative paths the include path must be one level further down - #because it starts off in the Obj directory - KOKKOSPATH = ../kokkos - KOKKOSPATH_INC = ../../kokkos -endif - -HWLOCPATH = ./ - -PWD = `pwd` - -SRC = $(shell ls *.cpp;) -CPY = $(PWD)/*.cpp - -KOKKOS_SRC = $(shell cd $(KOKKOSPATH)/core/src/impl; ls *.cpp;) 
-KOKKOS_CPY = $(KOKKOSPATH)/core/src/impl/*.cpp - -ifeq ($(OMP),yes) -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/OpenMP; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/OpenMP/*.cpp -else -KOKKOS_SRC += $(shell cd $(KOKKOSPATH)/core/src/Threads; ls *.cpp;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/Threads/*.cpp -endif - -ifeq ($(CUDA),yes) -KOKKOS_CUDASRC += $(shell cd $(KOKKOSPATH)/core/src/Cuda; ls *.cu;) -KOKKOS_CPY += $(KOKKOSPATH)/core/src/Cuda/*.cu -endif - -MINIFE_TYPES = \ - -DMINIFE_SCALAR=double \ - -DMINIFE_LOCAL_ORDINAL=int \ - -DMINIFE_GLOBAL_ORDINAL=int - -MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX -# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX - -#----------------------------------------------------------------------- -OBJ = $(KOKKOS_SRC:.cpp=.o) $(KOKKOS_CUDASRC:.cu=.o) - -CPPFLAGS = --compiler-bindir $(CC_HOST) -m64 -O3 -I. -I../ -I../../utils -I../../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DMPICH_IGNORE_CXX_SEEK -I$(KOKKOSPATH_INC)/core/src/ -I$(KOKKOSPATH_INC)/containers/src -I$(KOKKOSPATH_INC)/linalg/src -I$(MPIPATH)/include -arch=$(CUDA_ARCH) -maxrregcount=64 -x cu -Xcompiler -fPIC -restrict -LINKFLAGS = --compiler-bindir $(CC_HOST) -m64 -O3 -L$(MPIPATH)/lib -L$(INTELPATH)/lib/intel64 - -ifeq ($(MPI), yes) -CPPFLAGS += -DHAVE_MPI -I$(MPIPATH)/include -DGPU_MPI -endif - -ifeq ($(CUDA), yes) -CPPFLAGS += -DDEVICE=2 -DKOKKOS_HAVE_CUDA -endif - -ifeq ($(CUSPARSE), yes) -CPPFLAGS += -DKOKKOS_USE_CUSPARSE -USRLIB += -lcusparse -endif - -ifeq ($(CUBLAS), yes) -CPPFLAGS += -DKOKKOS_USE_CUBLAS -USRLIB += -lcublas -endif - -ifeq ($(AVX), yes) -CPPFLAGS += -Xcompiler -mavx -LINKFLAGS += -mavx -endif - -ifeq ($(OMP),yes) -CPPFLAGS += -DKOKKOS_HAVE_OPENMP -Xcompiler -fopenmp -LINKFLAGS += -fopenmp -else -CPPFLAGS += -DKOKKOS_HAVE_PTHREAD -USRLIB += -lpthread -endif - -ifeq ($(HWLOC),yes) -CPPFLAGS += -DKOKKOS_HAVE_HWLOC -I$(HWLOCPATH)/include -USRLIB += -L$(HWLOCPATH)/lib -lhwloc -endif - -ifeq ($(RED_PREC), yes) -CPPFLAGS += --use_fast_math -endif - -ifeq ($(DEBUG), yes) -CPPFLAGS += -g -G -DKOKKOSARRAY_EXPRESSION_CHECK -DENABLE_TRACEBACK -LINKFLAGS += -g -endif - -ifeq ($(LIBRT),yes) -CPPFLAGS += -DKOKKOS_USE_LIBRT -DPREC_TIMER -USRLIB += -lrt -endif - -ifeq ($(CUDALDG), yes) -CPPFLAGS += -DKOKKOS_USE_LDG_INTRINSIC -endif - -SYSLIB = -lcuda -lcudart $(LIBMPI) $(INTELLIB) $(LIBIB) - - -# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file -# from each proc containing various information. -# This macro will also enable a somewhat expensive range-check on indices in -# the exchange_externals function. - -LDFLAGS = $(LINKFLAGS) -LIBS= $(USRLIB) $(SYSLIB) - -# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, -# such as the one on my cygwin machine. - -OBJ += BoxPartition.o YAML_Doc.o YAML_Element.o -OBJ += param_utils.o utils.o mytimer.o -OBJ += main.o - -copy: - @if [ ! 
-d Obj_titan ]; then mkdir Obj_titan; fi - @echo '' > Obj_titan/KokkosCore_config.h - @cp -p $(SRC) Obj_titan - @cp -p $(KOKKOS_CPY) Obj_titan - @cp Makefile.titan Obj_titan/Makefile - @cd Obj_titan; ../get_common_files - @cd Obj_titan; $(MAKE) all "OBJ = $(OBJ)" - - - - -MINIFE_INFO = 1 -MINIFE_KERNELS = 0 - -vpath %.cpp ../../utils - -all:generate_info miniFE.titan - -miniFE.titan:$(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) ../*.hpp generate_info - $(INSTRUMENT) $(LINK) $(CXXFLAGS) $(OBJ) $(MAIN_OBJ) $(OBJS) $(UTIL_OBJS) $(OPTIONAL_OBJS) -o ../miniFE.titan $(LDFLAGS) $(OPTIONAL_LIBS) $(LIBS) - -generate_info: - ./generate_info_header "$(CXX)" "$(CXXFLAGS)" "miniFE" "MINIFE" - -test: - ./run_test titan -.SUFFIXES: .cu - -%.o:%.cpp *.hpp - $(CXX) $(CUDA_SWITCH) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< -.cu.o: - $(CXX) $(CUDA_SWITCH) $(CXXFLAGS) $(CPPFLAGS) -DMINIFE_INFO=$(MINIFE_INFO) -DMINIFE_KERNELS=$(MINIFE_KERNELS) -c $< - -%.o:%.c *.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< - -clean: - rm -rf *.o *.a *.x *.linkinfo miniFE_info.hpp Obj_titan - -realclean: clean - rm -f gmon.out gprof.* *~ *.yaml *.TVD.* *.mtx* *.vec* minife_debug* - diff --git a/kokkos/src/SparseMatrix_functions.hpp b/kokkos/src/SparseMatrix_functions.hpp index b5ad036..2ccfbdc 100644 --- a/kokkos/src/SparseMatrix_functions.hpp +++ b/kokkos/src/SparseMatrix_functions.hpp @@ -200,7 +200,6 @@ sum_in_symm_elem_matrix(size_t num, //std::cout<0 ? _A.rows[0] : 0; @@ -481,7 +480,7 @@ struct impose_dirichlet_functorB { {} //-------------------------------------------------------------------------- - KOKKOS_INLINE_FUNCTION + inline void operator()( const int i ) const { GlobalOrdinal row = _A.rows[i]; @@ -523,11 +522,11 @@ impose_dirichlet(typename MatrixType::ScalarType prescribed_value, GlobalOrdinal last_local_row = A.rows.size()>0 ? 
A.rows[A.rows.size()-1] : -1; impose_dirichlet_functorA fA(prescribed_value,A,b,bc_rows); - Kokkos::parallel_for(bc_rows.size(),fA); + Kokkos::parallel_for("impose_dirichlet_A",bc_rows.size(),fA); MatrixType::device_type::fence(); impose_dirichlet_functorB fB(prescribed_value,A,b,bc_rows); - Kokkos::parallel_for(A.rows.size(),fB); + Kokkos::parallel_for("impose_dirichlet_B",A.rows.size(),fB); MatrixType::device_type::fence(); } diff --git a/kokkos/src/Vector.hpp b/kokkos/src/Vector.hpp index 5b02637..08d8095 100644 --- a/kokkos/src/Vector.hpp +++ b/kokkos/src/Vector.hpp @@ -64,7 +64,7 @@ struct Vector { GlobalOrdinal startIndex; LocalOrdinal local_size; - Kokkos::vector coefs; + Kokkos::vector coefs; }; diff --git a/kokkos/src/Vector_functions.hpp b/kokkos/src/Vector_functions.hpp index 398753b..69de148 100644 --- a/kokkos/src/Vector_functions.hpp +++ b/kokkos/src/Vector_functions.hpp @@ -95,7 +95,7 @@ void sum_into_vector(size_t num_indices, GlobalOrdinal first = vec.startIndex; GlobalOrdinal last = first + vec.local_size - 1; - Kokkos::vector& vec_coefs = vec.coefs; + Kokkos::vector& vec_coefs = vec.coefs; for(size_t i=0; i last) continue; diff --git a/kokkos/src/exchange_externals.hpp b/kokkos/src/exchange_externals.hpp index e692a85..42ddf0d 100644 --- a/kokkos/src/exchange_externals.hpp +++ b/kokkos/src/exchange_externals.hpp @@ -47,9 +47,9 @@ template _x; - Kokkos::vector _elements_to_send; - Kokkos::vector _send_buffer; + Kokkos::vector _x; + Kokkos::vector _elements_to_send; + Kokkos::vector _send_buffer; exchange_externals_functor(const MatrixType& A, const VectorType& x):_x(x.coefs),_elements_to_send(A.elements_to_send),_send_buffer(A.send_buffer) { @@ -90,12 +90,12 @@ exchange_externals(MatrixType& A, int local_nrow = A.rows.size(); int num_neighbors = A.neighbors.size(); - const Kokkos::vector& recv_length = A.recv_length; - const Kokkos::vector& send_length = A.send_length; - const Kokkos::vector& neighbors = A.neighbors; - const Kokkos::vector& elements_to_send = A.elements_to_send; + const Kokkos::vector& recv_length = A.recv_length; + const Kokkos::vector& send_length = A.send_length; + const Kokkos::vector& neighbors = A.neighbors; + const Kokkos::vector& elements_to_send = A.elements_to_send; - Kokkos::vector& send_buffer = A.send_buffer; + Kokkos::vector& send_buffer = A.send_buffer; // // first post receives, these are immediate receives @@ -111,7 +111,7 @@ exchange_externals(MatrixType& A, // Externals are at end of locals // - Kokkos::vector& x_coefs = x.coefs; + Kokkos::vector& x_coefs = x.coefs; #ifndef GPU_MPI Scalar* x_external = x_coefs.h_view.ptr_on_device() + local_nrow; #else @@ -150,7 +150,7 @@ exchange_externals(MatrixType& A, send_buffer[i] = x.coefs[elements_to_send[i]]; }*/ exchange_externals_functor f(A,x); - Kokkos::parallel_for(total_to_be_sent,f); + Kokkos::parallel_for("exchange_externals",total_to_be_sent,f); // // Send to each neighbor @@ -222,12 +222,12 @@ begin_exchange_externals(MatrixType& A, int local_nrow = A.rows.size(); int num_neighbors = A.neighbors.size(); - const Kokkos::vector& recv_length = A.recv_length; - const Kokkos::vector& send_length = A.send_length; - const Kokkos::vector& neighbors = A.neighbors; - const Kokkos::vector& elements_to_send = A.elements_to_send; + const Kokkos::vector& recv_length = A.recv_length; + const Kokkos::vector& send_length = A.send_length; + const Kokkos::vector& neighbors = A.neighbors; + const Kokkos::vector& elements_to_send = A.elements_to_send; - Kokkos::vector send_buffer(elements_to_send.size(), 
0); + Kokkos::vector send_buffer(elements_to_send.size(), 0); // // first post receives, these are immediate receives @@ -243,7 +243,7 @@ begin_exchange_externals(MatrixType& A, // Externals are at end of locals // - Kokkos::vector& x_coefs = x.coefs; + Kokkos::vector& x_coefs = x.coefs; Scalar* x_external = &(x_coefs[local_nrow]); MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); diff --git a/kokkos/src/generate_matrix_structure.hpp b/kokkos/src/generate_matrix_structure.hpp index d3eea45..ffef083 100644 --- a/kokkos/src/generate_matrix_structure.hpp +++ b/kokkos/src/generate_matrix_structure.hpp @@ -104,7 +104,7 @@ struct generate_matrix_structure_functor { box_dims[2] = box[2][1] - box[2][0]; } - KOKKOS_INLINE_FUNCTION + inline void operator() (const int &roffset) const{ int iz = roffset/(box_dims[1]*box_dims[0]) + box[2][0]; int iy = (roffset/box_dims[0])%box_dims[1] + box[1][0]; @@ -149,7 +149,7 @@ generate_matrix_structure(const simple_mesh_description functor(mesh,&A); - Kokkos::parallel_for(functor.box_dims[0]*functor.box_dims[1]*functor.box_dims[2],functor); + Kokkos::parallel_for("generate_matrix_structure",functor.box_dims[0]*functor.box_dims[1]*functor.box_dims[2],functor); host_device_type::fence(); for(int i=0;i= params.skip_device) device++; - } - - if((str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) != NULL) { - cudaGetDeviceCount(&dev_count); - local_rank = atoi(str); - device = local_rank % params.num_devices; - - if(device >= params.skip_device) device++; - } - if((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) != NULL) { - cudaGetDeviceCount(&dev_count); - local_rank = atoi(str); - device = local_rank % params.num_devices; - - if(device >= params.skip_device) device++; - } - - Kokkos::Cuda::host_mirror_device_type::initialize(params.numa,params.numthreads); - Kokkos::Cuda::SelectDevice select_device(device); - Kokkos::Cuda::initialize(select_device); -#endif - int numprocs = 1, myproc = 0; miniFE::initialize_mpi(argc, argv, numprocs, myproc); + + Kokkos::initialize(argc,argv); + if(myproc==0) { std::cout << "MiniFE Mini-App, Kokkos Peer Implementation" << std::endl; } @@ -126,10 +100,6 @@ int main(int argc, char** argv) { //make sure each processor has the same parameters: miniFE::broadcast_parameters(params); -#ifndef KOKKOS_HAVE_CUDA - device_device_type::initialize(params.numa,params.numthreads); -#endif - Box global_box = { 0, params.nx, 0, params.ny, 0, params.nz }; std::vector local_boxes(numprocs); @@ -164,7 +134,7 @@ int main(int argc, char** argv) { YAML_Doc doc("miniFE", MINIFE_VERSION, ".", osstr.str()); if (myproc == 0) { add_params_to_yaml(doc, params); - add_configuration_to_yaml(doc, numprocs, params.numthreads); + add_configuration_to_yaml(doc, numprocs); add_timestring_to_yaml(doc); } @@ -183,10 +153,9 @@ int main(int argc, char** argv) { doc.add("Total Program Time",total_time); doc.generateYAML(); } -#ifdef KOKKOS_HAVE_CUDA - host_device_type::finalize(); -#endif - device_device_type::finalize(); + + Kokkos::finalize(); + miniFE::finalize_mpi(); return return_code; @@ -210,7 +179,7 @@ void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params) } } -void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads) +void add_configuration_to_yaml(YAML_Doc& doc, int numprocs) { doc.get("Global Run Parameters")->add("number of processors", numprocs); diff --git a/kokkos/src/make_local_matrix.hpp b/kokkos/src/make_local_matrix.hpp index efb99a8..5d91b3b 100644 --- a/kokkos/src/make_local_matrix.hpp +++ b/kokkos/src/make_local_matrix.hpp @@ -80,7 
+80,7 @@ make_local_matrix(MatrixType& A) /////////////////////////////////////////// // Scan the indices and transform to local /////////////////////////////////////////// - Kokkos::vector& external_index = A.external_index; + Kokkos::vector& external_index = A.external_index; for(size_t i=0; i tmp_buffer(numprocs, 0); // Temp buffer space needed below + Kokkos::vector tmp_buffer(numprocs, 0); // Temp buffer space needed below // Build list of global index offset - Kokkos::vector global_index_offsets(numprocs, 0); + Kokkos::vector global_index_offsets(numprocs, 0); tmp_buffer[myproc] = start_row; // This is my start row @@ -127,7 +127,7 @@ make_local_matrix(MatrixType& A) MPI_SUM, MPI_COMM_WORLD); // Go through list of externals and find the processor that owns each - Kokkos::vector external_processor(num_external); + Kokkos::vector external_processor(num_external); for(LocalOrdinal i=0; i& external_local_index = A.external_local_index; + Kokkos::vector& external_local_index = A.external_local_index; external_local_index.on_host(); external_local_index.assign(num_external, -1); @@ -177,7 +177,7 @@ make_local_matrix(MatrixType& A) } } - Kokkos::vector new_external_processor(num_external, 0); + Kokkos::vector new_external_processor(num_external, 0); for(int i=0; i tmp_neighbors(numprocs, 0); + Kokkos::vector tmp_neighbors(numprocs, 0); int num_recv_neighbors = 0; int length = 1; @@ -231,7 +231,7 @@ make_local_matrix(MatrixType& A) /// /////////////////////////////////////////////////////////////////////// - Kokkos::vector recv_list; + Kokkos::vector recv_list; recv_list.push_back(new_external_processor[0]); for(LocalOrdinal i=1; i send_list(num_send_neighbors, 0); + Kokkos::vector send_list(num_send_neighbors, 0); // // first post receives, these are immediate receives @@ -308,7 +308,7 @@ make_local_matrix(MatrixType& A) // order given by 'external_local_index' // - Kokkos::vector new_external(num_external); + Kokkos::vector new_external(num_external); for(LocalOrdinal i=0; i lengths(num_recv_neighbors); + Kokkos::vector lengths(num_recv_neighbors); ++MPI_MY_TAG; @@ -332,9 +332,9 @@ make_local_matrix(MatrixType& A) &request[i]); } - Kokkos::vector& neighbors = A.neighbors; - Kokkos::vector& recv_length = A.recv_length; - Kokkos::vector& send_length = A.send_length; + Kokkos::vector& neighbors = A.neighbors; + Kokkos::vector& recv_length = A.recv_length; + Kokkos::vector& send_length = A.send_length; neighbors.resize(num_recv_neighbors, 0); A.request.resize(num_recv_neighbors); diff --git a/kokkos/src/perform_element_loop.hpp b/kokkos/src/perform_element_loop.hpp index b55b4a7..93662a9 100644 --- a/kokkos/src/perform_element_loop.hpp +++ b/kokkos/src/perform_element_loop.hpp @@ -57,7 +57,7 @@ struct perform_element_loop_functor { // ElemData _elem_data; //-------------------------------------------------------------------------- - KOKKOS_INLINE_FUNCTION + inline void operator()( const int i ) const { ElemData elem_data;// = _elem_data; @@ -116,7 +116,7 @@ perform_element_loop(const simple_mesh_description& mesh, compute_gradient_values(elem_data.grad_vals); struct perform_element_loop_functor f(&A,&b,mesh,h_elemIDs,elem_data); - Kokkos::parallel_for(h_elemIDs.dimension_0(),f); + Kokkos::parallel_for("perform_element_loop",h_elemIDs.dimension_0(),f); device_device_type::fence(); }
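
Note: the recurring pattern in the source hunks above is (1) a string label added as the first argument of Kokkos::parallel_for and (2) plain functors whose operator()(const int) is dispatched over a 1-D index range. The sketch below is a minimal, self-contained illustration of that pattern only; the functor name and the View it touches are hypothetical and not taken from miniFE.

    #include <Kokkos_Core.hpp>

    // Hypothetical functor standing in for the miniFE functors
    // (impose_dirichlet_functorB, generate_matrix_structure_functor, ...):
    // state captured by value, per-index work done in operator().
    struct scale_functor {
      Kokkos::View<double*> x;
      double alpha;

      scale_functor(Kokkos::View<double*> x_, double alpha_)
        : x(x_), alpha(alpha_) {}

      // The patch switches these operators from KOKKOS_INLINE_FUNCTION to plain
      // `inline` because they only run on host backends here; the macro remains
      // the portable spelling when a device backend may be enabled.
      KOKKOS_INLINE_FUNCTION
      void operator()(const int i) const { x(i) *= alpha; }
    };

    void scale(Kokkos::View<double*> x, double alpha) {
      // The label is the new first argument added throughout the patch; it is
      // the name that Kokkos profiling tools report for this kernel.
      Kokkos::parallel_for("scale_vector", x.extent(0), scale_functor(x, alpha));
      Kokkos::fence();  // the patch fences via MatrixType::device_type::fence()
    }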
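
The main.cpp hunk replaces the hand-rolled, backend-specific setup (CUDA device selection from MV2_COMM_WORLD_LOCAL_RANK / OMPI_COMM_WORLD_LOCAL_RANK and explicit per-backend ::initialize / ::finalize calls) with a single Kokkos::initialize / Kokkos::finalize pair bracketed by MPI. A minimal sketch of that ordering follows, assuming nothing about miniFE's own wrappers (initialize_mpi, miniFE_main, ...):

    #include <mpi.h>
    #include <Kokkos_Core.hpp>
    #include <iostream>

    int main(int argc, char** argv) {
      MPI_Init(&argc, &argv);          // miniFE does this via miniFE::initialize_mpi
      Kokkos::initialize(argc, argv);  // one call covers OpenMP, Threads, CUDA, ...
      {
        int rank = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        if (rank == 0) {
          Kokkos::print_configuration(std::cout);  // reports the active backend
        }
        // ... problem setup, assembly, and solve would go here ...
      }
      Kokkos::finalize();              // finalize Kokkos before MPI, as the patch does
      MPI_Finalize();
      return 0;
    }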