Migrate PR#3638 from master. (#3735)

* Check size requirement before creating scratchpad.
* Check max scratchpad size before allocating scratchpad_buffer.
* Add the same checks for CODEGEN.
* Fix unused-parameter warning.
* Fix a typo.
* Address PR feedback.
* Fix a bug.
* Fix a typo.
NervanaSystems · Oct 7, 2019 · c1f3bee · c1f3bee
1 parent be738d0
commit c1f3bee
Show file tree

Hide file tree

Showing 33 changed files with 998 additions and 555 deletions.
diff --git a/src/ngraph/runtime/cpu/builder/add.cpp b/src/ngraph/runtime/cpu/builder/add.cpp
@@ -40,7 +40,7 @@ namespace ngraph
 
                     auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                     auto sum_pd = mkldnn_emitter->get_elementwise_add_desc(node);
-                    QUERY_SCRATCHPAD(sum, sum_pd);
+                    size_t scratchpad_size = QUERY_SCRATCHPAD(sum, sum_pd);
 
                     // Add needs 4 primitives: input0, input1, result, and sum.
                     size_t add_index = mkldnn_emitter->reserve_primitive_space(4);
@@ -55,6 +55,7 @@ namespace ngraph
                     auto functor = [&,
                                     sum_pd,
                                     add_index,
+                                    scratchpad_size,
                                     arg0_buffer_index,
                                     arg1_buffer_index,
                                     out_buffer_index](CPURuntimeContext* ctx,
@@ -76,7 +77,7 @@ namespace ngraph
                             ctx, deps[2], ctx->buffer_data[out_buffer_index]);
 
                         cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                            ctx, add_index, deps, cpu::mkldnn_utils::OpType::ADD);
+                            ctx, add_index, deps, cpu::mkldnn_utils::OpType::ADD, scratchpad_size);
                     };
                     functors.emplace_back(functor);
                 }

diff --git a/src/ngraph/runtime/cpu/builder/avg_pool.cpp b/src/ngraph/runtime/cpu/builder/avg_pool.cpp
@@ -55,32 +55,40 @@ namespace ngraph
                     auto avg_pool_desc =
                         mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPool>(node,
                                                                                           false);
-                    QUERY_SCRATCHPAD(pooling_forward, avg_pool_desc);
+                    size_t scratchpad_size = QUERY_SCRATCHPAD(pooling_forward, avg_pool_desc);
 
                     // AvgPool needs 3 primitives: input, result, and pooling_forward.
                     size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
                     auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
 
-                    auto functor =
-                        [&, avg_pool_desc, avg_pool_index, arg0_buffer_index, out_buffer_index](
-                            CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
-                            if (ctx->first_iteration)
-                            {
-                                mkldnn_emitter->build_pooling_forward(ctx->mkldnn_memories,
-                                                                      ctx->mkldnn_primitives,
-                                                                      ctx->mkldnn_scratchpad_mds,
-                                                                      avg_pool_desc,
-                                                                      deps,
-                                                                      avg_pool_index);
-                            }
-                            cpu::mkldnn_utils::set_memory_ptr(
-                                ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
-                            cpu::mkldnn_utils::set_memory_ptr(
-                                ctx, deps[1], ctx->buffer_data[out_buffer_index]);
-
-                            cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                                ctx, avg_pool_index, deps, cpu::mkldnn_utils::OpType::AVGPOOL);
-                        };
+                    auto functor = [&,
+                                    avg_pool_desc,
+                                    avg_pool_index,
+                                    scratchpad_size,
+                                    arg0_buffer_index,
+                                    out_buffer_index](CPURuntimeContext* ctx,
+                                                      CPUExecutionContext* ectx) {
+                        if (ctx->first_iteration)
+                        {
+                            mkldnn_emitter->build_pooling_forward(ctx->mkldnn_memories,
+                                                                  ctx->mkldnn_primitives,
+                                                                  ctx->mkldnn_scratchpad_mds,
+                                                                  avg_pool_desc,
+                                                                  deps,
+                                                                  avg_pool_index);
+                        }
+                        cpu::mkldnn_utils::set_memory_ptr(
+                            ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
+                        cpu::mkldnn_utils::set_memory_ptr(
+                            ctx, deps[1], ctx->buffer_data[out_buffer_index]);
+
+                        cpu::mkldnn_utils::mkldnn_invoke_primitive(
+                            ctx,
+                            avg_pool_index,
+                            deps,
+                            cpu::mkldnn_utils::OpType::AVGPOOL,
+                            scratchpad_size);
+                    };
                     functors.emplace_back(functor);
                 }
                 else
@@ -145,7 +153,8 @@ namespace ngraph
                     auto avg_pool_desc =
                         mkldnn_emitter->get_avg_pooling_backward_desc<ngraph::op::AvgPoolBackprop>(
                             node);
-                    QUERY_SCRATCHPAD_2ARGS(avg_pooling_backward, avg_pool_fwd_desc, avg_pool_desc);
+                    size_t scratchpad_size = QUERY_SCRATCHPAD_2ARGS(
+                        avg_pooling_backward, avg_pool_fwd_desc, avg_pool_desc);
 
                     // AvgPoolBackprop needs 3 primitives: input, result, and pooling_backward.
                     size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
@@ -155,6 +164,7 @@ namespace ngraph
                                     avg_pool_desc,
                                     avg_pool_fwd_desc,
                                     avg_pool_index,
+                                    scratchpad_size,
                                     delta_buffer_index,
                                     out_buffer_index](CPURuntimeContext* ctx,
                                                       CPUExecutionContext* ectx) {
@@ -174,7 +184,11 @@ namespace ngraph
                             ctx, deps[1], ctx->buffer_data[out_buffer_index]);
 
                         cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                            ctx, avg_pool_index, deps, cpu::mkldnn_utils::OpType::AVGPOOLBACKPROP);
+                            ctx,
+                            avg_pool_index,
+                            deps,
+                            cpu::mkldnn_utils::OpType::AVGPOOLBACKPROP,
+                            scratchpad_size);
                     };
                     functors.emplace_back(functor);
                 }

diff --git a/src/ngraph/runtime/cpu/builder/batch_norm.cpp b/src/ngraph/runtime/cpu/builder/batch_norm.cpp
@@ -84,7 +84,8 @@ namespace ngraph
                     auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                     auto batchnorm_desc =
                         mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, true);
-                    QUERY_SCRATCHPAD_2ARGS(batchnorm_forward, batchnorm_desc, ops);
+                    size_t scratchpad_size =
+                        QUERY_SCRATCHPAD_2ARGS(batchnorm_forward, batchnorm_desc, ops);
 
                     auto weights_shape = Shape{2, args[0].get_size()};
                     auto weights_desc = mkldnn_emitter->build_memory_descriptor(
@@ -101,6 +102,7 @@ namespace ngraph
                                     training,
                                     ops,
                                     batchnorm_index,
+                                    scratchpad_size,
                                     stacked_weights,
                                     weight_sizes,
                                     arg0_buffer_index,
@@ -140,7 +142,11 @@ namespace ngraph
                             ctx, deps[4], ctx->buffer_data[out2_buffer_index]);
 
                         cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                            ctx, batchnorm_index, deps, cpu::mkldnn_utils::OpType::BATCHNORM3ARGS);
+                            ctx,
+                            batchnorm_index,
+                            deps,
+                            cpu::mkldnn_utils::OpType::BATCHNORM3ARGS,
+                            scratchpad_size);
                     };
                     functors.emplace_back(functor);
                 }
@@ -155,7 +161,8 @@ namespace ngraph
                     auto batchnorm_desc =
                         mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, false);
 
-                    QUERY_SCRATCHPAD_2ARGS(batchnorm_forward, batchnorm_desc, ops);
+                    size_t scratchpad_size =
+                        QUERY_SCRATCHPAD_2ARGS(batchnorm_forward, batchnorm_desc, ops);
 
                     auto weights_shape = Shape{2, args[0].get_size()};
                     auto weights_desc = mkldnn_emitter->build_memory_descriptor(
@@ -172,6 +179,7 @@ namespace ngraph
                                     training,
                                     ops,
                                     batchnorm_index,
+                                    scratchpad_size,
                                     stacked_weights,
                                     weight_sizes,
                                     arg0_buffer_index,
@@ -211,7 +219,11 @@ namespace ngraph
                             ctx, deps[4], ctx->buffer_data[out0_buffer_index]);
 
                         cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                            ctx, batchnorm_index, deps, cpu::mkldnn_utils::OpType::BATCHNORM5ARGS);
+                            ctx,
+                            batchnorm_index,
+                            deps,
+                            cpu::mkldnn_utils::OpType::BATCHNORM5ARGS,
+                            scratchpad_size);
                     };
                     functors.emplace_back(functor);
                 }
@@ -444,14 +456,16 @@ namespace ngraph
                     static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
                 auto eps = batchnorm->get_eps_value();
                 (void)eps; // Use depends on mkl-dnn version
-                QUERY_SCRATCHPAD_3ARGS(batchnorm_backward, batchnorm_desc, input_desc, eps);
+                size_t scratchpad_size =
+                    QUERY_SCRATCHPAD_3ARGS(batchnorm_backward, batchnorm_desc, input_desc, eps);
 
                 auto functor = [&,
                                 batchnorm_desc,
                                 input_desc,
                                 weights_desc,
                                 dweights_desc,
                                 batchnorm_index,
+                                scratchpad_size,
                                 stacked_weights,
                                 stacked_dweights,
                                 weight_sizes,
@@ -499,7 +513,11 @@ namespace ngraph
                     cpu::mkldnn_utils::set_memory_ptr(ctx, deps[6], stacked_dweights.get());
 
                     cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                        ctx, batchnorm_index, deps, cpu::mkldnn_utils::OpType::BATCHNORMBACKPROP);
+                        ctx,
+                        batchnorm_index,
+                        deps,
+                        cpu::mkldnn_utils::OpType::BATCHNORMBACKPROP,
+                        scratchpad_size);
 
                     memcpy(ctx->buffer_data[out1_buffer_index],
                            stacked_dweights.get(),

diff --git a/src/ngraph/runtime/cpu/builder/bounded_relu.cpp b/src/ngraph/runtime/cpu/builder/bounded_relu.cpp
@@ -44,7 +44,7 @@ namespace ngraph
                 {
                     auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                     auto bounded_relu_desc = mkldnn_emitter->get_bounded_relu_desc(node);
-                    QUERY_SCRATCHPAD(eltwise_forward, bounded_relu_desc);
+                    size_t scratchpad_size = QUERY_SCRATCHPAD(eltwise_forward, bounded_relu_desc);
 
                     // BoundedRelu needs 3 primitives: input, result, and eltwise_forward.
                     auto bounded_relu_index = mkldnn_emitter->reserve_primitive_space(3);
@@ -53,6 +53,7 @@ namespace ngraph
                     auto functor = [&,
                                     bounded_relu_desc,
                                     bounded_relu_index,
+                                    scratchpad_size,
                                     input_buffer_index,
                                     out_buffer_index](CPURuntimeContext* ctx,
                                                       CPUExecutionContext* ectx) {
@@ -71,7 +72,11 @@ namespace ngraph
                             ctx, deps[1], ctx->buffer_data[out_buffer_index]);
 
                         cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                            ctx, bounded_relu_index, deps, cpu::mkldnn_utils::OpType::BOUNDEDRELU);
+                            ctx,
+                            bounded_relu_index,
+                            deps,
+                            cpu::mkldnn_utils::OpType::BOUNDEDRELU,
+                            scratchpad_size);
                     };
                     functors.emplace_back(functor);
                 }

diff --git a/src/ngraph/runtime/cpu/builder/concat.cpp b/src/ngraph/runtime/cpu/builder/concat.cpp
@@ -101,7 +101,7 @@ namespace ngraph
                     auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                     auto concat_pd =
                         mkldnn_emitter->get_concat_desc<ngraph::op::Concat>(node, nargs);
-                    QUERY_SCRATCHPAD(concat, concat_pd);
+                    size_t scratchpad_size = QUERY_SCRATCHPAD(concat, concat_pd);
 
                     std::vector<mkldnn::memory::desc> inputs_data_desc;
                     for (size_t i = 0; i < nargs; i++)
@@ -115,6 +115,7 @@ namespace ngraph
 
                     auto functor = [&,
                                     concat_pd,
+                                    scratchpad_size,
                                     inputs_data_desc,
                                     arg_buffer_indices,
                                     nargs,
@@ -140,7 +141,11 @@ namespace ngraph
                             ctx, deps[nargs], ctx->buffer_data[out_buffer_index]);
 
                         cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                            ctx, concat_index, deps, cpu::mkldnn_utils::OpType::CONCAT);
+                            ctx,
+                            concat_index,
+                            deps,
+                            cpu::mkldnn_utils::OpType::CONCAT,
+                            scratchpad_size);
                     };
 
                     functors.emplace_back(functor);

diff --git a/src/ngraph/runtime/cpu/builder/convert_layout.cpp b/src/ngraph/runtime/cpu/builder/convert_layout.cpp
@@ -43,6 +43,8 @@ namespace ngraph
                 auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                 auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
 
+                size_t scratchpad_size = 0;
+
 #if MKLDNN_VERSION_MAJOR < 1
                 if (input_desc.data.format == mkldnn_nchw &&
                     result_desc.data.format == mkldnn_goihw)
@@ -131,32 +133,41 @@ namespace ngraph
                         mkldnn::memory::format_tag::goihw);
                 }
 
-                mkldnn_emitter->query_scratchpad_reorder(input_desc, result_desc);
+                scratchpad_size = mkldnn_emitter->query_scratchpad_reorder(input_desc, result_desc);
 #endif
                 // ConvertLayout needs 3 primitives: input, result, and reorder.
                 size_t reorder_index = mkldnn_emitter->reserve_primitive_space(3);
                 auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
-                auto functor =
-                    [&, input_desc, result_desc, reorder_index, arg_buffer_index, out_buffer_index](
-                        CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
-                        if (ctx->first_iteration)
-                        {
-                            mkldnn_emitter->build_reorder(ctx->mkldnn_memories,
-                                                          ctx->mkldnn_primitives,
-                                                          ctx->mkldnn_scratchpad_mds,
-                                                          input_desc,
-                                                          result_desc,
-                                                          deps,
-                                                          reorder_index);
-                        }
-                        cpu::mkldnn_utils::set_memory_ptr(
-                            ctx, deps[0], ctx->buffer_data[arg_buffer_index]);
-                        cpu::mkldnn_utils::set_memory_ptr(
-                            ctx, deps[1], ctx->buffer_data[out_buffer_index]);
+                auto functor = [&,
+                                input_desc,
+                                result_desc,
+                                reorder_index,
+                                scratchpad_size,
+                                arg_buffer_index,
+                                out_buffer_index](CPURuntimeContext* ctx,
+                                                  CPUExecutionContext* ectx) {
+                    if (ctx->first_iteration)
+                    {
+                        mkldnn_emitter->build_reorder(ctx->mkldnn_memories,
+                                                      ctx->mkldnn_primitives,
+                                                      ctx->mkldnn_scratchpad_mds,
+                                                      input_desc,
+                                                      result_desc,
+                                                      deps,
+                                                      reorder_index);
+                    }
+                    cpu::mkldnn_utils::set_memory_ptr(
+                        ctx, deps[0], ctx->buffer_data[arg_buffer_index]);
+                    cpu::mkldnn_utils::set_memory_ptr(
+                        ctx, deps[1], ctx->buffer_data[out_buffer_index]);
 
-                        cpu::mkldnn_utils::mkldnn_invoke_primitive(
-                            ctx, reorder_index, deps, cpu::mkldnn_utils::OpType::CONVERTLAYOUT);
-                    };
+                    cpu::mkldnn_utils::mkldnn_invoke_primitive(
+                        ctx,
+                        reorder_index,
+                        deps,
+                        cpu::mkldnn_utils::OpType::CONVERTLAYOUT,
+                        scratchpad_size);
+                };
                 functors.emplace_back(functor);
             }