This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

MKLdnn Integration Patch to improve issue #2986 (call for CPU performance) #3438

Closed · wants to merge 9 commits
7 changes: 7 additions & 0 deletions Makefile
@@ -63,6 +63,13 @@ ifeq ($(USE_CUDNN), 1)
LDFLAGS += -lcudnn
endif

ifeq ($(USE_MKLDNN),1)
CFLAGS += -DMXNET_USE_MKLDNN=1 -I$(MKLDNN_ROOT)/include
LDFLAGS += -L$(MKLDNN_ROOT)/lib/ -liomp5 -lmklml_gnu -lmklml_intel
else
CFLAGS += -DMXNET_USE_MKLDNN=0
endif

ifeq ($(USE_THREADED_ENGINE), 1)
CFLAGS += -DMXNET_USE_THREADED_ENGINE
endif
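
For orientation: the -DMXNET_USE_MKLDNN=1 define added here is what the C++ operator sources key off. A minimal illustration of the gating pattern, mirroring the includes this patch adds to the operator files below:

```
// Compile-time gating on the flag set in the Makefile: when USE_MKLDNN=1,
// CFLAGS carries -DMXNET_USE_MKLDNN=1 and the MKLDNN headers are pulled in;
// otherwise the macro is 0 and the generic CPU path is compiled.
#if MXNET_USE_MKLDNN == 1
#include "./mkldnn/mkldnn_convolution-inl.h"  // MKLDNN-backed operator
#endif
```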
6 changes: 6 additions & 0 deletions make/config.mk
@@ -51,6 +51,12 @@ USE_CUDNN = 0
# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
USE_NVRTC = 0

# whether use MKLDNN library
USE_MKLDNN = 0

# MKLDNN root library
MKLDNN_ROOT = NONE

# whether use opencv during compilation
# you can disable it, however, you will not be able to use
# imbin iterator
2 changes: 1 addition & 1 deletion mshadow
4 changes: 2 additions & 2 deletions src/operator/activation-inl.h
@@ -144,11 +144,11 @@ class ActivationProp : public OperatorProperty {
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data) const override {
#if MXNET_USE_CUDNN == 1
#if (MXNET_USE_CUDNN == 1) || (MXNET_USE_MKLDNN == 1)
return {out_grad[activation::kOut], out_data[activation::kOut], in_data[activation::kData]};
#else
return {out_grad[activation::kOut], out_data[activation::kOut]};
#endif // MXNET_USE_CUDNN
#endif // MXNET_USE_CUDNN or MXNET_USE_MKLDNN
}

std::vector<std::pair<int, void*> > BackwardInplaceOption(
26 changes: 25 additions & 1 deletion src/operator/activation.cc
@@ -6,12 +6,35 @@
*/
#include "./activation-inl.h"
#include "./mshadow_op.h"

#if MXNET_USE_MKLDNN == 1
#include "./mkldnn/mkldnn_ReLU-inl.h"
#endif
namespace mxnet {
namespace op {
template<>
Operator *CreateOp<cpu>(ActivationParam param, int dtype) {
Operator *op = NULL;
#if MXNET_USE_MKLDNN == 1
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
switch (param.act_type) {
case activation::kReLU:
// MKLDNN only supports ReLU
op = new MKLDNNReLUOp<DType>(param);
break;
case activation::kSigmoid:
op = new ActivationOp<cpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad, DType>();
break;
case activation::kTanh:
op = new ActivationOp<cpu, mshadow_op::tanh, mshadow_op::tanh_grad, DType>();
break;
case activation::kSoftReLU:
op = new ActivationOp<cpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>();
break;
default:
LOG(FATAL) << "unknown activation type";
}
})
#else
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
switch (param.act_type) {
case activation::kReLU:
@@ -30,6 +53,7 @@ Operator *CreateOp<cpu>(ActivationParam param, int dtype) {
LOG(FATAL) << "unknown activation type";
}
})
#endif
return op;
}

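The activation change above illustrates the PR's general dispatch strategy: route to the MKLDNN kernel only for the one op it supports (ReLU) and keep the mshadow fallback for everything else. A self-contained sketch of that pattern, using hypothetical names (Op, FastRelu, GenericRelu) rather than MXNet's actual classes:

```
#include <memory>

// Hypothetical stand-ins for this sketch -- not MXNet classes.
struct Op { virtual ~Op() = default; };
struct GenericRelu : Op {};
struct GenericSigmoid : Op {};
#if USE_FAST_BACKEND
struct FastRelu : Op {};  // accelerated kernel exists for ReLU only
#endif

enum class ActType { kReLU, kSigmoid };

// Prefer the accelerated backend where it has a kernel; otherwise
// fall back to the generic CPU implementation.
std::unique_ptr<Op> CreateActivation(ActType t) {
  switch (t) {
    case ActType::kReLU:
#if USE_FAST_BACKEND
      return std::unique_ptr<Op>(new FastRelu());
#else
      return std::unique_ptr<Op>(new GenericRelu());
#endif
    case ActType::kSigmoid:
    default:
      return std::unique_ptr<Op>(new GenericSigmoid());
  }
}
```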
7 changes: 7 additions & 0 deletions src/operator/batch_norm.cc
@@ -6,12 +6,19 @@
*/

#include "./batch_norm-inl.h"
#if MXNET_USE_MKLDNN == 1
#include "./mkldnn/mkldnn_batch_norm-inl.h"
#endif

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<cpu>(BatchNormParam param) {
#if MXNET_USE_MKLDNN == 1
return new MKLBatchNormOp<float>(param);
#else
return new BatchNormOp<cpu>(param);
#endif
}

Operator *BatchNormProp::CreateOperator(Context ctx) const {
9 changes: 9 additions & 0 deletions src/operator/convolution.cc
@@ -6,6 +6,9 @@
*/

#include "./convolution-inl.h"
#if MXNET_USE_MKLDNN == 1
#include "./mkldnn/mkldnn_convolution-inl.h"
#endif

namespace mxnet {
namespace op {
@@ -15,9 +18,15 @@ Operator* CreateOp<cpu>(ConvolutionParam param, int dtype,
std::vector<TShape> *out_shape,
Context ctx) {
Operator *op = NULL;
#if MXNET_USE_MKLDNN == 1
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
op = new MKLDNNConvolutionOp<DType>(param);
})
#else
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
op = new ConvolutionOp<cpu, DType>(param);
})
#endif
return op;
}

8 changes: 7 additions & 1 deletion src/operator/fully_connected-inl.h
@@ -15,7 +15,6 @@
#include <utility>
#include "./operator_common.h"


namespace mxnet {
namespace op {

@@ -56,6 +55,9 @@ class FullyConnectedOp : public Operator {
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
#ifdef PRINT_LAYER_TIME
[Inline review on the PRINT_LAYER_TIME block]
Contributor: we will have a profiler soon. This should be removed, but it's OK if you want to leave it here until the profiler comes in.
Author: Thanks for the information. I'll remove it for now.

double start_time = dmlc::GetTime();
#endif
if (req[fullc::kOut] == kNullOp) return;
CHECK_EQ(req[fullc::kOut], kWriteTo);
size_t expected = param_.no_bias ? 2 : 3;
@@ -82,6 +84,10 @@ class FullyConnectedOp : public Operator {
Tensor<xpu, 1, DType> bias = in_data[fullc::kBias].get<xpu, 1, DType>(s);
out += repmat(bias, data.size(0));
}
#ifdef PRINT_LAYER_TIME
double end_time = dmlc::GetTime();
LOG(INFO) << "forward FC " << (end_time - start_time) << " s";
#endif
}

virtual void Backward(const OpContext &ctx,
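On the PRINT_LAYER_TIME discussion above: until the promised profiler lands, an RAII scope timer is one way to avoid paired #ifdef blocks around every timed region. This is an illustrative sketch only (std::chrono in place of dmlc::GetTime), not part of the patch:

```
#include <chrono>
#include <iostream>
#include <string>
#include <utility>

// Logs elapsed wall time for the enclosing scope when it is destroyed.
class ScopeTimer {
 public:
  explicit ScopeTimer(std::string label)
      : label_(std::move(label)),
        start_(std::chrono::steady_clock::now()) {}
  ~ScopeTimer() {
    std::chrono::duration<double> elapsed =
        std::chrono::steady_clock::now() - start_;
    std::cout << label_ << " took " << elapsed.count() << " s\n";
  }
 private:
  std::string label_;
  std::chrono::steady_clock::time_point start_;
};

// Usage inside Forward(): ScopeTimer t("forward FC");  // logs on scope exit
```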
20 changes: 20 additions & 0 deletions src/operator/fully_connected.cc
@@ -4,11 +4,30 @@
* \brief fully connect operator
*/
#include "./fully_connected-inl.h"
#if MXNET_USE_MKLDNN == 1
#include "./mkldnn/mkldnn_fully_connected-inl.h"
#endif
namespace mxnet {
namespace op {
template<>
Operator* CreateOp<cpu>(FullyConnectedParam param, int dtype) {
Operator *op = NULL;
#if MXNET_USE_MKLDNN == 1
switch (dtype) {
case mshadow::kFloat32:
op = new MKLDNNFullyConnectedOp<float>(param);
break;
case mshadow::kFloat64:
op = new MKLDNNFullyConnectedOp<double>(param);
break;
case mshadow::kFloat16:
LOG(FATAL) << "float16 fully connected layer is "
"not supported by MKLDNN.";
break;
default:
LOG(FATAL) << "Unsupported type " << dtype;
}
#else
switch (dtype) {
case mshadow::kFloat32:
op = new FullyConnectedOp<cpu, float>(param);
@@ -23,6 +42,7 @@ Operator* CreateOp<cpu>(FullyConnectedParam param, int dtype) {
default:
LOG(FATAL) << "Unsupported type " << dtype;
}
#endif
return op;
}

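The dtype handling above follows a second recurring pattern in this patch: map a runtime dtype tag to a template instantiation, and reject types the backend cannot handle (float16 here). A hypothetical, self-contained sketch -- the names below are illustrative, not MXNet's:

```
#include <memory>
#include <stdexcept>

enum DTypeTag { kFloat32, kFloat64, kFloat16 };

struct Op { virtual ~Op() = default; };
template <typename DType> struct FcOp : Op {};  // stand-in operator

std::unique_ptr<Op> CreateFc(DTypeTag dtype) {
  switch (dtype) {
    case kFloat32: return std::unique_ptr<Op>(new FcOp<float>());
    case kFloat64: return std::unique_ptr<Op>(new FcOp<double>());
    case kFloat16:
      throw std::runtime_error("float16 FC not supported by this backend");
    default:
      throw std::runtime_error("unsupported dtype");
  }
}
```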
8 changes: 8 additions & 0 deletions src/operator/lrn.cc
@@ -9,12 +9,20 @@
#if MXNET_USE_CUDNN == 1
#include "./cudnn_lrn-inl.h"
#endif
#if MXNET_USE_MKLDNN == 1
#include "./mkldnn/mkldnn_lrn-inl.h"
#endif


namespace mxnet {
namespace op {
template<>
Operator* CreateOp<cpu>(LRNParam param) {
#if MXNET_USE_MKLDNN == 1
return new MKLDNNLocalResponseNormOp<real_t>(param);
#else
return new LocalResponseNormOp<cpu>(param);
#endif
}

Operator* LocalResponseNormProp::CreateOperator(Context ctx) const {
Expand Down
31 changes: 31 additions & 0 deletions src/operator/mkldnn/README.md
@@ -0,0 +1,31 @@
# MKLDNN PLUGIN

MKLDNN is a library released by Intel to accelerate Deep Neural Network (DNN) applications on Intel architectures.
This README shows how to set up mxnet with the MKLDNN library.

## prepare the MKLDNN minimal library
```
cd <MXNET ROOTDIR>
mkdir -p ./external/mkl
wget https://github.com/intel/caffe/releases/download/self_containted_MKLGOLD/mklml_lnx_2017.0.0.20160801.tgz
mv mklml_lnx_2017.0.0.20160801.tgz ./external/mkl
cd external/mkl
tar zxvf mklml_lnx_2017.0.0.20160801.tgz
cd <MXNET ROOTDIR>
```

## update config.mk
```
USE_MKLDNN = 1 # set USE_MKLDNN on
MKLDNN_ROOT = <MXNET ROOTDIR>/external/mkl/mklml_lnx_2017.0.0.20160801 # set MKLDNN ROOT PATH
```

## update LD_LIBRARY_PATH
```
export LD_LIBRARY_PATH=<MXNET ROOTDIR>/external/mkl/mklml_lnx_2017.0.0.20160801/lib:$LD_LIBRARY_PATH
```

## build mxnet
```
make -j8
```
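
As a quick sanity check that the macro reached the compiler (illustrative only, not part of the patch), a one-file program can be built with the same flag:

```
// check_mkldnn_flag.cc -- e.g.:
//   g++ -DMXNET_USE_MKLDNN=1 check_mkldnn_flag.cc -o check && ./check
#include <cstdio>

int main() {
#if MXNET_USE_MKLDNN == 1
  std::printf("built with MXNET_USE_MKLDNN=1\n");
#else
  std::printf("built without MKLDNN\n");
#endif
  return 0;
}
```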