dkurt · dolgopolov-d · Jul 16, 2020 · Jul 16, 2020
diff --git a/modules/7_nlp/include/model.hpp b/modules/7_nlp/include/model.hpp
@@ -14,4 +14,5 @@ class SQuADModel {
 private:
     Tokenizer tokenizer;
     InferenceEngine::InferRequest req;
+	std::string outputName;
 };
diff --git a/modules/7_nlp/src/model.cpp b/modules/7_nlp/src/model.cpp
@@ -17,12 +17,20 @@ SQuADModel::SQuADModel() : tokenizer(join(DATA_FOLDER, "bert-large-uncased-vocab
     // Load deep learning network into memory
     CNNNetwork net = ie.ReadNetwork(join(DATA_FOLDER, "distilbert.xml"),
                                     join(DATA_FOLDER, "distilbert.bin"));
-
+	InputInfo::Ptr inputInfo = net.getInputsInfo()["input.1"];
+	inputInfo->setPrecision(Precision::I32);
     // Initialize runnable object on CPU device
     ExecutableNetwork execNet = ie.LoadNetwork(net, "CPU");
 
     // Create a single processing thread
     req = execNet.CreateInferRequest();
+	outputName = net.getOutputsInfo().begin()->first;
+}
+
+// Wraps the first 128 ids of `str` in a 1x128 I32 blob without copying; `str` must hold >= 128 ids and outlive the blob.
+Blob::Ptr wrapVecToBlob(const std::vector<int>& str) {  // by reference: a by-value copy would die on return and leave the blob dangling
+    const std::vector<size_t> dims = { 1, 128 };
+    return make_shared_blob<int>(TensorDesc(Precision::I32, dims, Layout::NC), const_cast<int*>(str.data()));
 }
 
 std::string SQuADModel::getAnswer(const std::string& question, const std::string& source) {
@@ -41,6 +49,9 @@ std::string SQuADModel::getAnswer(const std::string& question, const std::string
     std::vector<int> indices = tokenizer.tokensToIndices(tokens);
 
     // TODO: forward indices through the network and return an answer
-
+	Blob::Ptr input = wrapVecToBlob(indices);
+	req.SetBlob("input.1", input);
+	req.Infer();
+	float* output = req.GetBlob(outputName)->buffer();
     return "";
 }
diff --git a/modules/7_nlp/src/tokenizer.cpp b/modules/7_nlp/src/tokenizer.cpp
@@ -6,7 +6,36 @@
 #include <opencv2/opencv.hpp>
 
 std::vector<std::string> basicTokenize(const std::string& text) {
-    CV_Error(cv::Error::StsNotImplemented, "basicTokenize");
+	std::vector<std::string> tokens;
+	std::string copy = text;
+	std::string alpha;
+	std::string punct;
+	for (int i = 0; i < copy.size(); i++)
+		if (isupper(copy[i]))
+			copy[i] = tolower(copy[i]);
+	for (int i = 0; i < copy.size(); i++)
+		if (isalpha(copy[i]))
+			alpha.push_back(copy[i]);
+		else
+		{
+			if (!isspace(copy[i]))
+			{
+				if (!alpha.empty())
+					tokens.push_back(alpha);
+				alpha.clear();
+				punct.push_back(copy[i]);
+				tokens.push_back(punct);
+				punct.clear();
+			}
+			else
+			{
+				if (!alpha.empty())
+					tokens.push_back(alpha);
+				alpha.clear();
+			}
+		}
+	tokens.push_back(alpha);
+	return tokens;
 }
 
 std::vector<std::string> wordTokenize(const std::string& word,