Skip to content

Commit

Permalink
Fixed paths to models
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexKoff88 committed Oct 23, 2024
1 parent e279b70 commit f6756c9
Showing 1 changed file with 51 additions and 55 deletions.
106 changes: 51 additions & 55 deletions notebooks/openvino/sentence_transformer_quantization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -53,7 +53,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "53d4d1f1703a4e52812ea366c06f2d67",
"model_id": "a9bd847756fd467e905a7ad7a243640c",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -77,7 +77,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a3de9a9bbdd942069b85519c83267f83",
"model_id": "9d8ad91623d642f48e85b60ac823aca4",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -101,7 +101,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ebc55f3ce3974aaa8861474699d5a15f",
"model_id": "a2a7d09a573c4092a830bbaadc39f756",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -125,7 +125,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f206e4e8651f4f449f9dcb1fc11ef266",
"model_id": "b67c493aab36426090f8fafd25a17a00",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -163,7 +163,7 @@
" 'all-MiniLM-L6-v2_int8/tokenizer.json')"
]
},
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -175,16 +175,17 @@
"from optimum.intel import OVModelForFeatureExtraction, OVQuantizer, OVQuantizationConfig, OVConfig\n",
"\n",
"MODEL_ID = \"sentence-transformers/all-MiniLM-L6-v2\"\n",
"base_model_path = \"all-MiniLM-L6-v2\"\n",
"int8_ptq_model_path = \"all-MiniLM-L6-v2_int8\"\n",
"\n",
"model = OVModelForFeatureExtraction.from_pretrained(MODEL_ID)\n",
"model.save_pretrained(\"all-MiniLM-L6-v2\")\n",
"model.save_pretrained(base_model_path)\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
"tokenizer.save_pretrained(\"all-MiniLM-L6-v2\")\n",
"tokenizer.save_pretrained(base_model_path)\n",
"\n",
"DATASET_NAME = \"squad\"\n",
"dataset = datasets.load_dataset(DATASET_NAME)\n",
"int8_ptq_model_path = \"all-MiniLM-L6-v2_int8\"\n",
"\n",
"quantizer = OVQuantizer.from_pretrained(model)\n",
"\n",
"\n",
Expand Down Expand Up @@ -222,7 +223,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -262,25 +263,26 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No OpenVINO files were found for sentence-transformers/all-MiniLM-L6-v2, setting `export=True` to convert the model to the OpenVINO IR. Don't forget to save the resulting model with `.save_pretrained()`\n",
"Framework not specified. Using pt to export the model.\n",
"Using framework PyTorch: 2.4.1+cpu\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"Compiling the model to CPU ...\n",
"Compiling the model to CPU ...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Compiling the model to CPU ...\n"
]
}
],
"source": [
"model = OVModelForFeatureExtraction.from_pretrained(MODEL_ID)\n",
"model = OVModelForFeatureExtraction.from_pretrained(base_model_path)\n",
"vanilla_emb = SentenceEmbeddingPipeline(model=model, tokenizer=tokenizer)\n",
"\n",
"q_model = OVModelForFeatureExtraction.from_pretrained(int8_ptq_model_path)\n",
Expand All @@ -289,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -302,20 +304,20 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parameter 'function'=<function evaluate_stsb at 0x7f92780c7600> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.\n"
"Parameter 'function'=<function evaluate_stsb at 0x7fdb0c25bba0> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0f28df147f95484c955c3f20f2f954d2",
"model_id": "5cab9e8fc58245a4b395a9575017633b",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -350,15 +352,15 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"vanilla model: pearson= 0.869619439095004\n",
"quantized model: pearson= 0.869326218489249\n",
"quantized model: pearson= 0.869415534480936\n",
"The quantized model achieves 100.0 % accuracy of the fp32 model\n"
]
}
Expand Down Expand Up @@ -392,7 +394,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand All @@ -413,18 +415,18 @@
"[ INFO ] Parsing input parameters\n",
"[Step 2/11] Loading OpenVINO Runtime\n",
"[ INFO ] OpenVINO:\n",
"[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
"[ INFO ] Build ................................. 2024.4.1-16618-643f23d1318-releases/2024/4\n",
"[ INFO ] \n",
"[ INFO ] Device info:\n",
"[ INFO ] CPU\n",
"[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
"[ INFO ] Build ................................. 2024.4.1-16618-643f23d1318-releases/2024/4\n",
"[ INFO ] \n",
"[ INFO ] \n",
"[Step 3/11] Setting device configuration\n",
"[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.LATENCY.\n",
"[Step 4/11] Reading model files\n",
"[ INFO ] Loading model files\n",
"[ INFO ] Read model took 10.87 ms\n",
"[ INFO ] Read model took 10.17 ms\n",
"[ INFO ] Original model I/O parameters:\n",
"[ INFO ] Model inputs:\n",
"[ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?]\n",
Expand All @@ -435,7 +437,7 @@
"[Step 5/11] Resizing model to match image sizes and given batch\n",
"[ INFO ] Model batch size: 1\n",
"[ INFO ] Reshaping model: 'input_ids': [1,384], 'attention_mask': [1,384], 'token_type_ids': [1,384]\n",
"[ INFO ] Reshape model took 3.02 ms\n",
"[ INFO ] Reshape model took 2.23 ms\n",
"[Step 6/11] Configuring input of the model\n",
"[ INFO ] Model inputs:\n",
"[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,384]\n",
Expand All @@ -444,7 +446,7 @@
"[ INFO ] Model outputs:\n",
"[ INFO ] last_hidden_state (node: __module.encoder.layer.5.output.LayerNorm/aten::layer_norm/Add) : f32 / [...] / [1,384,384]\n",
"[Step 7/11] Loading the model to the device\n",
"[ INFO ] Compile model took 125.14 ms\n",
"[ INFO ] Compile model took 134.63 ms\n",
"[Step 8/11] Querying optimal runtime parameters\n",
"[ INFO ] Model:\n",
"[ INFO ] NETWORK_NAME: Model0\n",
Expand Down Expand Up @@ -476,22 +478,16 @@
"[ INFO ] Fill input 'token_type_ids' with random values \n",
"[Step 10/11] Measuring performance (Start inference synchronously, limits: 200 iterations)\n",
"[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).\n",
"[ INFO ] First inference took 13.97 ms\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ INFO ] First inference took 12.27 ms\n",
"[Step 11/11] Dumping statistics report\n",
"[ INFO ] Execution Devices:['CPU']\n",
"[ INFO ] Count: 200 iterations\n",
"[ INFO ] Duration: 1988.82 ms\n",
"[ INFO ] Duration: 1988.84 ms\n",
"[ INFO ] Latency:\n",
"[ INFO ] Median: 9.70 ms\n",
"[ INFO ] Median: 9.74 ms\n",
"[ INFO ] Average: 9.77 ms\n",
"[ INFO ] Min: 9.54 ms\n",
"[ INFO ] Max: 11.35 ms\n",
"[ INFO ] Min: 9.59 ms\n",
"[ INFO ] Max: 11.12 ms\n",
"[ INFO ] Throughput: 100.56 FPS\n"
]
}
Expand All @@ -503,7 +499,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand All @@ -524,18 +520,18 @@
"[ INFO ] Parsing input parameters\n",
"[Step 2/11] Loading OpenVINO Runtime\n",
"[ INFO ] OpenVINO:\n",
"[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
"[ INFO ] Build ................................. 2024.4.1-16618-643f23d1318-releases/2024/4\n",
"[ INFO ] \n",
"[ INFO ] Device info:\n",
"[ INFO ] CPU\n",
"[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
"[ INFO ] Build ................................. 2024.4.1-16618-643f23d1318-releases/2024/4\n",
"[ INFO ] \n",
"[ INFO ] \n",
"[Step 3/11] Setting device configuration\n",
"[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.LATENCY.\n",
"[Step 4/11] Reading model files\n",
"[ INFO ] Loading model files\n",
"[ INFO ] Read model took 15.46 ms\n",
"[ INFO ] Read model took 20.87 ms\n",
"[ INFO ] Original model I/O parameters:\n",
"[ INFO ] Model inputs:\n",
"[ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?]\n",
Expand All @@ -546,7 +542,7 @@
"[Step 5/11] Resizing model to match image sizes and given batch\n",
"[ INFO ] Model batch size: 1\n",
"[ INFO ] Reshaping model: 'input_ids': [1,384], 'attention_mask': [1,384], 'token_type_ids': [1,384]\n",
"[ INFO ] Reshape model took 6.89 ms\n",
"[ INFO ] Reshape model took 3.42 ms\n",
"[Step 6/11] Configuring input of the model\n",
"[ INFO ] Model inputs:\n",
"[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,384]\n",
Expand All @@ -555,7 +551,7 @@
"[ INFO ] Model outputs:\n",
"[ INFO ] last_hidden_state (node: __module.encoder.layer.5.output.LayerNorm/aten::layer_norm/Add) : f32 / [...] / [1,384,384]\n",
"[Step 7/11] Loading the model to the device\n",
"[ INFO ] Compile model took 325.40 ms\n",
"[ INFO ] Compile model took 323.91 ms\n",
"[Step 8/11] Querying optimal runtime parameters\n",
"[ INFO ] Model:\n",
"[ INFO ] NETWORK_NAME: Model0\n",
Expand Down Expand Up @@ -587,17 +583,17 @@
"[ INFO ] Fill input 'token_type_ids' with random values \n",
"[Step 10/11] Measuring performance (Start inference synchronously, limits: 200 iterations)\n",
"[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).\n",
"[ INFO ] First inference took 8.49 ms\n",
"[ INFO ] First inference took 6.72 ms\n",
"[Step 11/11] Dumping statistics report\n",
"[ INFO ] Execution Devices:['CPU']\n",
"[ INFO ] Count: 200 iterations\n",
"[ INFO ] Duration: 869.96 ms\n",
"[ INFO ] Duration: 853.85 ms\n",
"[ INFO ] Latency:\n",
"[ INFO ] Median: 4.17 ms\n",
"[ INFO ] Average: 4.23 ms\n",
"[ INFO ] Min: 4.08 ms\n",
"[ INFO ] Max: 6.04 ms\n",
"[ INFO ] Throughput: 229.89 FPS\n"
"[ INFO ] Median: 4.13 ms\n",
"[ INFO ] Average: 4.15 ms\n",
"[ INFO ] Min: 4.05 ms\n",
"[ INFO ] Max: 5.13 ms\n",
"[ INFO ] Throughput: 234.23 FPS\n"
]
}
],
Expand Down

0 comments on commit f6756c9

Please sign in to comment.