diff --git a/notebooks/parse_all.ipynb b/notebooks/parse_all.ipynb new file mode 100644 index 0000000..7d589e5 --- /dev/null +++ b/notebooks/parse_all.ipynb @@ -0,0 +1,211 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-02-14T15:33:38.550994Z", + "start_time": "2020-02-14T15:33:36.901949Z" + } + }, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "import pandas as pd\n", + "import os\n", + "import glob\n", + "from tqdm import tqdm_notebook as tqdm\n", + "from itertools import chain\n", + "import sys\n", + "sys.path.append(r'C:\\Users\\shossein\\Documents\\personal\\research\\VisualFeatureFull')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-02-14T15:33:38.567949Z", + "start_time": "2020-02-14T15:33:38.552978Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from src.parser import base \n", + "import importlib\n", + "importlib.reload(base)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-02-14T15:33:38.573950Z", + "start_time": "2020-02-14T15:33:38.568947Z" + } + }, + "outputs": [], + "source": [ + "hdf_store_path = 'hdf.hf'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-02-14T15:33:38.581945Z", + "start_time": "2020-02-14T15:33:38.577950Z" + } + }, + "outputs": [], + "source": [ + "str_data_folder = r'C:\\Users\\shossein\\Documents\\personal\\research\\data\\ew'\n", + "str_faces = 'rek-exp-cluster6\\p1\\output\\cluster6\\p1\\000112473\\faces'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2020-02-14T15:33:36.913Z" + } + }, + "outputs": [], + "source": [ + "l_faces_json = glob.glob(os.path.join(str_data_folder, '*', '*', '*', '*', '*', '*', 'faces', '*.json'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2020-02-14T15:33:36.918Z" + } + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "42b93c427f78417bb133db94d832523f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=142651), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "hdf = pd.HDFStore(hdf_store_path, 'a')\n", + "\n", + "base.parse_jsons(l_faces_json, hdf, 50, 'faces')\n", + "\n", + "hdf.flush()\n", + "hdf.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2020-02-14T15:33:36.920Z" + } + }, + "outputs": [], + "source": [ + "l_labels_json = glob.glob(os.path.join(str_data_folder, '*', '*', '*', '*', '*', '*', 'labels', '*.json'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2020-02-14T15:33:36.925Z" + } + }, + "outputs": [], + "source": [ + "hdf = pd.HDFStore(hdf_store_path, 'a')\n", + "\n", + "base.parse_jsons(l_labels_json, hdf, 50, 'labels')\n", + "\n", + "hdf.flush()\n", + "hdf.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2020-02-14T15:33:36.930Z" + } + }, + "outputs": [], + "source": [ + "hdf = pd.HDFStore(hdf_store_path, 'a')\n", + "\n", + "base.parse_jsons(l_celeb_json, hdf, 50, 'celebrity')\n", + "\n", + "hdf.flush()\n", + "hdf.close()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}