-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
389 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,389 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Subdivision of CSO observed data into events" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import libraries\n", | ||
"import os\n", | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"import plotly.graph_objects as go" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Working directory holding CSO.csv; later cells write their CSV outputs here too.\n", | ||
"# Set the CSO_DATA_DIR environment variable to run on another machine;\n", | ||
"# the fallback is the original author's local folder.\n", | ||
"DATA_DIR = os.environ.get(\n", | ||
"    'CSO_DATA_DIR',\n", | ||
"    'C:\\\\Users\\\\rpal\\\\Source\\\\modelskill\\\\tmp\\\\RPAL\\\\data\\\\obs_and_model_data_Rocco')\n", | ||
"os.chdir(DATA_DIR)\n", | ||
"\n", | ||
"# Load the time series; the first CSV column becomes the index and is parsed as dates\n", | ||
"CSO = pd.read_csv('CSO.csv', sep=',', header=0, index_col=0, parse_dates=True)\n", | ||
"\n", | ||
"# Keep only rows where both the observed ('filtered') and modelled values exist.\n", | ||
"# .copy() makes CSO an independent frame, so the columns added in later cells\n", | ||
"# do not trigger SettingWithCopyWarning.\n", | ||
"CSO = CSO[CSO['filtered'].notna() & CSO['model'].notna()].copy()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Create column for detection of events" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Event-detection signal: the larger of the modelled and observed value at each time step\n", | ||
"CSO['event_signal'] = CSO[['model', 'filtered']].max(axis=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Identify event start and end times based on a defined threshold value" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Threshold above which the detection signal counts as an active event\n", | ||
"det_thr = 0.001\n", | ||
"\n", | ||
"# True wherever the detection signal is above the threshold\n", | ||
"active = CSO['event_signal'].gt(det_thr)\n", | ||
"\n", | ||
"# Event starts = active samples whose previous sample is not active.\n", | ||
"# fill_value=False treats the sample before the record as inactive, so an event\n", | ||
"# already running at the first time step still gets a start.\n", | ||
"start_idx = active & ~active.shift(1, fill_value=False)\n", | ||
"start_event = CSO.index[start_idx]\n", | ||
"\n", | ||
"# Event ends = active samples whose next sample is not active.\n", | ||
"# fill_value=False likewise closes an event still running at the last time step.\n", | ||
"end_idx = active & ~active.shift(-1, fill_value=False)\n", | ||
"end_event = CSO.index[end_idx]\n", | ||
"\n", | ||
"# Starts and ends now always come in pairs: one row per detected event\n", | ||
"events = pd.DataFrame({'start': start_event, 'end': end_event})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Merge events that are separated by gaps no longer than a given minimum gap" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Minimum gap that separates two distinct events\n", | ||
"min_gap = '1 hour'\n", | ||
"\n", | ||
"# Time elapsed between the end of the previous event and the start of the current one\n", | ||
"gap_to_previous = events['start'] - events['end'].shift(1)\n", | ||
"\n", | ||
"# A new ID begins whenever the gap exceeds min_gap; IDs are numbered from 1\n", | ||
"event_id = ((gap_to_previous > min_gap).cumsum() + 1).rename('ID')\n", | ||
"\n", | ||
"# Merge rows sharing an ID: first start and last end of each group\n", | ||
"events = events.groupby(event_id).agg({'start': 'first', 'end': 'last'})\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Assign event index to original series" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Label every time step with the ID of the event it falls in (0 = no event)\n", | ||
"CSO['event'] = 0\n", | ||
"for row in events.itertuples():\n", | ||
"    # Label-based slice: both the start and the end timestamp are included\n", | ||
"    CSO.loc[row.start:row.end, 'event'] = row.Index\n", | ||
"\n", | ||
"# The detection signal is no longer needed once the events are labelled\n", | ||
"CSO = CSO.drop(columns='event_signal')\n", | ||
"\n", | ||
"# Save the labelled time series\n", | ||
"CSO.to_csv('CSO_events.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Compute event signatures" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def _positive_span(values):\n", | ||
"    \"\"\"Return the time between the first and last strictly positive sample.\n", | ||
"\n", | ||
"    `values` is a Series indexed by timestamp. A zero Timedelta is returned when\n", | ||
"    no sample is positive, so the resulting column holds only Timedelta values\n", | ||
"    instead of mixing them with the integer 0.\n", | ||
"    \"\"\"\n", | ||
"    positive = values[values > 0]\n", | ||
"    if positive.empty:\n", | ||
"        return pd.Timedelta(0)\n", | ||
"    return positive.index[-1] - positive.index[0]\n", | ||
"\n", | ||
"\n", | ||
"# Group the time series by event ID. Rows outside any event carry ID 0; that group\n", | ||
"# is dropped when results are aligned on the index of `events`, whose IDs start at 1.\n", | ||
"by_event = CSO.groupby('event')\n", | ||
"\n", | ||
"# Event duration: time between the event start and end timestamps\n", | ||
"events['duration'] = events['end'] - events['start']\n", | ||
"\n", | ||
"# Peak observed and modelled values\n", | ||
"# NOTE(review): peaks and durations read the 'obs' column, while the AUC cell\n", | ||
"# uses 'filtered' - confirm this difference is intended.\n", | ||
"events['obs_peak'] = by_event['obs'].max()\n", | ||
"events['mod_peak'] = by_event['model'].max()\n", | ||
"\n", | ||
"# Timestamps at which the peak values occur\n", | ||
"events['obs_peak_idx'] = by_event['obs'].idxmax()\n", | ||
"events['mod_peak_idx'] = by_event['model'].idxmax()\n", | ||
"\n", | ||
"# Time span between first and last positive observed / modelled value per event\n", | ||
"events['obs_dur'] = by_event['obs'].apply(_positive_span)\n", | ||
"events['mod_dur'] = by_event['model'].apply(_positive_span)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Copy the datetime index into a column and compute the time step length,\n", | ||
"# i.e. the seconds elapsed since the previous row (NaN for the first row)\n", | ||
"CSO['timestamp'] = CSO.index\n", | ||
"CSO['timestep'] = CSO['timestamp'].diff().dt.total_seconds()\n", | ||
"\n", | ||
"# Area under the curve per event: sum of value * time step length,\n", | ||
"# first for the observed ('filtered') series, then for the modelled one\n", | ||
"for value_col, auc_col in (('filtered', 'obs_AUC'), ('model', 'mod_AUC')):\n", | ||
"    CSO[auc_col] = CSO[value_col] * CSO['timestep']\n", | ||
"    events[auc_col] = CSO.groupby('event')[auc_col].sum()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>start</th>\n", | ||
" <th>end</th>\n", | ||
" <th>duration</th>\n", | ||
" <th>obs_peak</th>\n", | ||
" <th>mod_peak</th>\n", | ||
" <th>obs_peak_idx</th>\n", | ||
" <th>mod_peak_idx</th>\n", | ||
" <th>obs_dur</th>\n", | ||
" <th>mod_dur</th>\n", | ||
" <th>obs_AUC</th>\n", | ||
" <th>mod_AUC</th>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>ID</th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>2022-08-27 09:45:00</td>\n", | ||
" <td>2022-08-27 12:15:00</td>\n", | ||
" <td>0 days 02:30:00</td>\n", | ||
" <td>0.4030</td>\n", | ||
" <td>0.4441</td>\n", | ||
" <td>2022-08-27 11:15:00</td>\n", | ||
" <td>2022-08-27 11:00:00</td>\n", | ||
" <td>0 days 02:15:00</td>\n", | ||
" <td>0 days 02:30:00</td>\n", | ||
" <td>2269.98</td>\n", | ||
" <td>2854.98</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>2022-09-18 13:45:00</td>\n", | ||
" <td>2022-09-18 15:45:00</td>\n", | ||
" <td>0 days 02:00:00</td>\n", | ||
" <td>0.0000</td>\n", | ||
" <td>0.7600</td>\n", | ||
" <td>2022-09-18 13:45:00</td>\n", | ||
" <td>2022-09-18 14:00:00</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0 days 02:00:00</td>\n", | ||
" <td>0.00</td>\n", | ||
" <td>4742.19</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>2022-09-28 16:00:00</td>\n", | ||
" <td>2022-09-28 17:30:00</td>\n", | ||
" <td>0 days 01:30:00</td>\n", | ||
" <td>0.0919</td>\n", | ||
" <td>0.4449</td>\n", | ||
" <td>2022-09-28 17:00:00</td>\n", | ||
" <td>2022-09-28 16:30:00</td>\n", | ||
" <td>0 days 00:30:00</td>\n", | ||
" <td>0 days 01:30:00</td>\n", | ||
" <td>170.37</td>\n", | ||
" <td>1855.08</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>2022-09-28 21:00:00</td>\n", | ||
" <td>2022-09-28 21:30:00</td>\n", | ||
" <td>0 days 00:30:00</td>\n", | ||
" <td>0.0000</td>\n", | ||
" <td>0.0417</td>\n", | ||
" <td>2022-09-28 21:00:00</td>\n", | ||
" <td>2022-09-28 21:15:00</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0 days 00:30:00</td>\n", | ||
" <td>0.00</td>\n", | ||
" <td>65.43</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>5</th>\n", | ||
" <td>2022-10-01 15:45:00</td>\n", | ||
" <td>2022-10-01 16:45:00</td>\n", | ||
" <td>0 days 01:00:00</td>\n", | ||
" <td>0.0000</td>\n", | ||
" <td>0.2342</td>\n", | ||
" <td>2022-10-01 15:45:00</td>\n", | ||
" <td>2022-10-01 16:00:00</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0 days 01:00:00</td>\n", | ||
" <td>0.00</td>\n", | ||
" <td>706.86</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" start end duration obs_peak \\\n", | ||
"ID \n", | ||
"1 2022-08-27 09:45:00 2022-08-27 12:15:00 0 days 02:30:00 0.4030 \n", | ||
"2 2022-09-18 13:45:00 2022-09-18 15:45:00 0 days 02:00:00 0.0000 \n", | ||
"3 2022-09-28 16:00:00 2022-09-28 17:30:00 0 days 01:30:00 0.0919 \n", | ||
"4 2022-09-28 21:00:00 2022-09-28 21:30:00 0 days 00:30:00 0.0000 \n", | ||
"5 2022-10-01 15:45:00 2022-10-01 16:45:00 0 days 01:00:00 0.0000 \n", | ||
"\n", | ||
" mod_peak obs_peak_idx mod_peak_idx obs_dur \\\n", | ||
"ID \n", | ||
"1 0.4441 2022-08-27 11:15:00 2022-08-27 11:00:00 0 days 02:15:00 \n", | ||
"2 0.7600 2022-09-18 13:45:00 2022-09-18 14:00:00 0 \n", | ||
"3 0.4449 2022-09-28 17:00:00 2022-09-28 16:30:00 0 days 00:30:00 \n", | ||
"4 0.0417 2022-09-28 21:00:00 2022-09-28 21:15:00 0 \n", | ||
"5 0.2342 2022-10-01 15:45:00 2022-10-01 16:00:00 0 \n", | ||
"\n", | ||
" mod_dur obs_AUC mod_AUC \n", | ||
"ID \n", | ||
"1 0 days 02:30:00 2269.98 2854.98 \n", | ||
"2 0 days 02:00:00 0.00 4742.19 \n", | ||
"3 0 days 01:30:00 170.37 1855.08 \n", | ||
"4 0 days 00:30:00 0.00 65.43 \n", | ||
"5 0 days 01:00:00 0.00 706.86 " | ||
] | ||
}, | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# Preview the first rows of the event table with the computed signatures\n", | ||
"events.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Write the per-event signature table to a CSV file in the current working directory\n", | ||
"events.to_csv('CSO_events_signatures.csv')" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "keras", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.12" | ||
}, | ||
"orig_nbformat": 4 | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |