forked from rishis123/Air-Quality-Northeast-Patterns
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTrain_Model.py
38 lines (28 loc) · 1.27 KB
/
Train_Model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score
from sklearn import preprocessing
from sklearn.metrics import r2_score
# Train a random-forest regressor that predicts the 'Mean Value' column of the
# carbon-monoxide spreadsheet from the remaining columns, then report error
# metrics on a held-out 20% test split.

# Fixed seed so the train/test split and the forest (and therefore the printed
# metrics) are reproducible from run to run.
RANDOM_STATE = 42

# Load the data.
df = pd.read_excel('Data/CarbonMonoxideFile.xlsx')

# Coerce the target to numeric BEFORE dropping missing rows: errors='coerce'
# turns unparseable cells into NaN, and those rows must be removed as well,
# otherwise NaN ends up in the regression target and model.fit() rejects it.
df['Mean Value'] = pd.to_numeric(df['Mean Value'], errors='coerce')
df = df.dropna()

# IQR bounds for the 'Mean Value' column. They are only needed by the optional
# outlier filter below, which is currently disabled.
Q1 = df['Mean Value'].quantile(0.25)
Q3 = df['Mean Value'].quantile(0.75)
IQR = Q3 - Q1
# Optional: filter outliers from the 'Mean Value' column (intentionally off).
# df = df[~((df['Mean Value'] < (Q1 - 1.5 * IQR)) | (df['Mean Value'] > (Q3 + 1.5 * IQR)))]

# Features are every column except the label column 'Gases', the spreadsheet
# index column 'Unnamed: 0', and the target itself.
X = df.drop(['Gases', 'Unnamed: 0', 'Mean Value'], axis=1)
y = df['Mean Value']

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE)

model = RandomForestRegressor(random_state=RANDOM_STATE)
model.fit(X_train, y_train)  # Train model on training data (80% of dataset)

# Run model on testing data (20% of dataset)
predictions = model.predict(X_test)
r_squared = r2_score(y_test, predictions)

print('mean_squared_error : ', mean_squared_error(y_test, predictions))
print('mean_absolute_error : ', mean_absolute_error(y_test, predictions))
print(r_squared)