regression.py
import numpy as np


class BaseRegression:
    def __init__(self, learning_rate: float = 0.001, n_iters: int = 1000):
        # Store the hyperparameters
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        # Weights and bias are initialized in fit()
        self.weights, self.bias = None, None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights, self.bias = np.zeros(n_features), 0

        # Minimize the loss and learn the weights and bias with gradient descent
        # (see the note on the shared gradient form after this class)
        for _ in range(self.n_iters):
            y_predicted = self._approximation(X, self.weights, self.bias)
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return self._predict(X, self.weights, self.bias)

    def _predict(self, X, w, b):
        raise NotImplementedError

    def _approximation(self, X, w, b):
        raise NotImplementedError
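
# Why one fit() works for both models: with mean squared error for linear
# regression and binary cross-entropy (with a sigmoid output) for logistic
# regression, the gradients take the same form,
#     dw = (1 / n_samples) * X.T @ (y_predicted - y)
#     db = (1 / n_samples) * sum(y_predicted - y)
# where y_predicted = X @ w + b for the linear model and
# y_predicted = sigmoid(X @ w + b) for the logistic model. Each subclass only
# supplies _approximation() (used during training) and _predict().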


class LinearRegression(BaseRegression):
    def _approximation(self, X, w, b):
        return np.dot(X, w) + b

    def _predict(self, X, w, b):
        return np.dot(X, w) + b


class LogisticRegression(BaseRegression):
    def _approximation(self, X, w, b):
        linear_model = np.dot(X, w) + b
        return self._sigmoid(linear_model)

    def _predict(self, X, w, b):
        linear_model = np.dot(X, w) + b
        y_predicted = self._sigmoid(linear_model)
        # Threshold the predicted probabilities at 0.5 to get class labels
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

    def _sigmoid(self, x):
        return 1 / (np.exp(-x) + 1)
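

# A numerical note on _sigmoid(): np.exp(-x) overflows for large negative x
# (NumPy emits a RuntimeWarning and the result becomes inf, so the sigmoid
# still evaluates to 0.0). A minimal sketch of a clipped variant, kept separate
# so the class above stays as written; the StableLogisticRegression name is
# only illustrative:
class StableLogisticRegression(LogisticRegression):
    def _sigmoid(self, x):
        # Clip the logits so np.exp() stays within the float64 range
        return 1 / (np.exp(-np.clip(x, -500, 500)) + 1)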


# Testing
if __name__ == "__main__":
    # Imports
    from sklearn.model_selection import train_test_split
    from sklearn import datasets

    # Utils
    def r2_score(y_true, y_pred):
        # R^2 estimated as the squared Pearson correlation between y_true and y_pred
        corr_matrix = np.corrcoef(y_true, y_pred)
        corr = corr_matrix[0, 1]
        return corr ** 2

    def mean_squared_error(y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2)

    def accuracy(y_true, y_pred):
        return np.sum(y_true == y_pred) / len(y_true)

    # Linear regression
    X, y = datasets.make_regression(
        n_samples=100, n_features=1, noise=20, random_state=4
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234
    )

    regressor = LinearRegression(learning_rate=0.01, n_iters=1000)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)
    r2 = r2_score(y_test, predictions)
    print("Linear reg r2 score:", r2)

    # Logistic regression
    bc = datasets.load_breast_cancer()
    X, y = bc.data, bc.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234
    )

    regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)
    print("Logistic reg classification accuracy:", accuracy(y_test, predictions))