#!/usr/bin/env python
# coding: utf-8
# # A Brief History of Perceptrons
# # Multilayer Perceptron Layer
# Subsequent work with multilayer perceptrons has shown that they are capable of
# approximating an XOR operator as well as many other non-linear functions.
#
# A multilayer perceptron (MLP) is a deep, artificial neural network.
# It is composed of more than one perceptron: an input layer to receive the signal,
# an output layer that makes a decision or prediction about the input, and in between those two,
# an arbitrary number of hidden layers that are the true computational engine of the MLP.
# MLPs with one hidden layer are capable of approximating any continuous function.
#
# Multilayer perceptrons are often applied to supervised learning problems:
# they train on a set of input-output pairs and learn to model the correlation (or dependencies)
# between those inputs and outputs. Training involves adjusting the parameters,
# or the weights and biases, of the model in order to minimize error. Backpropagation
# is used to make those weight and bias adjustments relative to the error, and the error
# itself can be measured in a variety of ways, including by root mean squared error (RMSE).
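#
# As a worked illustration (a sketch only, not used by the code below): a single
# gradient-descent update nudges each weight against the gradient of the error,
#   w_new = w_old - learning_rate * d(error)/d(w)
# and RMSE over a batch could be written as
#   rmse = sqrt(mean((y_true - y_pred)**2))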
import numpy as np
import tensorflow as tf
from scripts.utils import write_csv
import timeit
# Parameters
learning_rate = 0.001
training_steps = 3000
batch_size = 100
display_step = 300
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Flatten images to 1-D vector of 784 features (28*28).
x_train, x_test = x_train.reshape([-1, 784]), x_test.reshape([-1, 784])
# Normalize image values from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.
# Use tf.data API to shuffle and batch data.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)
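# Each element yielded by the pipeline above is an (images, labels) pair; with the
# settings above, images has shape (batch_size, 784) and labels has shape (batch_size,).
# A quick optional sanity check (not required for training):
#   sample_x, sample_y = next(iter(train_data))  # sample_x.shape == (100, 784)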
# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random.normal([n_input, n_hidden_1])),
'h2': tf.Variable(tf.random.normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random.normal([n_hidden_2, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random.normal([n_hidden_1])),
'b2': tf.Variable(tf.random.normal([n_hidden_2])),
'out': tf.Variable(tf.random.normal([n_classes]))
}
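# For reference, the trainable parameter shapes are:
#   h1: (784, 256), h2: (256, 256), out: (256, 10)
#   b1: (256,),     b2: (256,),     out: (10,)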
start_time = timeit.default_timer()
skipped_time = 0
# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.sigmoid(layer_1)
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.sigmoid(layer_2)
    # Output fully connected layer with a neuron for each class
    output = tf.matmul(layer_2, weights['out']) + biases['out']
    return tf.nn.softmax(output)
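# The forward pass above computes softmax(sigmoid(sigmoid(x W1 + b1) W2 + b2) W_out + b_out),
# returning per-class probabilities of shape (batch_size, n_classes).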
# Cross-Entropy loss function.
def cross_entropy(y_pred, y_true):
    # Encode label to a one hot vector.
    y_true = tf.one_hot(y_true, depth=10)
    # Clip prediction values to avoid log(0) error.
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    # Compute cross-entropy.
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred)))
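# The expression above is the total cross-entropy summed over the batch,
# i.e. sum_i -sum_c y_true[i, c] * log(y_pred[i, c]); the outer reduce_mean acts on a
# scalar, so it returns that sum unchanged.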
# Accuracy metric.
def accuracy(y_pred, y_true):
    # Predicted class is the index of highest score in prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
# Stochastic gradient descent optimizer.
optimizer = tf.optimizers.SGD(learning_rate)
# Optimization process.
def train_step(x, y):
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as tape:
        pred = multilayer_perceptron(x)
        loss = cross_entropy(pred, y)
    # Variables to update, i.e. trainable variables.
    trainable_variables = list(weights.values()) + list(biases.values())
    # Compute gradients.
    gradients = tape.gradient(loss, trainable_variables)
    # Update W and b following gradients.
    optimizer.apply_gradients(zip(gradients, trainable_variables))
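# tape.gradient computes d(loss)/d(variable) for every trainable variable recorded by
# the tape, and plain SGD's apply_gradients performs variable <- variable - learning_rate * gradient.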
total_loss = 0
loss_count = 0
total_accuracy = 0
accuracy_count = 0
# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run the optimization to update W and b values.
    train_step(batch_x, batch_y)
    if (step+1) % display_step == 0:
        pred = multilayer_perceptron(batch_x)
        loss = cross_entropy(pred, batch_y)
        total_loss += loss
        loss_count += 1
        acc = accuracy(pred, batch_y)
        total_accuracy += acc
        accuracy_count += 1
        print_time = timeit.default_timer()
        print("step: %i, loss: %f, accuracy: %f" % (step+1, loss, acc))
        skipped_time += timeit.default_timer() - print_time
time = timeit.default_timer() - start_time - skipped_time
avg_loss = float(total_loss) / float(loss_count)
avg_accuracy = float(total_accuracy) / float(accuracy_count)
write_csv(__file__, accuracy=float(avg_accuracy), loss=float(avg_loss), time=time)
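# Evaluate on the held-out test set (a minimal sketch using the model and accuracy
# helpers defined above; this step is not part of the original training loop).
test_pred = multilayer_perceptron(x_test)
test_acc = accuracy(test_pred, y_test)
print("test accuracy: %f" % test_acc)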