How to change the learning rate mid-training in TensorFlow 2.0
Posted On 2019-10-20
This post looks at how to change the learning rate in the middle of training in TensorFlow 2.0, both with the Keras API and when writing the training loop yourself. With traditional Keras you would use LearningRateScheduler, but what should you do in TF 2.0?
Using the Keras API
Use LearningRateScheduler just as before. Let's see it on MNIST.
import tensorflow as tf
import tensorflow.keras.layers as layers
import numpy as np

def create_model():
    inputs = layers.Input((784,))
    x = layers.Dense(128, activation="relu")(inputs)
    x = layers.Dense(64, activation="relu")(x)
    x = layers.Dense(10, activation="softmax")(x)
    return tf.keras.models.Model(inputs, x)

def load_data():
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
    X_train = X_train.astype(np.float32).reshape(-1, 784) / 255.0
    X_test = X_test.astype(np.float32).reshape(-1, 784) / 255.0
    return (X_train, y_train), (X_test, y_test)
def main_keras():
    (X_train, y_train), (X_test, y_test) = load_data()
    model = create_model()
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.1),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    # Schedule function to feed into LearningRateScheduler
    def lr_scheduler(epoch):
        if epoch <= 5: return 0.1
        else: return 1e-8

    lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=128,
              callbacks=[lr_callback])
lr_scheduler is the function that defines how the learning rate changes. Here it returns 0.1 through epoch 6 and 1e-8 (an extremely small value) from epoch 7 onward. Note that the epoch argument the schedule receives is 0-indexed, and the callback applies the new value at the beginning of each epoch.
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 1s 16us/sample - loss: 0.4461 - sparse_categorical_accuracy: 0.8739 - val_loss: 0.2352 - val_sparse_categorical_accuracy: 0.9305
Epoch 2/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.2117 - sparse_categorical_accuracy: 0.9388 - val_loss: 0.2101 - val_sparse_categorical_accuracy: 0.9351
Epoch 3/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.1603 - sparse_categorical_accuracy: 0.9533 - val_loss: 0.1375 - val_sparse_categorical_accuracy: 0.9576
Epoch 4/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.1301 - sparse_categorical_accuracy: 0.9617 - val_loss: 0.1198 - val_sparse_categorical_accuracy: 0.9630
Epoch 5/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.1088 - sparse_categorical_accuracy: 0.9683 - val_loss: 0.1152 - val_sparse_categorical_accuracy: 0.9644
Epoch 6/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.0933 - sparse_categorical_accuracy: 0.9724 - val_loss: 0.0957 - val_sparse_categorical_accuracy: 0.9681
Epoch 7/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.0778 - sparse_categorical_accuracy: 0.9775 - val_loss: 0.0957 - val_sparse_categorical_accuracy: 0.9681
Epoch 8/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.0778 - sparse_categorical_accuracy: 0.9775 - val_loss: 0.0957 - val_sparse_categorical_accuracy: 0.9681
Epoch 9/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.0778 - sparse_categorical_accuracy: 0.9775 - val_loss: 0.0957 - val_sparse_categorical_accuracy: 0.9681
Epoch 10/10
60000/60000 [==============================] - 1s 11us/sample - loss: 0.0778 - sparse_categorical_accuracy: 0.9775 - val_loss: 0.0957 - val_sparse_categorical_accuracy: 0.9681
This is the output. You can see that the loss essentially stops decreasing once the learning rate is switched to the extremely small value at epoch 7.
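As a variation, the schedule function can also receive the current learning rate as a second argument, and passing verbose=1 to LearningRateScheduler prints the value applied at each epoch. A minimal sketch (not part of the run above, assuming the same TF 2.0 setup):

    # Sketch: halve the learning rate every 3 epochs.
    # epoch is 0-indexed; lr is the value currently set on the optimizer.
    def step_decay(epoch, lr):
        if epoch > 0 and epoch % 3 == 0:
            return lr * 0.5
        return lr

    lr_callback = tf.keras.callbacks.LearningRateScheduler(step_decay, verbose=1)
    # pass callbacks=[lr_callback] to model.fit exactly as above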
Writing the training loop yourself
In this case, you can simply assign to the optimizer's lr attribute. This also appears to work correctly under distributed training such as on TPUs.
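As a minimal check (assuming a TF 2.0 tf.keras optimizer; this snippet is not part of the original code), lr is an alias for the learning_rate hyperparameter, and a plain assignment updates it:

    optim = tf.keras.optimizers.SGD(lr=0.1)
    optim.lr = 0.01                    # "lr" maps onto the "learning_rate" hyperparameter
    print(float(optim.learning_rate))  # 0.01

The full training loop looks like this: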
def main_train_loop():
    (X_train, y_train), (X_test, y_test) = load_data()
    trainset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    trainset = trainset.shuffle(X_train.shape[0]).batch(128)
    testset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    testset = testset.batch(128)

    model = create_model()
    optim = tf.keras.optimizers.SGD(lr=0.1)
    loss_func = tf.keras.losses.SparseCategoricalCrossentropy()
    acc = tf.keras.metrics.SparseCategoricalAccuracy()

    @tf.function
    def train_on_batch(X, y_true):
        with tf.GradientTape() as tape:
            y_pred = model(X, training=True)
            loss = loss_func(y_true, y_pred)
        grad = tape.gradient(loss, model.trainable_weights)
        optim.apply_gradients(zip(grad, model.trainable_weights))
        acc.update_state(y_true, y_pred)
        return loss

    @tf.function
    def validation_on_batch(X, y_true):
        y_pred = model(X, training=False)
        loss = loss_func(y_true, y_pred)
        acc.update_state(y_true, y_pred)
        return loss

    for epoch in range(10):
        # training
        acc.reset_states()
        losses = []
        for X, y in trainset:
            losses.append(train_on_batch(X, y).numpy())
        train_loss, train_acc = np.mean(np.array(losses)), acc.result().numpy()

        # validation
        acc.reset_states()
        losses = []
        for X, y in testset:
            losses.append(validation_on_batch(X, y).numpy())
        val_loss, val_acc = np.mean(np.array(losses)), acc.result().numpy()

        print(f"Epoch = {epoch+1}, train_loss = {train_loss:.04}, train_acc = {train_acc:.04}, " +
              f"val_loss = {val_loss:.04}, val_acc={val_acc:.04}")

        # change learning rate (takes effect from the 7th epoch onward)
        if epoch >= 5: optim.lr = 1e-8
The output is as follows.
Epoch = 1, train_loss = 0.4358, train_acc = 0.4358, val_loss = 0.2325, val_acc=0.9338
Epoch = 2, train_loss = 0.2066, train_acc = 0.2066, val_loss = 0.1756, val_acc=0.9491
Epoch = 3, train_loss = 0.1546, train_acc = 0.1546, val_loss = 0.1369, val_acc=0.9598
Epoch = 4, train_loss = 0.1235, train_acc = 0.1235, val_loss = 0.1263, val_acc=0.9605
Epoch = 5, train_loss = 0.1033, train_acc = 0.1033, val_loss = 0.1058, val_acc=0.9681
Epoch = 6, train_loss = 0.08812, train_acc = 0.08812, val_loss = 0.1023, val_acc=0.9686
Epoch = 7, train_loss = 0.07745, train_acc = 0.07745, val_loss = 0.1023, val_acc=0.9686
Epoch = 8, train_loss = 0.07747, train_acc = 0.07747, val_loss = 0.1023, val_acc=0.9686
Epoch = 9, train_loss = 0.07746, train_acc = 0.07746, val_loss = 0.1023, val_acc=0.9686
Epoch = 10, train_loss = 0.07744, train_acc = 0.07744, val_loss = 0.1023, val_acc=0.9686
You can confirm that the learning rate is changed just as with the Keras API.
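Incidentally, if the switch point is known in advance, TF2 also lets you pass a tf.keras.optimizers.schedules.LearningRateSchedule object to the optimizer, so there is no need to touch lr inside the loop at all. A sketch (note that boundaries are given in optimizer steps, not epochs; roughly 469 steps per epoch is assumed here from 60000 samples and batch size 128):

    steps_per_epoch = 60000 // 128 + 1     # about 469 steps per epoch
    schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries=[6 * steps_per_epoch],   # switch after roughly 6 epochs
        values=[0.1, 1e-8])                 # learning rate before / after the boundary
    optim = tf.keras.optimizers.SGD(learning_rate=schedule)

The manual assignment shown above is still handy when the decision depends on metrics observed during training.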
Summary
- With the Keras API, use LearningRateScheduler as before
- When writing the training loop yourself, assigning the new value directly to the optimizer's lr appears to work fine