Facial Keypoints Detection

Dataset

Data comes from the Kaggle "Facial Keypoints Detection" competition (https://www.kaggle.com/c/facial-keypoints-detection/data).

Objective

Our objective is to predict keypoint positions on face images. This can be used as a building block in several applications, such as:

  • tracking faces in images and video
  • analysing facial expressions
  • detecting dysmorphic facial signs for medical diagnosis
  • biometrics / face recognition
In [2]:
# Import relevant modules
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow import keras

%matplotlib inline
In [6]:
# Record the library versions this notebook was run with (for reproducibility)
print(tf.__version__)
print(keras.__version__)
2.1.0
2.2.4-tf
In [7]:
def load_data(test=False):
  """Load the data from csv files.

  Args:
    test: if True, load the unlabeled test set ("data/test.csv") instead
      of the training set ("data/training.csv").

  Returns:
    A tuple (X, y) where X is a float32 array of shape (n_samples, 9216)
    holding pixel values scaled to [0, 1], and y is a float32 DataFrame of
    keypoint coordinates aligned row-for-row with X (None when test=True,
    since the test file has no target columns).
  """
  if test:
    df = pd.read_csv("data/test.csv")
  else:
    df = pd.read_csv("data/training.csv")

  # Drop rows with missing values, then reset the index so row labels match
  # positional indices. Without the reset, positional indexing into X and
  # label-based indexing into y (e.g. y.loc[i]) would silently misalign.
  df = df.dropna().reset_index(drop=True)

  # The Image column is a single string of space-separated pixel values.
  # Parse each into a numpy array. (np.fromstring(..., sep=' ') is
  # deprecated, so split the string explicitly instead.)
  df['Image'] = df['Image'].apply(lambda x: np.array(x.split(), dtype=np.float32))

  # Stack into a 2-D (n_samples, 9216) array and normalize pixels to [0, 1]
  X = np.vstack(df['Image'].values) / 255.
  X = X.astype(np.float32)

  # Only the training file has target columns
  if not test:
      y = df.drop("Image", axis=1)
      y = y.astype(np.float32)
  else:
      y = None

  return X, y
In [8]:
def visualize(id):
  """Plot training image `id` with its ground-truth keypoints overlaid.

  Uses positional indexing (.iloc) into y_train so the keypoints stay
  aligned with X_train[id] even when y_train's index is non-contiguous
  (as it is after dropna in load_data without an index reset): X_train[id]
  selects by position, while .loc[id] would select by label.
  """
  plt.imshow(X_train[id].reshape(96, 96), cmap="gray")
  # Keypoint columns alternate x, y, x, y, ...: even columns are x
  # coordinates, odd columns are y coordinates.
  plt.scatter(y_train.iloc[id, 0::2],
              y_train.iloc[id, 1::2],
              marker='x', s=10, c="red")
  plt.axis("off")
In [9]:
def visualize_pred(model_predictions, id):
  """Plot test image `id` with the model's predicted keypoints overlaid."""
  plt.imshow(X_test[id].reshape(96, 96), cmap="gray")
  predictions = pd.DataFrame(model_predictions)
  # Predicted columns alternate x, y, x, y, ...: split them accordingly.
  xs = predictions.loc[id, predictions.columns[0::2]]
  ys = predictions.loc[id, predictions.columns[1::2]]
  plt.scatter(xs, ys, marker="x", s=10, c="red")
  plt.axis("off")
In [10]:
# Plot the validation and train learning curves and loss
# Plot the validation and train learning curves and loss
def learning_curves(model_history):
  """Plot every metric series recorded in a Keras History object."""
  history_df = pd.DataFrame(model_history.history)
  history_df.plot(figsize=(8, 5))
  plt.grid(True)
In [11]:
# Load the data
X_train, y_train = load_data()
# y_test is None: the Kaggle test set ships without keypoint labels
X_test, y_test = load_data(test=True)

print(f"X_train's shape: {X_train.shape}\ny_train's shape: {y_train.shape}\nX_test's shape: {X_test.shape}")
X_train's shape: (2140, 9216)
y_train's shape: (2140, 30)
X_test's shape: (1783, 9216)
In [12]:
# Show the first nine training faces in a 3x3 grid, keypoints overlaid
fig = plt.figure(figsize=(12, 12))

for idx in range(9):
    fig.add_subplot(3, 3, idx + 1)
    visualize(idx)

plt.savefig("facial_keypoints.png")
In [10]:
# Model / training hyperparameters
NUM_LABELS = 30  # 15 keypoints, each an (x, y) pair
IMAGE_SIZE = 96  # images are 96x96 grayscale
BATCH_SIZE = 64
DROPOUT_RATE = 0.2
In [11]:
# Define a first model: a simple fully-connected baseline.
# The final Dense layer is linear (no activation) — one output per keypoint
# coordinate, as required for regression.
model_1 = keras.Sequential([
    keras.layers.Dense(128, input_shape=(IMAGE_SIZE * IMAGE_SIZE,), activation="relu"),
    keras.layers.Dropout(DROPOUT_RATE),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dropout(DROPOUT_RATE),
    keras.layers.Dense(NUM_LABELS),
])

# Summary of the model
model_1.summary()

# Compile the model: MSE loss for coordinate regression, RMSE reported as a metric
model_1.compile(optimizer="adam", loss="mse",
                metrics=[keras.metrics.RootMeanSquaredError(name='rmse')])

# Train the model
history_1 = model_1.fit(X_train, y_train, epochs=100, batch_size=BATCH_SIZE,
                        validation_split=0.2, verbose=0)

print(f"Final loss of training data: {history_1.history['loss'][-1]}")
print(f"Final loss of validation data: {history_1.history['val_loss'][-1]}")

# Plot the learning curves
learning_curves(history_1)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 128)               1179776   
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 30)                1950      
=================================================================
Total params: 1,189,982
Trainable params: 1,189,982
Non-trainable params: 0
_________________________________________________________________
Final loss of training data: 52.176904981381426
Final loss of validation data: 112.7545339281314
In [12]:
# Create second model - CNN
# Reshape to (n_samples, height, width, channels). Using -1 lets numpy infer
# the sample count, so this no longer breaks if the training-set size differs
# from the previously hardcoded 2140.
X_train_2 = X_train.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)

model_2 = keras.Sequential()

# VGG-style stack: pairs of 3x3 same-padding convolutions followed by 2x2 max
# pooling, doubling the filter count as spatial resolution halves.
# use_bias=False on the convs since BatchNormalization provides the shift.
model_2.add(keras.layers.Convolution2D(32, (3, 3), padding='same', use_bias=False, activation="relu",
                        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 1)))
model_2.add(keras.layers.BatchNormalization())

model_2.add(keras.layers.Convolution2D(32, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())
model_2.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

model_2.add(keras.layers.Convolution2D(64, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())

model_2.add(keras.layers.Convolution2D(64, (3, 3), padding='same', use_bias=False, activation="relu"))
# NOTE(review): no BatchNormalization after this conv, unlike every other
# block — possibly an oversight; kept as-is to preserve the trained
# architecture and its reported results.
model_2.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

model_2.add(keras.layers.Convolution2D(96, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())

model_2.add(keras.layers.Convolution2D(96, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())
model_2.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

model_2.add(keras.layers.Convolution2D(128, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())

model_2.add(keras.layers.Convolution2D(128, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())
model_2.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

model_2.add(keras.layers.Convolution2D(256, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())

model_2.add(keras.layers.Convolution2D(256, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())
model_2.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

model_2.add(keras.layers.Convolution2D(512, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())

# Final conv block has no pooling: output stays 3x3x512 before flattening.
model_2.add(keras.layers.Convolution2D(512, (3, 3), padding='same', use_bias=False, activation="relu"))
model_2.add(keras.layers.BatchNormalization())

# Regression head: flatten, one hidden layer, linear output per coordinate.
model_2.add(keras.layers.Flatten())
model_2.add(keras.layers.Dense(512, activation="relu"))
model_2.add(keras.layers.Dropout(DROPOUT_RATE))
model_2.add(keras.layers.Dense(NUM_LABELS))

# Summary of the model
model_2.summary()

# Compile the model: MSE loss for coordinate regression, RMSE reported as a metric
model_2.compile(optimizer="adam", loss="mse", metrics=[keras.metrics.RootMeanSquaredError(name='rmse')])

# Train the model
history_2 = model_2.fit(X_train_2, y_train, epochs=25, batch_size=BATCH_SIZE,
                        validation_split=0.2, verbose=2)

print(f"Final loss of training data: {history_2.history['loss'][-1]}")
print(f"Final loss of validation data: {history_2.history['val_loss'][-1]}")

# Plot the learning curves
learning_curves(history_2)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 96, 96, 32)        288       
_________________________________________________________________
batch_normalization (BatchNo (None, 96, 96, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 96, 96, 32)        9216      
_________________________________________________________________
batch_normalization_1 (Batch (None, 96, 96, 32)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 48, 48, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 48, 48, 64)        18432     
_________________________________________________________________
batch_normalization_2 (Batch (None, 48, 48, 64)        256       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 48, 48, 64)        36864     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 24, 24, 96)        55296     
_________________________________________________________________
batch_normalization_3 (Batch (None, 24, 24, 96)        384       
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 96)        82944     
_________________________________________________________________
batch_normalization_4 (Batch (None, 24, 24, 96)        384       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 96)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 12, 12, 128)       110592    
_________________________________________________________________
batch_normalization_5 (Batch (None, 12, 12, 128)       512       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 12, 12, 128)       147456    
_________________________________________________________________
batch_normalization_6 (Batch (None, 12, 12, 128)       512       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 6, 6, 256)         294912    
_________________________________________________________________
batch_normalization_7 (Batch (None, 6, 6, 256)         1024      
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 6, 6, 256)         589824    
_________________________________________________________________
batch_normalization_8 (Batch (None, 6, 6, 256)         1024      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 3, 3, 256)         0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 3, 3, 512)         1179648   
_________________________________________________________________
batch_normalization_9 (Batch (None, 3, 3, 512)         2048      
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 3, 3, 512)         2359296   
_________________________________________________________________
batch_normalization_10 (Batc (None, 3, 3, 512)         2048      
_________________________________________________________________
flatten (Flatten)            (None, 4608)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 512)               2359808   
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 30)                15390     
=================================================================
Total params: 7,268,414
Trainable params: 7,264,190
Non-trainable params: 4,224
_________________________________________________________________
Train on 1712 samples, validate on 428 samples
Epoch 1/25
1712/1712 - 64s - loss: 310.0069 - rmse: 17.6070 - val_loss: 390.5083 - val_rmse: 19.7613
Epoch 2/25
1712/1712 - 46s - loss: 44.4504 - rmse: 6.6671 - val_loss: 256.2662 - val_rmse: 16.0083
Epoch 3/25
1712/1712 - 48s - loss: 35.1064 - rmse: 5.9251 - val_loss: 299.7080 - val_rmse: 17.3121
Epoch 4/25
1712/1712 - 48s - loss: 29.4110 - rmse: 5.4232 - val_loss: 214.0253 - val_rmse: 14.6296
Epoch 5/25
1712/1712 - 47s - loss: 27.3038 - rmse: 5.2253 - val_loss: 86.6982 - val_rmse: 9.3112
Epoch 6/25
1712/1712 - 45s - loss: 24.2469 - rmse: 4.9241 - val_loss: 98.0841 - val_rmse: 9.9037
Epoch 7/25
1712/1712 - 46s - loss: 21.3008 - rmse: 4.6153 - val_loss: 80.1103 - val_rmse: 8.9504
Epoch 8/25
1712/1712 - 46s - loss: 19.9335 - rmse: 4.4647 - val_loss: 50.5033 - val_rmse: 7.1066
Epoch 9/25
1712/1712 - 47s - loss: 18.7743 - rmse: 4.3329 - val_loss: 40.6677 - val_rmse: 6.3771
Epoch 10/25
1712/1712 - 46s - loss: 19.3856 - rmse: 4.4029 - val_loss: 58.0963 - val_rmse: 7.6221
Epoch 11/25
1712/1712 - 46s - loss: 17.6409 - rmse: 4.2001 - val_loss: 30.8287 - val_rmse: 5.5524
Epoch 12/25
1712/1712 - 46s - loss: 15.2005 - rmse: 3.8988 - val_loss: 34.2526 - val_rmse: 5.8526
Epoch 13/25
1712/1712 - 45s - loss: 14.9518 - rmse: 3.8668 - val_loss: 30.5523 - val_rmse: 5.5274
Epoch 14/25
1712/1712 - 46s - loss: 14.5005 - rmse: 3.8080 - val_loss: 26.9228 - val_rmse: 5.1887
Epoch 15/25
1712/1712 - 46s - loss: 15.2185 - rmse: 3.9011 - val_loss: 18.1308 - val_rmse: 4.2580
Epoch 16/25
1712/1712 - 45s - loss: 13.4789 - rmse: 3.6714 - val_loss: 16.9599 - val_rmse: 4.1182
Epoch 17/25
1712/1712 - 54s - loss: 13.9524 - rmse: 3.7353 - val_loss: 15.8432 - val_rmse: 3.9804
Epoch 18/25
1712/1712 - 68s - loss: 12.8890 - rmse: 3.5901 - val_loss: 14.6392 - val_rmse: 3.8261
Epoch 19/25
1712/1712 - 60s - loss: 12.4448 - rmse: 3.5277 - val_loss: 12.8845 - val_rmse: 3.5895
Epoch 20/25
1712/1712 - 53s - loss: 16.0083 - rmse: 4.0010 - val_loss: 17.3214 - val_rmse: 4.1619
Epoch 21/25
1712/1712 - 51s - loss: 11.4941 - rmse: 3.3903 - val_loss: 10.2949 - val_rmse: 3.2086
Epoch 22/25
1712/1712 - 48s - loss: 11.8060 - rmse: 3.4360 - val_loss: 14.4589 - val_rmse: 3.8025
Epoch 23/25
1712/1712 - 58s - loss: 12.6722 - rmse: 3.5598 - val_loss: 24.6002 - val_rmse: 4.9599
Epoch 24/25
1712/1712 - 46s - loss: 12.1520 - rmse: 3.4860 - val_loss: 10.7128 - val_rmse: 3.2730
Epoch 25/25
1712/1712 - 45s - loss: 10.3928 - rmse: 3.2238 - val_loss: 9.2582 - val_rmse: 3.0427
Final loss of training data: 10.392837132248923
Final loss of validation data: 9.25821566358905
In [16]:
# Predict from model 2 (CNN architecture)
# Use -1 to infer the sample count rather than hardcoding 1783, so this cell
# works for any test-set size.
X_test_2 = X_test.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
model_2_predictions = model_2.predict(X_test_2)
model_2_predictions.shape
Out[16]:
(1783, 30)
In [17]:
# Visualize the predictions on the first nine test faces in a 3x3 grid
fig = plt.figure(figsize=(12, 12))

for idx in range(9):
    fig.add_subplot(3, 3, idx + 1)
    visualize_pred(model_2_predictions, idx)