Computer Vision with Animals from QuickDraw

Recognizing Animals from the QuickDraw Dataset

The Quick Draw Dataset is a collection of 50 million drawings collected by Google, contributed by people drawing objects from 345 categories. You can browse the drawings online at quickdraw.withgoogle.com/data.

alt=""

In [4]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import urllib.request

%matplotlib inline
In [5]:
# Confirm the TensorFlow / tf.keras versions in use (outputs below: 2.1.0 / 2.2.4-tf)
print(tf.__version__)
print(keras.__version__)
2.1.0
2.2.4-tf

We choose 12 animals from the 345 categories to train our model on.

In [6]:
# URL prefix for QuickDraw's preprocessed 28x28 numpy bitmap files
base = "https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/"

# The 12 animal categories (kept in alphabetical order) used to train the model
classes = ["bee", "bird", "butterfly", "camel", "cat", "dragon", "hedgehog",
           "horse", "kangaroo", "penguin", "tiger", "whale"]
In [9]:
def download_data(base, classes):
    """Download the requested class bitmaps from Google's QuickDraw dataset.

    Fetches one ``<class>.npy`` file per class from `base` and stores it in a
    local ``data/`` folder, which is created if it does not already exist.

    Args:
        base: URL prefix of the numpy_bitmap QuickDraw files.
        classes: iterable of class names, e.g. ["bee", "bird", ...].
    """
    # Portable replacement for the IPython shell magic `!mkdir -p data`;
    # works outside notebooks and on platforms without `mkdir -p`.
    os.makedirs("data", exist_ok=True)
    for c in classes:
        url = base + c + ".npy"
        print(url)
        urllib.request.urlretrieve(url, os.path.join("data", c + ".npy"))
In [10]:
# Download data from Google's API for each chosen class
download_data(base=base, classes=classes)
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bee.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bird.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/butterfly.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/camel.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/cat.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/dragon.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/hedgehog.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/horse.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/kangaroo.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/penguin.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/tiger.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/whale.npy

Preprocess the data

In [7]:
def load_preprocess_data(path, test_ratio = 0.2, items_per_class = 20000):
    """Load the .npy files from `path` and build a shuffled train/test split.

    Each file in `path` is treated as one class; at most `items_per_class`
    drawings are taken per class. Images are reshaped to (28, 28, 1),
    scaled to [0, 1], and labels are one-hot encoded.

    Args:
        path: folder containing one <class>.npy file per class.
        test_ratio: fraction of the shuffled data held out for testing.
        items_per_class: cap on the number of samples loaded per class.

    Returns:
        Tuple (X_train, y_train, X_test, y_test).
    """

    # Accumulators for the flat 784-pixel images and integer labels
    x = np.empty([0, 784])
    y = np.empty([0])

    # Sort the file list so the label index assigned to each class is
    # deterministic — os.listdir order is arbitrary, which would otherwise
    # break the mapping to the alphabetically sorted `classes` list.
    filelist = sorted(os.listdir(path))

    for i, file in enumerate(filelist):
        data = np.load(os.path.join(path, file))
        data = data[0:items_per_class, :]
        labels = np.full(data.shape[0], i)

        x = np.concatenate((x, data), axis=0)
        y = np.append(y, labels)

    # Drop references to the last per-class buffers before shuffling
    data = None
    labels = None

    # Shuffle samples and labels with the same random permutation
    permutation = np.random.permutation(y.shape[0])
    x = x[permutation, :]
    y = y[permutation]

    # Hold out the first `test_ratio` fraction of the shuffled data
    test_size = int(x.shape[0] * test_ratio)

    X_test = x[:test_size, :]
    y_test = y[:test_size]

    X_train = x[test_size:, :]
    y_train = y[test_size:]

    # Reshape flat vectors to 28x28 grayscale images and scale to [0, 1]
    X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')

    X_train /= 255.0
    X_test /= 255.0

    # One-hot encode the labels. NOTE(review): relies on the module-level
    # `classes` list; assumes len(classes) == number of files in `path`.
    num_classes = len(classes)
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return X_train, y_train, X_test, y_test
In [8]:
# Load everything in data/ and print the resulting array shapes
X_train, y_train, X_test, y_test = load_preprocess_data("data") 

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
(192000, 28, 28, 1) (192000, 12) (48000, 28, 28, 1) (48000, 12)

Visualize the data

In [9]:
# Show a 5x5 grid of training images, each titled with its class name
fig, axes = plt.subplots(5, 5, figsize=(10, 10))

for sample_idx, axis in enumerate(axes.flatten()):
    image = np.squeeze(X_train[sample_idx])
    axis.imshow(image, cmap="gray_r", interpolation="nearest")
    # Recover the class index from the one-hot label vector
    label_idx = int(np.where(y_train[sample_idx] == 1)[0])
    axis.set_title(classes[label_idx])
    axis.axis("off")

Even for a human, the labels are far from obvious for some images. Let's see how our model manages them.

Model

We use a Convolutional Neural Network (CNN) that we train on the image samples.

In [22]:
# Define the CNN: three conv/pool stages followed by a dense classifier.
# keras.layers.Conv2D is the canonical name (Convolution2D is a legacy alias).
model = keras.Sequential()
model.add(keras.layers.Conv2D(16, (3, 3),
                              padding="same",
                              input_shape=X_train.shape[1:], activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Conv2D(32, (3, 3), padding="same", activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation="relu"))
model.add(keras.layers.Dropout(0.1))
model.add(keras.layers.Dense(64, activation="relu"))
model.add(keras.layers.Dropout(0.1))
# One softmax output unit per class (12), tied to the `classes` list so the
# head stays consistent if the class set changes.
model.add(keras.layers.Dense(len(classes), activation="softmax"))

model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

print(model.summary())
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_6 (Conv2D)            (None, 28, 28, 16)        160       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 14, 14, 32)        4640      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 7, 7, 64)          18496     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 3, 3, 64)          0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 3, 3, 64)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               73856     
_________________________________________________________________
dropout_7 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_8 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 12)                780       
=================================================================
Total params: 106,188
Trainable params: 106,188
Non-trainable params: 0
_________________________________________________________________
None
In [23]:
# Train for 20 epochs, holding out 10% of the training data for validation
history = model.fit(x=X_train, y=y_train, validation_split=0.1, batch_size=32, verbose=2, epochs=20)
Train on 172800 samples, validate on 19200 samples
Epoch 1/20
172800/172800 - 82s - loss: 0.8578 - accuracy: 0.7276 - val_loss: 0.5763 - val_accuracy: 0.8176
Epoch 2/20
172800/172800 - 91s - loss: 0.6041 - accuracy: 0.8114 - val_loss: 0.5016 - val_accuracy: 0.8430
Epoch 3/20
172800/172800 - 87s - loss: 0.5451 - accuracy: 0.8288 - val_loss: 0.5050 - val_accuracy: 0.8392
Epoch 4/20
172800/172800 - 84s - loss: 0.5112 - accuracy: 0.8405 - val_loss: 0.4653 - val_accuracy: 0.8519
Epoch 5/20
172800/172800 - 79s - loss: 0.4867 - accuracy: 0.8476 - val_loss: 0.4663 - val_accuracy: 0.8493
Epoch 6/20
172800/172800 - 68s - loss: 0.4723 - accuracy: 0.8524 - val_loss: 0.4479 - val_accuracy: 0.8572
Epoch 7/20
172800/172800 - 86s - loss: 0.4588 - accuracy: 0.8556 - val_loss: 0.4513 - val_accuracy: 0.8557
Epoch 8/20
172800/172800 - 79s - loss: 0.4483 - accuracy: 0.8582 - val_loss: 0.4450 - val_accuracy: 0.8581
Epoch 9/20
172800/172800 - 75s - loss: 0.4379 - accuracy: 0.8615 - val_loss: 0.4448 - val_accuracy: 0.8599
Epoch 10/20
172800/172800 - 80s - loss: 0.4289 - accuracy: 0.8639 - val_loss: 0.4423 - val_accuracy: 0.8612
Epoch 11/20
172800/172800 - 79s - loss: 0.4232 - accuracy: 0.8659 - val_loss: 0.4319 - val_accuracy: 0.8633
Epoch 12/20
172800/172800 - 74s - loss: 0.4176 - accuracy: 0.8669 - val_loss: 0.4380 - val_accuracy: 0.8612
Epoch 13/20
172800/172800 - 65s - loss: 0.4130 - accuracy: 0.8689 - val_loss: 0.4382 - val_accuracy: 0.8636
Epoch 14/20
172800/172800 - 63s - loss: 0.4046 - accuracy: 0.8714 - val_loss: 0.4452 - val_accuracy: 0.8608
Epoch 15/20
172800/172800 - 64s - loss: 0.4047 - accuracy: 0.8707 - val_loss: 0.4487 - val_accuracy: 0.8603
Epoch 16/20
172800/172800 - 71s - loss: 0.3970 - accuracy: 0.8727 - val_loss: 0.4364 - val_accuracy: 0.8629
Epoch 17/20
172800/172800 - 79s - loss: 0.3937 - accuracy: 0.8742 - val_loss: 0.4370 - val_accuracy: 0.8637
Epoch 18/20
172800/172800 - 77s - loss: 0.3920 - accuracy: 0.8749 - val_loss: 0.4571 - val_accuracy: 0.8561
Epoch 19/20
172800/172800 - 79s - loss: 0.3875 - accuracy: 0.8762 - val_loss: 0.4412 - val_accuracy: 0.8639
Epoch 20/20
172800/172800 - 79s - loss: 0.3859 - accuracy: 0.8762 - val_loss: 0.4425 - val_accuracy: 0.8615
In [24]:
# Visualize learning curves: loss/accuracy per epoch for train and validation
pd.DataFrame(history.history).plot(figsize=(8,5))
plt.grid(True)
In [25]:
# Predictions are kept in `pred` for the error analysis below
pred = model.predict(X_test)
# evaluate returns [loss, accuracy] per the compiled metrics
results = model.evaluate(X_test, y_test)

print(f"After 20 epochs, the loss is {results[0]}, and the accuracy is {results[1]}")
48000/48000 [==============================] - 8s 164us/sample - loss: 0.4495 - accuracy: 0.8621
After 20 epochs, the loss is 0.44952674834926926, and the accuracy is 0.8621041774749756

Get errors

In [73]:
# Get predicted and true class indices from the probability / one-hot arrays
y_pred = np.argmax(pred, axis = 1)
y = np.argmax(y_test, axis = 1)

# Indices of the misclassified samples (direct inequality instead of the
# `(y == y_pred) == False` anti-idiom)
bad_pred = np.argwhere(y != y_pred).squeeze()

# Plot a 3x3 grid of randomly chosen misclassifications with true vs.
# predicted labels. NOTE(review): sampling is with replacement, so the same
# error can appear twice.
fig, axes = plt.subplots(3, 3, figsize=(9, 9))

for i, ax in enumerate(axes.flatten()):
    idx = np.random.choice(bad_pred)
    X_show = np.squeeze(X_test[idx])
    ax.imshow(X_show, cmap="gray_r", interpolation="nearest") 
    ax.set_title(f"True label: {classes[y[idx]]}, \nPrediction: {classes[y_pred[idx]]}")
    ax.axis("off")