Computer Vision with Animals from QuickDraw
In [4]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import urllib.request
%matplotlib inline
In [5]:
# Record the TensorFlow and Keras versions used for this run
print(tf.__version__, keras.__version__, sep="\n")
We choose 12 animals from the 345 categories to train our model on.
In [6]:
# URL prefix of the QuickDraw numpy bitmap files (one .npy file per class)
base = "https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/"

# The animal classes used in this notebook; a label i refers to classes[i]
classes = [
    "bee",
    "bird",
    "butterfly",
    "camel",
    "cat",
    "dragon",
    "hedgehog",
    "horse",
    "kangaroo",
    "penguin",
    "tiger",
    "whale",
]
In [9]:
def download_data(base, classes):
    """Download the requested classes from Google's Quickdraw dataset.

    Each class is fetched from ``base + <class> + ".npy"`` and stored as
    ``data/<class>.npy``. The ``data`` folder is created in the current
    working directory if it does not exist yet.

    Args:
        base: URL prefix of the dataset, including the trailing slash.
        classes: Iterable of class names to download.
    """
    from urllib.parse import quote  # local import: only needed here

    # Pure-Python replacement for the `!mkdir -p data` shell escape, so the
    # function also runs outside IPython and on systems without `mkdir -p`.
    os.makedirs("data", exist_ok=True)
    for c in classes:
        # Percent-encode the name so classes containing spaces still give a
        # valid URL; plain names such as "bee" are left unchanged.
        path = base + quote(c) + ".npy"
        print(path)
        urllib.request.urlretrieve(path, os.path.join("data", c + ".npy"))
In [10]:
# Fetch the .npy file of every chosen class into the data folder
download_data(base, classes)
Preprocess the data
In [7]:
def load_preprocess_data(path, test_ratio=0.2, items_per_class=20000,
                         class_names=None, seed=None):
    """Load the downloaded bitmaps and preprocess them before training.

    Args:
        path: Folder holding one ``<class>.npy`` file per class.
        test_ratio: Fraction of all samples placed in the test split.
        items_per_class: Maximum number of samples taken from each file.
        class_names: Ordered class names; a sample with label ``i`` belongs
            to ``class_names[i]``. Defaults to the notebook-level ``classes``.
        seed: Optional seed for the shuffle. ``None`` keeps using NumPy's
            global random state.

    Returns:
        ``X_train, y_train, X_test, y_test``. Images are float32 arrays of
        shape ``(n, 28, 28, 1)`` divided by 255; labels are one-hot encoded
        with one column per class.

    Raises:
        ValueError: If ``class_names`` is empty.
    """
    if class_names is None:
        class_names = classes
    if len(class_names) == 0:
        raise ValueError("class_names must contain at least one class")

    # Load the files in class-list order. os.listdir() returns entries in
    # arbitrary order, so enumerating it does not guarantee that label i
    # corresponds to classes[i], which is how labels are decoded later.
    images = []
    labels = []
    for label, name in enumerate(class_names):
        data = np.load(os.path.join(path, name + ".npy"))
        data = data[:items_per_class, :]
        images.append(data)
        labels.append(np.full(data.shape[0], label))

    # Concatenate once instead of growing an array inside the loop: this
    # avoids repeated copies and keeps the pixels in their stored dtype
    # until the single float32 conversion below.
    x = np.concatenate(images, axis=0)
    y = np.concatenate(labels)

    # Randomize the dataset
    rng = np.random if seed is None else np.random.RandomState(seed)
    permutation = rng.permutation(y.shape[0])
    x = x[permutation, :]
    y = y[permutation]

    # Separate into training and testing: the first test_size shuffled
    # samples form the test split, the remainder the training split
    test_size = int(x.shape[0] / 100 * (test_ratio * 100))
    X_test = x[0:test_size, :]
    y_test = y[0:test_size]
    X_train = x[test_size:x.shape[0], :]
    y_train = y[test_size:y.shape[0]]

    # Reshape the flat 784-value rows into 28x28 single-channel images
    # and normalize
    X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
    X_train /= 255.0
    X_test /= 255.0

    # Convert class vectors to class matrices
    num_classes = len(class_names)
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return X_train, y_train, X_test, y_test
In [8]:
# Build the train/test split from the data folder and report the array shapes
X_train, y_train, X_test, y_test = load_preprocess_data(path="data")
print(*(arr.shape for arr in (X_train, y_train, X_test, y_test)))
Visualize the data
In [9]:
# Visualize the first 25 training images with their labels
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for i, ax in enumerate(axes.flatten()):
    X_show = np.squeeze(X_train[i])
    ax.imshow(X_show, cmap="gray_r", interpolation="nearest")
    # Labels are one-hot encoded, so argmax yields the class index directly.
    # The previous int(np.where(...)[0]) converted a 1-element array to a
    # scalar, which is deprecated in NumPy >= 1.25.
    ax.set_title(classes[np.argmax(y_train[i])])
    ax.axis("off")
Even for a human, the labels are far from obvious for some images. Let's see how our model handles them.
Model
We use a Convolutional Neural Network (CNN) that we train on the image samples.
In [22]:
# Define model: three convolution + max-pooling stages followed by a small
# dense classifier with dropout
model = keras.Sequential()
model.add(keras.layers.Convolution2D(16, (3, 3),
                                     padding="same",
                                     input_shape=X_train.shape[1:],
                                     activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Convolution2D(32, (3, 3), padding="same", activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Convolution2D(64, (3, 3), padding="same", activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation="relu"))
model.add(keras.layers.Dropout(0.1))
model.add(keras.layers.Dense(64, activation="relu"))
model.add(keras.layers.Dropout(0.1))
# One output unit per class, derived from the class list rather than a
# hard-coded 12 so it stays in sync with the one-hot label width
model.add(keras.layers.Dense(len(classes), activation="softmax"))
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line
model.summary()
In [23]:
# Fit the network, holding out 10% of the training samples for validation
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.1,
    verbose=2,
)
In [24]:
# Plot every metric recorded during training, one line per metric
curves = pd.DataFrame(history.history)
curves.plot(figsize=(8, 5)).grid(True)
In [25]:
# Predict on the test split (reused by the error analysis) and score it
pred = model.predict(X_test)
results = model.evaluate(X_test, y_test)
# Report the number of epochs actually recorded during training instead of a
# hard-coded 20, so the message stays correct if the epoch count changes
n_epochs = len(history.history["loss"])
print(f"After {n_epochs} epochs, the loss is {results[0]}, and the accuracy is {results[1]}")
Get errors
In [73]:
# Get predictions and true labels as class indices
y_pred = np.argmax(pred, axis=1)
y = np.argmax(y_test, axis=1)
# Get the indices of the bad labels. flatnonzero always returns a 1-D array,
# whereas argwhere(...).squeeze() collapses to 0-D when there is exactly one
# error, which np.random.choice would then treat as a range to sample from.
bad_pred = np.flatnonzero(y != y_pred)
# Plot up to 9 distinct misclassified images
fig, axes = plt.subplots(3, 3, figsize=(9, 9))
n_show = min(axes.size, bad_pred.size)
# Sample without replacement so the same image is not shown twice; skip the
# draw entirely when there are no errors to sample from
shown = np.random.choice(bad_pred, size=n_show, replace=False) if n_show else bad_pred
for ax, idx in zip(axes.flatten(), shown):
    X_show = np.squeeze(X_test[idx])
    ax.imshow(X_show, cmap="gray_r", interpolation="nearest")
    ax.set_title(f"True label: {classes[y[idx]]}, \nPrediction: {classes[y_pred[idx]]}")
# Hide the axes on every panel, including any left empty
for ax in axes.flatten():
    ax.axis("off")