CNN: |
Number of training examples = 34799
Number of validation examples = 4410
Number of testing examples = 12630
Image data shape = (32, 32, 3)
Number of classes = 43
matplotlib
. , pyplot
, matplotlib.gridspec
3 :
gs = gridspec.GridSpec(1, 3, wspace=0.25, hspace=0.1)
# Create a wide, short figure (12 x 2 inches)
fig = plt.figure(figsize=(12,2))
# One axes per column of the grid spec `gs`; each axes spans all rows of its column
ax1, ax2, ax3 = [plt.subplot(gs[:, i]) for i in range(3)]
Gridspec
. , , . Gridspec
, .scipy
)scikit-image
, pip (OpenCV ). (CLAHE, contrast limited adaptive histogram equalization):skimage.exposure.equalize_adapthist
.skimage
, , , , . , IPython Parallel (ipyparallel
). : CLAHE . ( ipyparallel
) ipyparallel:
$ ipcluster start
from skimage import exposure
def grayscale_exposure_equalize(batch_x_y):
    """Convert one batch of images to grayscale, rescale it and apply CLAHE.

    The function is meant to run on an ipyparallel engine: it relies on the
    names `numpy`, `exposure` and `num_bins` being present in the namespace
    it executes in.

    Args:
        batch_x_y: a single batch of data; element 0 is a numpy array of
            images, element 1 holds the corresponding labels.
    Returns:
        A tuple of the numpy array of processed images (channel axis removed)
        and the labels (unchanged).
    """
    images, labels = batch_x_y[0], batch_x_y[1]
    # One 2-D slot per image: the trailing channel axis is dropped
    equalized = numpy.zeros(images.shape[:-1])
    for idx, image in enumerate(images):
        # Grayscale: weighted sum of the first three channels
        gray = numpy.dot(image[..., :3], [0.299, 0.587, 0.114])
        # Normalization: the +1 keeps the result strictly below 1
        gray_scaled = gray / (gray.max() + 1)
        # CLAHE; num_bins is pushed to the engines by the ipyparallel client
        equalized[idx, ...] = exposure.equalize_adapthist(gray_scaled, nbins=num_bins)
    return (equalized, labels)
import multiprocessing
import ipyparallel as ipp
import numpy as np
def preprocess_equalize(X, y, bins=256, cpu=multiprocessing.cpu_count()):
    """Grayscale and histogram-equalize a dataset in parallel on ipyparallel engines.

    A simplified version of a function which manages multiprocessing logic.
    This function always grayscales input images, though it can be generalized
    to apply any arbitrary function to batches.

    Args:
        X: numpy array of all images in dataset (last axis is the channel axis).
        y: array-like of corresponding labels.
        bins: the amount of bins to be used in histogram equalization.
        cpu: the number of batches the dataset is split into. Default: one
            batch per cpu core of the local machine.
    Returns:
        Numpy array of processed images with a trailing single-channel axis
        and a float numpy array of labels, both in the original order.
    """
    # The code below needs `y.shape`; accept plain lists as documented
    y = np.asarray(y)
    rc = ipp.Client()
    try:
        view = rc[:]
        # Use a DirectView object to broadcast imports to all engines
        with view.sync_imports():
            import numpy
            from skimage import exposure, transform, color
        # Use a DirectView object to set up the amount of bins on all engines
        view['num_bins'] = bins

        X_processed = np.zeros(X.shape[:-1])
        y_processed = np.zeros(y.shape)

        # Number of batches is equal to `cpu`
        batches_x_y = list(zip(np.array_split(X, cpu), np.array_split(y, cpu)))

        # `.get()` returns one result per batch, in submission order.
        # `.get_dict()` keys results by engine id, which silently drops
        # batches whenever the number of engines differs from `cpu`.
        results = view.map(grayscale_exposure_equalize, batches_x_y).get()

        # Combining the output batches into a single dataset
        cnt = 0
        for x_, y_ in results:
            X_processed[cnt:cnt + len(x_)] = x_
            y_processed[cnt:cnt + len(y_)] = y_
            cnt += len(x_)
    finally:
        # Release the client's connections even if processing failed
        rc.close()
    return X_processed.reshape(X_processed.shape + (1,)), y_processed
# X_train: numpy array of (34799, 32, 32, 3) shape
# y_train: labels of (34799,) shape (presumably a numpy array, since it carries a shape)
# Equalize the whole training set using 128 histogram bins
X_tr, y_tr = preprocess_equalize(X_train, y_train, bins=128)
num_bins
, :num_bins
, , . num_bins
, - .%store
, :
# Same images, multiple bins (contrast augmentation)
# IPython `%store` magic: persists the named variable so that a later session
# can restore it. The numeric suffix presumably is the CLAHE bin count -- TODO confirm.
%store X_tr_8
%store y_tr_8
# ...
%store X_tr_512
%store y_tr_512
numpy
skimage
:
import numpy as np
from skimage import transform
from skimage.transform import warp, AffineTransform
def rotate_90_deg(X):
    """Rotate every image of X by calling `transform.rotate` with a 270 degree angle.

    Args:
        X: numpy array of images, iterated along its first axis.
    Returns:
        A new array with the shape and dtype of X holding the rotated images.
    """
    rotated = np.zeros_like(X)
    for idx, image in enumerate(X):
        # skimage measures the angle counter-clockwise, so 270 is a quarter turn clockwise
        rotated[idx] = transform.rotate(image, 270.0)
    return rotated
def rotate_180_deg(X):
    """Rotate every image of X by 180 degrees.

    Args:
        X: numpy array of images, iterated along its first axis.
    Returns:
        A new array with the shape and dtype of X holding the rotated images.
    """
    rotated = np.zeros_like(X)
    for idx, image in enumerate(X):
        rotated[idx] = transform.rotate(image, 180.0)
    return rotated
def rotate_270_deg(X):
    """Rotate every image of X by calling `transform.rotate` with a 90 degree angle.

    Args:
        X: numpy array of images, iterated along its first axis.
    Returns:
        A new array with the shape and dtype of X holding the rotated images.
    """
    rotated = np.zeros_like(X)
    for idx, image in enumerate(X):
        # skimage measures the angle counter-clockwise, so 90 is three quarter turns clockwise
        rotated[idx] = transform.rotate(image, 90.0)
    return rotated
def rotate_up_to_20_deg(X):
    """Rotate every image of X by its own random angle drawn from [-20, 20] degrees.

    Args:
        X: numpy array of images, iterated along its first axis.
    Returns:
        A new array with the shape and dtype of X holding the rotated images.
    """
    max_angle = 20.
    rotated = np.zeros_like(X)
    for idx, image in enumerate(X):
        # A fresh angle per image; mode='edge' pads the uncovered corners with edge pixels
        angle = random.uniform(-max_angle, max_angle)
        rotated[idx] = transform.rotate(image, angle, mode='edge')
    return rotated
def flip_vert(X):
    """Return a copy of X with its third axis (index 2) reversed.

    Args:
        X: 4-D numpy array of images; the input itself is left untouched.
    Returns:
        A reversed view over a deep copy of X.
    """
    # Copy first so the returned view never aliases the caller's array
    return deepcopy(X)[:, :, ::-1, :]
def flip_horiz(X):
    """Return a copy of X with its second axis (index 1) reversed.

    Args:
        X: 4-D numpy array of images; the input itself is left untouched.
    Returns:
        A reversed view over a deep copy of X.
    """
    # Copy first so the returned view never aliases the caller's array
    return deepcopy(X)[:, ::-1, :, :]
def affine_transform(X, shear_angle=0.0, scale_margins=[0.8, 1.5], p=1.0):
    """Apply a random scale and shear transformation to a fraction of the images.

    Args:
        X: numpy array of images.
        shear_angle: maximum shear angle in counter-clockwise direction as radians.
        scale_margins: minimum and maximum margins to be used in scaling.
        p: a fraction of images to be augmented.
    Returns:
        A deep copy of X in which the randomly selected images are replaced by
        their warped versions.
    """
    augmented = deepcopy(X)
    # A single shear value is drawn per call and shared by every warped image
    shear = shear_angle * np.random.rand()
    # Pick int(len * p) distinct image indices
    n_selected = int(len(augmented) * p)
    selected = np.random.choice(len(augmented), n_selected, replace=False)
    for idx in selected:
        # The scale factor is drawn per image and applied equally to both axes
        zoom = random.uniform(scale_margins[0], scale_margins[1])
        tform = AffineTransform(scale=(zoom, zoom), shear=shear)
        augmented[idx] = warp(augmented[idx], tform, mode='edge')
    return augmented
rotate_up_to_20_deg
, . (flips) 90, 180, 270 , , . , , ( ):

label_class | label_name | rotate_90_deg | rotate_180_deg | rotate_270_deg | flip_horiz | flip_vert |
---|---|---|---|---|---|---|
13 | Yield | 13 | ||||
14 | Stop | |||||
15 | No vehicles | 15 | 15 | 15 | 15 | 15 |
16 | Vehicles over 3.5 ton prohibited | |||||
17 | No entry | 17 | 17 | 17 |
import pandas as pd
# Load the transform table: rows are label classes, the remaining columns are
# augmentation function names. The human-readable name column and rows without
# any augmentation are discarded.
augmentation_table = (
    pd.read_csv('augmentation_table.csv', index_col='label_class')
    .drop('label_name', axis=1)
    .dropna(axis=0, how='all')
)
# Handle to this module, used to look up augmentation functions by name
namespace = __import__(__name__)
def apply_augmentation(X, how=None):
    """Run the augmentation function named by `how` on a numpy array X.

    Args:
        X: numpy array with images.
        how: a string with a function name to be applied to X; it has to be a
            column of `augmentation_table`, and the function should return
            the same-shaped numpy array as in X.
    Returns:
        Augmented X dataset.
    """
    # Only names listed as columns of the augmentation table are accepted
    is_known = augmentation_table.get(how) is not None
    assert is_known
    # Resolve the function by name in the module-level namespace and call it
    return getattr(namespace, how)(X)
augmentation_table.csv
:
import numpy as np
def flips_rotations_augmentation(X, y):
    """Apply the augmentations listed in `augmentation_table` to images X.

    For every label class in the table, each available augmentation is applied
    to the images of that class and labelled with the class stored in the
    table cell. The original images and a randomly rotated copy of them are
    appended afterwards.

    Args:
        X: numpy array with images, first axis indexes the images.
        y: array-like of labels aligned with X.
    Returns:
        A tuple (X_out, y_out): table-driven augmentations first, then the
        original data, then the randomly rotated data. Labels come back as
        floats because accumulation starts from a float array.
    """
    labels = np.asarray(y)  # boolean masking below needs an array, not a list
    # Seed with empty arrays so the result dtype/shape match even if nothing is added.
    # Pieces are collected and concatenated once instead of re-stacking in the loop.
    x_parts = [np.empty([0] + list(X.shape[1:]), dtype=np.float32)]
    y_parts = [np.empty([0])]
    # Cycling through all label classes and applying all available transformations
    for in_label in augmentation_table.index.values:
        # `.loc` is the label-based lookup; `.ix` was removed in pandas 1.0
        available_augmentations = augmentation_table.loc[in_label].dropna()
        images = X[labels == in_label]
        # Augment images and their labels
        for kind, out_label in available_augmentations.items():
            x_parts.append(apply_augmentation(images, how=kind))
            y_parts.append([out_label] * len(images))
    # And stack with initial dataset
    x_parts.append(X)
    y_parts.append(y)
    # Random rotation is explicitly included in this function's body
    x_parts.append(rotate_up_to_20_deg(X))
    y_parts.append(y)
    return np.vstack(x_parts), np.hstack(y_parts)
affine_transform
: ( , , ).flips_rotations_augmentation
: augmentation_table.csv
def augmented_batch_generator(X, y, batch_size, rotations=True, affine=True,
                              shear_angle=0.0, scale_margins=[0.8, 1.5], p=0.35):
    """Yield shuffled batches of the dataset, augmenting each batch independently.

    Args:
        X: numpy array with images.
        y: list of labels.
        batch_size: the number of source images per batch (augmentation may
            make the yielded batch larger).
        rotations: whether to apply `flips_rotations_augmentation` function to dataset.
        affine: whether to apply `affine_transform` function to dataset.
        shear_angle: `shear_angle` argument for `affine_transform` function.
        scale_margins: `scale_margins` argument for `affine_transform` function.
        p: `p` argument for `affine_transform` function.
    Yields:
        Tuples (batch_x, batch_y) of images and their labels.
    """
    # `shuffle` is not defined in this snippet; presumably sklearn.utils.shuffle -- TODO confirm
    shuffled_x, shuffled_y = shuffle(X, y)
    total = shuffled_x.shape[0]
    for start in range(0, total, batch_size):
        stop = start + batch_size
        images = shuffled_x[start:stop, ...]
        labels = shuffled_y[start:stop]
        # The flags are compared by identity: only the literal True enables a step
        if affine is True:
            images = affine_transform(images, shear_angle=shear_angle,
                                      scale_margins=scale_margins, p=p)
        if rotations is True:
            images, labels = flips_rotations_augmentation(images, labels)
        yield images, labels
num_bins
CLAHE train, . : , :