# Classification Setup

In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import csv
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from sklearn import svm, metrics
from sklearn.model_selection import KFold
from sklearn.naive_bayes import MultinomialNB
import itertools
from joblib import dump, load

from IPython.display import Audio, display

def allDone():
    """Play a short notification sound in the notebook (autoplaying audio widget)."""
    finished_sound = Audio(
        url='https://sound.peal.io/ps/audios/000/000/537/original/woo_vu_luvub_dub_dub.wav',
        autoplay=True,
    )
    display(finished_sound)

In [2]:
#############################################
# Assuming folder structure:
# ./
#  ^---this doc
#  ^---flower_classification/
#                          ^---testing/
#                          ^---training/
#                          ^---training_label.csv
#############################################

# Data Loading

In [3]:
def load_set(dir_path, gray=False):
    """
    Load every ``.jpg`` image found directly inside ``dir_path``.

    The image number is parsed from the file name, which is expected to look
    like ``<prefix>_<number>.jpg``.

    Parameters
    ----------
    dir_path : str
        Directory to scan (sub-directories are not visited).
    gray : bool, optional
        If True, decode each image as single-channel grayscale; otherwise
        decode it as a colour image (OpenCV's default read mode).

    Returns
    -------
    list of tuple
        ``[(image number, image), ...]`` in the order given by ``os.listdir``.

    Raises
    ------
    IOError
        If OpenCV cannot decode one of the ``.jpg`` files.
    """
    # Both branches of the old ``gray`` check loaded a colour image, so the
    # flag had no effect; select the read mode explicitly instead.
    read_flag = cv2.IMREAD_GRAYSCALE if gray else cv2.IMREAD_COLOR

    x = []
    for filename in os.listdir(dir_path):
        if not filename.endswith(".jpg"):
            continue
        img_num = int(filename.split('_')[1].split('.')[0])
        img_path = os.path.join(dir_path, filename)
        img = cv2.imread(img_path, read_flag)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising
            raise IOError("Could not read image: {}".format(img_path))
        x.append((img_num, img))

    return x

def load_training_labels(label_path):
    """
    Read the training labels from a CSV file.

    The file must have a header row with the columns ``Id`` and ``Category``.
    ``Id`` is expected to look like ``<prefix>_<number>``; the part after the
    first underscore is converted to the integer image number.

    Parameters
    ----------
    label_path : str
        Path to the CSV file.

    Returns
    -------
    list of tuple
        ``[(image number, category), ...]`` in file order.
    """
    # The csv module asks for files to be opened with newline='' so that it
    # can interpret line endings itself (the previous value was '\n').
    with open(label_path, newline='') as csvfile:
        return [
            (int(row['Id'].split('_')[1]), int(row['Category']))
            for row in csv.DictReader(csvfile)
        ]


# Load the training images as (image number, image) tuples and the labels as
# (image number, category) tuples, using the folder layout described above.
train_images = load_set('./flower_classification/training/')
train_labels = load_training_labels('./flower_classification/training_label.csv')

# Play the notification sound once loading has finished
allDone()

In [4]:
# Sort images and labels by image number so that position i refers to the
# same sample in both lists
train_images.sort(key=lambda x: int(x[0]))
train_labels.sort(key=lambda x: int(x[0]))

# Ensure data and labels are correct.
# The length check runs first: comparing entry by entry on lists of different
# length would otherwise fail with an IndexError before any helpful message.
# Both problems stop the notebook, because training on misaligned labels
# would silently produce a meaningless classifier.
if len(train_images) != len(train_labels):
    raise ValueError(
        "Different number of images & labels! ({} images, {} labels)".format(
            len(train_images), len(train_labels)))

misaligned = [
    (img[0], lab[0])
    for img, lab in zip(train_images, train_labels)
    if img[0] != lab[0]
]
if misaligned:
    raise ValueError(
        "Image numbers are not aligned! First mismatch (image, label): {}".format(
            misaligned[0]))

# Remove image number from arrays
# NOTE(review): if the images differ in shape, recent NumPy versions refuse to
# build a ragged array here unless dtype=object is used -- confirm image sizes.
train_images = np.array([x[1] for x in train_images])
train_labels = np.array([x[1] for x in train_labels])

allDone()

# Data Prep

In [5]:
def resize_images(image_set, dim=(500,500)):
    """
    Resize every image in ``image_set`` to ``dim``, overwriting the entries in place.

    Nothing is returned; the caller's container is modified directly.
    (Helper that was used for testing with HOG features.)
    """
    for position in range(len(image_set)):
        image_set[position] = cv2.resize(image_set[position], dim)

# resize_images(train_images)


# Feature Extraction

### Collect top n SIFT features

In [6]:
############ SIFT ################
'''
    Extract SIFT features, capping the number of best keypoints returned per image.
    The cap keeps the input to k-means clustering reasonably small, which avoids
    immense run times.
'''
# 
# 
def get_sift_features(data, n_features=20):
    """
    Detect SIFT keypoints and compute their descriptors for every image.

    Parameters
    ----------
    data : iterable
        Images to process.
    n_features : int, optional
        Number of best keypoints the SIFT detector is asked to retain per
        image (passed as ``nfeatures``). Defaults to 20.

    Returns
    -------
    list of tuple
        One ``(keypoints, descriptors)`` pair per image, both as numpy arrays.
        When no descriptors are produced for an image, ``descriptors`` is an
        empty ``(0, 128)`` float32 array so that callers can still take its
        length.
    """
    # SIFT is exposed as cv2.SIFT_create in OpenCV >= 4.4; older builds only
    # provide it through the contrib module cv2.xfeatures2d.
    sift_factory = getattr(cv2, 'SIFT_create', None)
    if sift_factory is None:
        sift_factory = cv2.xfeatures2d.SIFT_create

    # The detector is configured identically for every image, so it is
    # created once instead of once per loop iteration.
    sift = sift_factory(nfeatures=n_features)

    features = []
    for img in data:
        # detect SIFT features for image
        keypoints = sift.detect(img, None)
        keypoints, descriptors = sift.compute(img, keypoints)
        if descriptors is None:
            # np.array(None) would be a 0-d object array, which breaks len()
            descriptors = np.empty((0, 128), dtype=np.float32)
        features.append((np.array(keypoints), np.array(descriptors)))
    return features


# Extract a (keypoints, descriptors) pair for every training image
sift_feats = get_sift_features(train_images)

##################################



############# HOG #################
'''
HOG computation similar to the compute_hog function seen in our tutorials.

Compared with the tutorial version, the descriptor is no longer reshaped (it is kept
as the full n-dimensional vector) and the function operates over a list of images.
'''

def get_hog_features(data, cell_size=(8,8), block_size=(2,2), nbins=9):
    """
    Compute a HOG descriptor for every image in ``data``.

    Previously the HOG computation lived in a nested helper that was never
    called, so this function always returned ``None``. It now performs the
    computation and returns the result.

    Parameters
    ----------
    data : iterable
        Images (arrays exposing ``.shape`` as ``(height, width, ...)``).
    cell_size : tuple of int, optional
        Cell size in pixels, given as ``(height, width)``.
    block_size : tuple of int, optional
        Block size in cells, given as ``(height, width)``.
    nbins : int, optional
        Number of orientation bins.

    Returns
    -------
    list
        One ``[index, hog_vector]`` entry per image, where ``index`` is the
        position of the image in ``data`` and ``hog_vector`` is whatever
        ``cv2.HOGDescriptor.compute`` returns (left unreshaped).
    """
    feature_maps = []
    for i, img in enumerate(data):
        # OpenCV sizes are (width, height), hence the swapped indices.
        # The HOG window is set to the full size of the current image.
        # NOTE(review): OpenCV presumably requires the window size to be
        # compatible with the block/cell sizes -- confirm for images whose
        # dimensions are not multiples of the cell size.
        hog = cv2.HOGDescriptor(_winSize=(img.shape[1], img.shape[0]),
                                _blockSize=(block_size[1] * cell_size[1],
                                            block_size[0] * cell_size[0]),
                                _blockStride=(cell_size[1], cell_size[0]),
                                _cellSize=(cell_size[1], cell_size[0]),
                                _nbins=nbins)

        # Compute HoG features and add the HOG vector to the list
        feature_maps.append([i, hog.compute(img)])
    return feature_maps

##########################################

# Play the notification sound once this cell has finished running
allDone()

In [7]:
# Separate the keypoints and descriptors.
# The number of keypoints can differ from image to image, so the per-image
# arrays are stored in 1-D object arrays that are filled element by element.
# Calling np.array() on such a ragged list raises ValueError on NumPy >= 1.24.
sift_kps = np.empty(len(sift_feats), dtype=object)
sift_descs = np.empty(len(sift_feats), dtype=object)
for idx, (kps, descs) in enumerate(sift_feats):
    sift_kps[idx] = kps
    sift_descs[idx] = descs


In [8]:
# To perform KMeans clustering, every image must contribute the same number
# of descriptors: keep the first 20, or pad with zero vectors when fewer
# than 20 were found.
def _fit_descriptor_count(desc, n_keep=20, desc_dim=128):
    """Return ``desc`` as an (n_keep, desc_dim) array, truncated or zero-padded at the end."""
    desc = np.asarray(desc)
    if desc.ndim != 2:
        # No usable descriptors for this image (e.g. OpenCV returned None,
        # which becomes a 0-d array): the result is padding only.
        return np.zeros((n_keep, desc_dim))
    if len(desc) >= n_keep:
        return desc[:n_keep]
    # One append with all missing rows instead of one append per missing row
    padding = np.zeros((n_keep - len(desc), desc_dim))
    return np.append(desc, padding, axis=0)


# Stack our input -- functionally a reshape to (n_images, 20, 128)
sift_descs = np.stack([_fit_descriptor_count(desc) for desc in sift_descs], 0)


number of sift features: 6000
number of keypoints per image: 21
number of descriptors per image: 21
size of descriptor: 128
sift_descs shape (6000,) (21, 128)


### Perform KMeans Clustering on full set of SIFT descriptors

In [9]:

def bag_of_words(X, n_clusters, kmeans=None, return_model=False):
    '''
        Encode every data point as a normalized histogram of visual words.

        By default a visual vocabulary is created by clustering the SIFT
        descriptors across the whole input with KMeans
        (n_clusters = n-visual words). Features of a different set (e.g. the
        test set) are only comparable when they are built with the SAME
        vocabulary, so an already fitted model can be passed through `kmeans`;
        in that case nothing is refitted.

        Parameters:
            X            -- array of shape (n_points, n_descriptors, descriptor_size)
            n_clusters   -- number of visual words to fit; ignored when
                            `kmeans` is given (its own n_clusters is used)
            kmeans       -- optional fitted clustering model exposing
                            `predict` and `n_clusters`
            return_model -- when True, return (histograms, model) instead of
                            only the histograms

        Returns:
            (n_points, n_words) array of L2-normalized word counts, plus the
            clustering model when `return_model` is True.
    '''
    n_points, n_desc, desc_dim = X.shape
    print("Shape of input: {}".format(X.shape))
    # One row per descriptor; X itself is left untouched
    flat = np.reshape(X, (n_points * n_desc, desc_dim))
    print("Reshaped for clustering: {}".format(flat.shape))

    if kmeans is None:
        print("Fitting on k={} clusters...".format(n_clusters))
        kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(flat)
    else:
        n_clusters = kmeans.n_clusters

    print("Predicting...")
    # Assign every descriptor to its closest cluster in a single call, then
    # regroup the assignments per data point
    words = np.reshape(kmeans.predict(flat), (n_points, n_desc))

    cluster_hists = np.zeros((n_points, n_clusters))
    for i, point_words in enumerate(words):
        # resultant vector (n-cluster dim histogram) is feature vector for data point
        bc = np.bincount(point_words, minlength=n_clusters)
        # normalize vector to avoid massive weight discrepancies;
        # an all-zero histogram is left as zeros instead of becoming NaN
        norm = np.linalg.norm(bc)
        if norm > 0:
            cluster_hists[i] = bc / norm

    # Other option for vector normalization is to encode as 1-hot bag of words
#     for i in range(len(cluster_hists)):
#         for j in range(len(cluster_hists[i])):
#             cluster_hists[i][j] = 1 if cluster_hists[i][j] > 0 else 0

    if return_model:
        return cluster_hists, kmeans
    return cluster_hists

# Encode every training image as a normalized histogram over 300 visual words
new_X = bag_of_words(sift_descs, 300)

print("Finished feature extraction.")
# Play the notification sound once feature extraction has finished
allDone()

Shape of input: (6000, 20, 128)
Reshaped for clustering: (120000, 128)
Fitting on k=300 clusters...
Predicting...
Finished feature extraction.


# Classifier Training

In [10]:


'''
    The following method was taken from Scikit-learn's documentation
'''
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix (rows: true labels, columns: predictions).
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, optional
        If True, each row is divided by its sum before printing and plotting.
        Rows that sum to zero are left as zeros.
    title : str, optional
        Title of the plot.
    cmap : matplotlib colormap, optional
        Colormap used for the heat map.
    """
    # Normalise BEFORE drawing so the heat map, the printed matrix and the
    # cell annotations all show the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Two decimals keep normalized ratios readable inside a cell
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def svm_classify_validation(X, y, max_iter=2000, C=0.15):
    '''
        Perform 5-fold cross validation using a linear SVM.

        For every fold, a LinearSVC is trained on the training split, and the
        classification report and confusion matrix of the held-out split are
        printed/plotted.

        Parameters:
            X        -- feature matrix, one row per sample (numpy array)
            y        -- label per sample (numpy array)
            max_iter -- maximum number of solver iterations
            C        -- regularization parameter

        `max_iter` and `C` used to be ignored in favour of hard-coded values
        (2000 and 0.15). They are now honoured, and their defaults were set to
        those values so that calls relying on the defaults train the same model
        as before.
    '''
    # Use the full label set for every fold: a class that is missing from a
    # fold would otherwise shrink the matrix and misalign it with the ticks.
    class_labels = np.unique(y)
    np.set_printoptions(precision=2)

    # Seeded so the folds are reproducible from run to run
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        print("Shape of train x and y: {} and {}".format(X_train.shape, y_train.shape))
        print("Shape of test x and y: {} and {}".format(X_test.shape, y_test.shape))

        classifier = svm.LinearSVC(max_iter=max_iter, C=C, random_state=0, tol=1e-5)
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)

        print("Classification report for [%s]:\n%s\n" % (classifier, metrics.classification_report(y_test, y_pred)))
        # Compute confusion matrix
        cnf_matrix = metrics.confusion_matrix(y_test, y_pred, labels=class_labels)

        # Plot non-normalized confusion matrix
        plt.figure(figsize=(50,50))
        plot_confusion_matrix(cnf_matrix, classes=class_labels,
                      title='Confusion matrix, without normalization')
        plt.show()
#         print("Accuracy score: %s\n" % metrics.accuracy_score(y_test, y_pred))

# Cross-validate the linear SVM on the bag-of-words training features
svm_classify_validation(new_X, train_labels)

def mnb_classify_validation(X, y):
    '''
        Perform 5-fold cross validation using Multinomial Naive Bayes.

        For every fold, a MultinomialNB classifier is trained on the training
        split, and the classification report, confusion matrix and accuracy of
        the held-out split are printed/plotted.

        Parameters:
            X -- feature matrix, one row per sample (numpy array)
            y -- label per sample (numpy array)
    '''
    # Use the full label set for every fold: a class that is missing from a
    # fold would otherwise shrink the matrix and misalign it with the ticks.
    class_labels = np.unique(y)
    np.set_printoptions(precision=2)

    # Seeded so the folds are reproducible from run to run
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        print("Shape of train x and y: {} and {}".format(X_train.shape, y_train.shape))
        print("Shape of test x and y: {} and {}".format(X_test.shape, y_test.shape))

        classifier = MultinomialNB()
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)

        print("Classification report for [%s]:\n%s\n" % (classifier, metrics.classification_report(y_test, y_pred)))
        # Compute confusion matrix
        cnf_matrix = metrics.confusion_matrix(y_test, y_pred, labels=class_labels)

        # Plot non-normalized confusion matrix
        plt.figure(figsize=(50,50))
        plot_confusion_matrix(cnf_matrix, classes=class_labels,
                      title='Confusion matrix, without normalization')
        plt.show()
        print("Accuracy score: %s\n" % metrics.accuracy_score(y_test, y_pred))

# mnb_classify_validation(new_X, train_labels)

def save_model(X, y, path='./classifier.joblib', max_iter=2000, C=0.15):
    """
    Fit a LinearSVC on the given data and persist it with joblib.

    Saving the fitted model lets it be loaded and applied later without
    retraining.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Label per sample.
    path : str, optional
        Destination file; defaults to the previously hard-coded location.
    max_iter : int, optional
        Maximum number of solver iterations (previously hard-coded to 2000).
    C : float, optional
        Regularization parameter (previously hard-coded to 0.15).

    Returns
    -------
    The fitted classifier (the function used to return None).
    """
    # `dump` comes from the notebook's import cell; the redundant local
    # re-import (which also pulled in an unused `load`) was dropped.
    classifier = svm.LinearSVC(max_iter=max_iter, C=C, random_state=0, tol=1e-5)
    classifier.fit(X, y)

    dump(classifier, path)
    return classifier

# save_model(new_X, train_labels)


# Play the notification sound once this cell has finished running
allDone()

Shape of train x and y: (4800, 300) and (4800,)
Shape of test x and y: (1200, 300) and (1200,)
Classification report for [MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)]:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         5
           2       1.00      0.20      0.33         5
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         5
           5       0.00      0.00      0.00        10
           6       0.00      0.00      0.00        10
           7       0.00      0.00      0.00         7
           8       0.17      0.45      0.25        11
           9       0.00      0.00      0.00         7
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00        15
          12       0.57      0.31      0.40        13
          13       0.00      0.00      0.00         6
          14       0.00      0.00      0.00        10
      

Classification report for [MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)]:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         7
           2       0.00      0.00      0.00        14
           3       0.00      0.00      0.00         6
           4       0.00      0.00      0.00         9
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         5
           7       0.00      0.00      0.00         5
           8       0.38      0.36      0.37        14
           9       0.00      0.00      0.00        10
          10       0.00      0.00      0.00         5
          11       0.00      0.00      0.00        13
          12       0.75      0.21      0.33        14
          13       0.00      0.00      0.00         8
          14       0.00      0.00      0.00         7
          15       0.00      0.00      0.00         5
          16       0.00      0.00      0.00    

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Classification report for [MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)]:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         6
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         7
           5       0.00      0.00      0.00        12
           6       0.00      0.00      0.00         9
           7       0.00      0.00      0.00         4
           8       0.30      0.40      0.34        15
           9       0.00      0.00      0.00         7
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00        10
          12       0.33      0.20      0.25        10
          13       0.00      0.00      0.00         3
          14       0.00      0.00      0.00         8
          15       0.00      0.00      0.00        11
          16       0.00      0.00      0.00    

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


# Classifier Testing

In [10]:
def classify_test_set(vocabulary=None, classifier_path='classifier.joblib',
                      n_descriptors=20, n_clusters=300):
    """
    Load the test images, build their bag-of-words features and classify them
    with the classifier previously stored on disk.

    Parameters
    ----------
    vocabulary : fitted clustering model, optional
        The KMeans model (exposing ``predict`` and ``n_clusters``) that was
        fitted on the TRAINING descriptors. When omitted, the previous
        behaviour is kept: ``bag_of_words`` fits a brand-new KMeans on the
        test descriptors. Cluster indices of that new model do not correspond
        to the ones the classifier was trained on, so a warning is issued.
    classifier_path : str, optional
        joblib file containing the trained classifier.
    n_descriptors : int, optional
        Number of SIFT descriptors kept per image (clamped or zero-padded).
    n_clusters : int, optional
        Vocabulary size used only when ``vocabulary`` is not given.

    Returns
    -------
    tuple
        ``(test_img_num, y_pred)``: the image numbers and the predicted
        category for each of them, in matching order. (The function used to
        compute the predictions and then discard them.)
    """
    import warnings

    # Load test set
    test_images = load_set('./flower_classification/testing/')

    # Split Image Number and Image. The images stay in a plain list: they may
    # differ in shape, and recent NumPy versions refuse to build a ragged array.
    test_img_num = np.array([x[0] for x in test_images])
    test_images = [x[1] for x in test_images]

    # Get sift features
    test_sift_feats = get_sift_features(test_images)

    # Bring every image to exactly n_descriptors descriptors
    fixed_descs = []
    for _, desc in test_sift_feats:
        desc = np.asarray(desc)
        if desc.ndim != 2:
            # no usable descriptors for this image
            desc = np.zeros((0, 128))
        desc = desc[:n_descriptors]
        missing = n_descriptors - len(desc)
        if missing > 0:
            # pad with 0 vectors if fewer than n_descriptors keypoints were found
            desc = np.append(desc, np.zeros((missing, 128)), axis=0)
        fixed_descs.append(desc)

    # Stack our input -- functionally a reshape
    test_sift_descs = np.stack(fixed_descs, 0)

    if vocabulary is None:
        warnings.warn(
            "classify_test_set: no training vocabulary supplied; a new KMeans "
            "vocabulary is fitted on the test set, so the features are not "
            "consistent with those the classifier was trained on.")
        X_test = bag_of_words(test_sift_descs, n_clusters)
    else:
        # Histogram of visual words using the training vocabulary
        n_images = test_sift_descs.shape[0]
        n_words = vocabulary.n_clusters
        words = vocabulary.predict(
            np.reshape(test_sift_descs, (n_images * n_descriptors, 128)))
        words = np.reshape(words, (n_images, n_descriptors))
        X_test = np.zeros((n_images, n_words))
        for i, image_words in enumerate(words):
            counts = np.bincount(image_words, minlength=n_words)
            X_test[i] = counts / np.linalg.norm(counts)

    # Load the classifier that was saved earlier.
    # NOTE: joblib.load unpickles the file -- only load files you trust.
    classifier = load(classifier_path)
    y_pred = classifier.predict(X_test)

    allDone()
    return test_img_num, y_pred

# classify_test_set()

# result = np.array(list(zip(img_numsss, y_pred)))
# np.savetxt("results5.csv", result, delimiter=",", fmt="%s")

Shape of input: (2189, 20, 128)
Reshaped for clustering: (43780, 128)
Fitting on k=300 clusters...
Predicting...
