Monday, June 10, 2019

Autograd

from sklearn import datasets
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import mxnet as mx
print(mx.__version__)

# Understand Autograd function
x = mx.nd.array([1,2,3,4])
x.attach_grad() # Allocate a gradient buffer for x

with mx.autograd.record():
    y = 2*x
    loss = mx.nd.sum(y*y)
    # loss = sum((2x)^2) = 4 * sum(x^2), so d(loss)/dx = 8x

print "Current Gradient of x = ", x.grad
loss.backward() # Back propagation
print "New Gradient of x = ", x.grad
print "Verify that gradient hand computed = 8*x", 8*x


'''
Current Gradient of x =  
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>
New Gradient of x =  
[ 8. 16. 24. 32.]
<NDArray 4 @cpu(0)>
Verify that the hand-computed gradient = 8*x: 
[ 8. 16. 24. 32.]
<NDArray 4 @cpu(0)>
'''
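# Sanity check: estimate the same gradient by central finite differences
# (a minimal numpy sketch; eps is an arbitrary small step).
# For loss = sum((2x)^2) = 4*sum(x^2), each partial should come out as 8*x_i.
import numpy as np

def loss_fn(x_np):
    y_np = 2 * x_np
    return np.sum(y_np * y_np)

x_np = np.array([1., 2., 3., 4.])
eps = 1e-4
for i in range(len(x_np)):
    x_plus, x_minus = x_np.copy(), x_np.copy()
    x_plus[i] += eps
    x_minus[i] -= eps
    print(i, (loss_fn(x_plus) - loss_fn(x_minus)) / (2 * eps))  # ~8, 16, 24, 32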

# Regression
iris = datasets.load_iris()
iris_df = pd.DataFrame(iris.data)
iris_df.columns = ['sepal_length','sepal_width','petal_length','petal_width']
iris_df['iris'] = iris.target
df = iris_df
df.head()

'''
sepal_length sepal_width petal_length petal_width iris
0 5.1 3.5 1.4 0.2 0
1 4.9 3.0 1.4 0.2 0
2 4.7 3.2 1.3 0.2 0
3 4.6 3.1 1.5 0.2 0
4 5.0 3.6 1.4 0.2 0
'''
# Create two dummy variables from "iris" (virginica is the omitted baseline)
df['i_setosa'] = 0
df.loc[(df['iris']==0), 'i_setosa']= 1
df['i_versicolor'] = 0
df.loc[(df['iris']==1), 'i_versicolor']= 1
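
# The same dummies can come from pandas directly; a one-call alternative to the
# loc-assignments above (shown for reference; column names are renamed to match):
dummies = pd.get_dummies(df['iris']).rename(
    columns={0: 'i_setosa', 1: 'i_versicolor', 2: 'i_virginica'})
print(dummies.head())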

# Split dataset into training set and test set 
df_train, df_test = train_test_split( df, test_size=0.3, random_state=1)

# Slice datasets into X (independent variables) and y (target variable)
independent_var = ['sepal_width','petal_length','petal_width','i_setosa','i_versicolor']
y_train = mx.nd.array(df_train['sepal_length'])
X_train = mx.nd.array(df_train[independent_var]) 
y_test = mx.nd.array(df_test['sepal_length'])
X_test = mx.nd.array(df_test[independent_var])

mx.random.seed(1)
X = mx.nd.array(X_train) 
y = mx.nd.array(y_train)


w = [1,1,1,1,1] # Initial slope coefficients (betas), one per feature
b = [0] # Intercept
params = mx.nd.array(w + b)

# Enter code here for attaching gradient
params.attach_grad()
params


# Compute y-hat once, outside autograd, to check shapes
# NOTE: params[5:6] (the intercept) is broadcast into all five columns,
# so the summed prediction effectively carries 5*b as its intercept.
betatimex = X_train * params[0:5] + params[5:6]
betatimex_sum = betatimex[:,0] + betatimex[:,1] + betatimex[:,2] + betatimex[:,3] + betatimex[:,4]
betatimex_sum
print(len(betatimex_sum))
print(len(y_train))



# betatimex_sum is y-hat
# One iteration of gradient descent
with mx.autograd.record():
    # Enter CODE here for loss function
    # in terms of the data and parameters w,b
    # defined in the data_instance
    betatimex = X * params[0:5] + params[5:6]
    betatimex_sum = betatimex[:,0] + betatimex[:,1] + betatimex[:,2] + betatimex[:,3] + betatimex[:,4]
    error = (y_train - betatimex_sum)
    loss = mx.nd.mean(error*error)

print ('Starting value of params = {}'.format(params))
loss.backward()   
#print (params)
print ('Gradients of the loss function w.r.t. params {}'.format(params.grad))
lr = 1e-2
params = params - lr*params.grad
print ('New value of params = {}'.format(params))
params.attach_grad()

'''
Starting value of params = 
[1. 1. 1. 1. 1. 0.]
<NDArray 6 @cpu(0)>
Gradients of the loss function w.r.t. params 
[16.783619  25.582096   8.73162    0.7542857  2.0266666 28.09524  ]
<NDArray 6 @cpu(0)>
New value of params = 
[ 0.8321638   0.744179    0.9126838   0.99245715  0.97973335 -0.2809524 ]
<NDArray 6 @cpu(0)>
'''
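
# The column-by-column sum above can be written as one matrix-vector product.
# A minimal equivalent sketch with mx.nd.dot -- note that broadcasting the
# intercept into all five columns above makes the effective intercept 5*b,
# whereas the conventional form below adds b once:
w_vec = params[0:5]
b_val = params[5:6]
y_hat = mx.nd.dot(X, w_vec) + b_val   # y_hat = Xw + b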

# Modeling parameters
lr = 1e-2
num_iters = 100

# Use for appending training loss in each iteration
loss_sequence = []
    
# Train
for iteration in range(num_iters):
    # Record computational graph
    print ("iteration %s Start " %(iteration))


    with mx.autograd.record():
        # Enter CODE here for loss function
        # in terms of the data and parameters w,b
        # defined in the data_instance
        betatimex = X * params[0:5] + params[5:6]
        betatimex_sum = betatimex[:,0] + betatimex[:,1] + betatimex[:,2] + betatimex[:,3] + betatimex[:,4]
        error = (y_train - betatimex_sum)
        loss = mx.nd.mean(error*error)
    loss.backward()

    params = params - lr*params.grad
    params.attach_grad()
        
    # Print iter, loss
    print ("iteration %s, Mean loss: %s" % (iteration,loss))
    # Append the loss from each iteration to loss_sequence
    loss_sequence.append(loss.asscalar())
        
# Plot Training Loss    
plt.figure(num=None,figsize=(8, 6))
plt.plot(loss_sequence)
plt.xlabel('iteration',fontsize=14)
plt.ylabel('Mean loss',fontsize=14)
plt.show()

# Test
# Enter code here for calculating mean squared error on test set

'''
iteration 0 Start 
iteration 0, Mean loss: 
[0.8541098]
<NDArray 1 @cpu(0)>
iteration 1 Start 
iteration 1, Mean loss: 
[0.7757419]
<NDArray 1 @cpu(0)>
iteration 2 Start 
iteration 2, Mean loss: 
[0.70869106]
<NDArray 1 @cpu(0)>
iteration 3 Start 
iteration 3, Mean loss: 
'''



betatimextest = X_test * params[0:5] + params[5:6]
betatimex_sumtest = betatimextest[:,0] + betatimextest[:,1] + betatimextest[:,2] + betatimextest[:,3] + betatimextest[:,4]
error_test = (y_test - betatimex_sumtest)
MSE = mx.nd.mean(error_test*error_test)

print ("Mean Squared Error on Test Set: %s" % (MSE))

"""
Mean Squared Error on Test Set: 
[0.17857154]
<NDArray 1 @cpu(0)>
"""


# Convolution

from skimage import io, viewer, color
import os
from PIL import Image
import numpy as np
from skimage import exposure
import pylab

os.getcwd()

img = io.imread('bw_image1.jpg', as_gray=True)   # Load the image as grayscale
print('image matrix size: ', img.shape)          # Print the size of the image
print('\n First 5 columns and rows of the image matrix: \n', img[:5,:5]*255)
#viewer.ImageViewer(img).show()              # plot the image

"""
image matrix size:  (897, 1168)

 First 5 columns and rows of the image matrix: 
[[88. 85. 82. 80. 81.]
 [88. 85. 88. 85. 84.]
 [81. 81. 87. 86. 83.]
 [86. 87. 88. 87. 83.]
 [86. 90. 85. 86. 83.]]
"""

# Convolution

Image.open("convolution_kernel.JPG")





def convolve2d(image, kernel, padding=1):
    # Convolve an image with a kernel and return the result.
    # Args:
    #   image: a numpy array of size [image_height, image_width].
    #   kernel: a numpy array of size [kernel_height, kernel_width].
    #   padding: number of zero-padding pixels added on each border.
    # Returns:
    #   a numpy array (the convolution output).

    kernel = np.flipud(np.fliplr(kernel))    # Flip the kernel (convolution, not cross-correlation)
    print('image shape    ', image.shape)
    print('kernel shape   ', kernel.shape)

    # Add zero padding to the input image
    image_padded = np.zeros((image.shape[0] + 2*padding, image.shape[1] + 2*padding))
    output = np.zeros((image_padded.shape[0] - kernel.shape[0],
                       image_padded.shape[1] - kernel.shape[1]))  # convolution output
    print('output shape    ', output.shape)

    if padding > 0:
        image_padded[padding:-padding, padding:-padding] = image
    else:
        image_padded = image
    print('image_padded shape   ', image_padded.shape)

    for x in range(output.shape[1]):         # Loop over every output pixel
        for y in range(output.shape[0]):
            # Element-wise multiplication of the kernel and the image patch
            output[y, x] = (kernel * image_padded[y:y+kernel.shape[0], x:x+kernel.shape[1]]).sum()
    return output

def Relu(image, threshold):
    # Thresholding activation: zero out values below `threshold`
    # (the standard ReLU is the special case threshold = 0).
    # NOTE: modifies the input array in place.
    image[image < threshold] = 0
    return image

def Maxpool(image, sqr_size):
    # Max pooling with a sqr_size x sqr_size window, stride 1, no padding.
    print('image shape    ', image.shape)
    output = np.zeros((image.shape[0] - sqr_size + 1, image.shape[1] - sqr_size + 1))  # pooling output
    print('output shape    ', output.shape)

    for x in range(output.shape[1]):         # Loop over every output pixel
        for y in range(output.shape[0]):
            output[y, x] = (image[y:y+sqr_size, x:x+sqr_size]).max()
    return output
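
# Quick cross-check of convolve2d against scipy's reference implementation on a
# tiny example (a minimal sketch, assuming scipy is installed). Note that
# convolve2d's output is one row and column shorter than scipy's 'valid' mode,
# so we compare on the overlapping region.
from scipy import signal

small = np.arange(25, dtype=float).reshape(5, 5)
k = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=float)
ours = convolve2d(small, k, padding=0)
ref = signal.convolve2d(small, k, mode='valid')
print(np.allclose(ours, ref[:ours.shape[0], :ours.shape[1]]))  # should print True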

img = io.imread('bw_image1.jpg')    # Load the image
img = color.rgb2gray(img)       # Convert the image to grayscale (1 channel)
image_equalized = exposure.equalize_adapthist(img/np.max(np.abs(img)), clip_limit=0.03)
print('\n First 5 columns and rows of the image matrix: \n', img[:5,:5]*255)

"""
 First 5 columns and rows of the image matrix: 
[[88. 85. 82. 80. 81.]
 [88. 85. 88. 85. 84.]
 [81. 81. 87. 86. 83.]
 [86. 87. 88. 87. 83.]
 [86. 90. 85. 86. 83.]]
"""

plt.imshow(img, cmap=plt.cm.gray)
plt.axis('off')
plt.show()




# Convolve the sharpen kernel and the image
kernel = np.array([[0,-1,0],[-1,5,-1],[0,-1,0]])
image_sharpen = convolve2d(img,kernel, 0)
#image_sharpen = convolve2d(image_sharpen,kernel, 0)
print('\n First 5 columns and rows of the image_sharpen matrix: \n', image_sharpen[:5,:5]*255)
print('\n Original image shape: \n', img.shape)
print('\n Sharpened image shape: \n', image_sharpen.shape)

"""
image shape     (897, 1168)
kernel shape    (3, 3)
output shape     (894, 1165)
image_padded shape    (897, 1168)

 First 5 columns and rows of the image_sharpen matrix: 
[[ 83. 101.  87.  86.  85.]
 [ 65.  92.  88.  74. 101.]
 [ 90.  94.  92.  75.  95.]
 [105.  79.  91.  77.  85.]
 [ 92.  68.  84.  82.  78.]]

 Original image shape: 
(897, 1168)

 Sharpened image shape: 
(894, 1165)
"""


# Plot the filtered image
plt.imshow(image_sharpen, cmap=plt.cm.gray)
plt.axis('off')
plt.show()





image_relu = Relu(image_sharpen, 0.3)   # NOTE: Relu also modifies image_sharpen in place
print('\n First 5 columns and rows of the image_sharpen matrix: \n', image_sharpen[:5,:5]*255)
# Plot the filtered image
plt.imshow(image_relu, cmap=plt.cm.gray)
plt.axis('off')
plt.show()

"""
First 5 columns and rows of the image_sharpen matrix: 
[[ 83. 101.  87.  86.  85.]
 [  0.  92.  88.   0. 101.]
 [ 90.  94.  92.   0.  95.]
 [105.  79.  91.  77.  85.]
 [ 92.   0.  84.  82.  78.]]
"""



image_maxpool = Maxpool(image_relu, 5)
print('\n First 5 columns and rows of the image_sharpen matrix: \n', image_sharpen[:5,:5]*255)
# Plot the pooled image
plt.imshow(image_maxpool, cmap=plt.cm.gray)
plt.axis('off')
plt.show()

"""
image shape     (894, 1165)
output shape     (890, 1161)

 First 5 columns and rows of the image_sharpen matrix: 
[[ 83. 101.  87.  86.  85.]
 [  0.  92.  88.   0. 101.]
 [ 90.  94.  92.   0.  95.]
 [105.  79.  91.  77.  85.]
 [ 92.   0.  84.  82.  78.]]
"""




"""
Convolution Parameter
Kernel Size
Stride (Horizontal, vertical)
Padding (Filter kernel on edges)
Dialation (skip pixel while applying filter/convolution)
Input --> Conv --> ReLU --> Pool --> ReLU -->Conv --> ReLU --> Pool --> Fully Connected
Overfitteing -- > Regularization ---> Data Augmentation - flip rotate etc
Batch normalization avoids overfitting
"""

Self Attention

x → Embedding → MultiHeadAttention → Concat → Project to lower dim → Add(x) → LayerNorm → FFN → Add → LayerNorm
Vocab to embedding t...
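
# A minimal numpy sketch of the scaled dot-product self-attention inside the
# MultiHeadAttention block above (single head; the toy dimensions and random
# projection weights are illustrative assumptions):
# Attention(Q, K, V) = softmax(Q K^T / sqrt(d_k)) V
import numpy as np

np.random.seed(1)
seq_len, d_model, d_k = 4, 8, 8                 # toy sizes
x_seq = np.random.randn(seq_len, d_model)       # embedded input sequence

W_q = np.random.randn(d_model, d_k)             # learned projections (random here)
W_k = np.random.randn(d_model, d_k)
W_v = np.random.randn(d_model, d_k)

Q, K, V = x_seq @ W_q, x_seq @ W_k, x_seq @ W_v
scores = Q @ K.T / np.sqrt(d_k)                 # similarity of every token to every other token
weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
weights = weights / weights.sum(axis=-1, keepdims=True)   # row-wise softmax
attended = weights @ V                          # each row: weighted mix of value vectors
print(attended.shape)                           # (4, 8)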