# -*- coding: utf-8 -*-
"""
Created on Sun Jan 30 10:25:51 2022

@author: Darshan Lodha
"""

"""____________________ Question 1.) __________________"""

# Importing library numpy
import numpy as np
# Constructing the given transition probability matrix
tpm_matrix = np.array([0.75,0.15,0.10,0.45,0.45,0.10,0.30,0.10,0.60]).reshape(3,3)
tpm_matrix

# Constructing a matrix which will store the product of TPM in the constructed 
# loop
# The matrix is created in such a way that not all the elements in a 
# particular column are the same.
product_matrix = np.array([0,1,0,1,0,0,0,0,1]).reshape(3,3)

# Creating a dummy matrix to which initialises with the given TPM and is 
# used in the while loop
dummy_matrix = tpm_matrix

# Counter variable 'i', currently storing 0 representing the row number
i = 0

# While loop for calculating the stationary distribution 
while ((product_matrix[i,0] != product_matrix[i+1,0]) & (product_matrix[i+1,0] != product_matrix[i+2,0])):
        product_matrix = np.dot(dummy_matrix,tpm_matrix)
        dummy_matrix = product_matrix

# (Note on the loop constructed:
#  Only the elements in the first column are passed as a condition in the while
#  loop, since if those becomes equal, all the elements on the second and third
#  column will be equal as well

# Calling the product_matrix which has all the elements in a particular 
# column equal
product_matrix

# Printing the output rounded off to 2 decimal places (The second decimal 
# place is 0 which is why python won't print the second decimal place)
print("The stationary distribution for the given TPM is:\n","(",
      round(product_matrix[0,0],2),",",round(product_matrix[0,1],2),",",
      round(product_matrix[0,2],2),")")

""" Final Ans: The stationary distribution of the given TPM rounded of to 2
 decimal places is (0.60,0.20,0.20) """
 
"""____________________ Question 2.) __________________"""

# Loading the x and y values
x_actual = [3.44,3.44,4.07,3.73,3.78,5.25,5.424,5.345,2.20]
y_actual = [19.2,17.8,16.4,17.3,15.2,10.4,10.4,14.7,32.4]

# Initialising and declaring the necessary variables to be used
y_predicted = []
sse_list = np.array([])
b0_list = np.array([])
b1_list = np.array([])
sum1 = 0
store = 0
# Some of the variables are declared as array for the ease of using argmin()

# Constructing the nested for loop to calculate sse for each b0 and b1 values
for b0 in np.arange(37,39.01,0.01):
    for b1 in np.arange(-6,2.01,0.01):
        for x_counter in x_actual:
            y_predicted.append(b0 + b1*x_counter) 
        for i in range(0,9,1):
            store = (y_actual[i] - y_predicted[i])**2
            sum1 = sum1 + store
        sse_list = np.append(sse_list,sum1)
        b0_list = np.append(b0_list,b0)
        b1_list = np.append(b1_list,b1)
        y_predicted = []
        sum1 = 0
        store = 0

# Finding the position of the minimum value of sse
minimum_position = sse_list.argmin()
# Displaying the corresponding b0 and b1 values        
b0_final = round(b0_list[minimum_position],2)          
b1_final = round(b1_list[minimum_position],2)            
print("The value of B0 is:",b0_final,"and the value of B1 is:",b1_final,"as per minimising the SSE")

""" Final ans: B0 = 38.85 and B1 = -5.34 """        
# Note: The code will take up a bit of time due to the computation duration
# using array 

"""____________________ Question 3.) __________________"""

# Loading the x and y values
x_q3 = [2.76,1.80,2.50,10.64,12.01]
y_q3 = [7.40,4.81,6.69,28.52,32.18]

# Calculating the mean of x and y values
x_avg = (2.76+1.80+2.50+10.64+12.01)/5
y_avg = (7.40+4.81+6.69+28.52+32.18)/5

# Calculating sample standard devation of original x values
x_q3_sum = 0
for x_q3_counter in x_q3:
    x_q3_diffsq = (x_q3_counter - x_avg)**2
    x_q3_sum = x_q3_sum + x_q3_diffsq
x_q3_var = x_q3_sum/4
x_q3_var
x_q3_sd = x_q3_var**(1/2)
print("The sample standard deviation of the original x values is:",x_q3_sd)

# Calculating sample standard devation of original y values
y_q3_sum = 0
for y_q3_counter in y_q3:
    y_q3_diffsq = (y_q3_counter - y_avg)**2
    y_q3_sum = y_q3_sum + y_q3_diffsq
y_q3_var = y_q3_sum/4
y_q3_var
y_q3_sd = y_q3_var**(1/2)
print("The sample standard deviation of the original y values is:",y_q3_sd)

# Multiplying both the data sets by 2 for checking the change in scale
x_q3_new = []
for x_q3_traverse in x_q3:
    x_q3_new.append(x_q3_traverse*2)
x_q3_new
y_q3_new = []
for y_q3_traverse in y_q3:
    y_q3_new.append(y_q3_traverse*2)
y_q3_new

# Calculating the mean of new x and y values
x_avg2 = (5.52+3.6+5.0+21.28+24.02)/5
y_avg2 = (14.8+9.62+13.38+57.04+64.36)/5

# Calculating sample standard devation of new x values
x_q3_sum2 = 0
for x_q3_counter2 in x_q3_new:
    x_q3_diffsq2 = (x_q3_counter2 - x_avg2)**2
    x_q3_sum2 = x_q3_sum2 + x_q3_diffsq2
x_q3_var2 = x_q3_sum2/4
x_q3_var2
x_q3_sd2 = x_q3_var2**(1/2)
print("The sample standard deviation of the new x values is:",x_q3_sd2)

# Calculating sample standard devation of new y values
y_q3_sum2 = 0
for y_q3_counter2 in y_q3_new:
    y_q3_diffsq2 = (y_q3_counter2 - y_avg2)**2
    y_q3_sum2 = y_q3_sum2 + y_q3_diffsq2
y_q3_var2 = y_q3_sum2/4
y_q3_var2
y_q3_sd2 = y_q3_var2**(1/2)
print("The sample standard deviation of the new y values is:",y_q3_sd2)

# As can be seen from the standard deviation values, when the original data 
# was multiplied by a constant, in this case 2, the standard deviation changes 
# that many times from the original standard deviation, in this case,
# since the data set was multiplied by 2, the standard devaition increased to
# twice the original value

# Calculating sample semi-variance for the original x values
x_below_avg_data = []
for x_avg_check in x_q3:
    if (x_avg_check < x_avg) :
        x_below_avg_data.append(x_avg_check)
x_below_avg_data
x_q3_semi = 0
x_q3_semidiffsq = 0
for x_q3_semicounter in x_below_avg_data:
    x_q3_semidiffsq = (x_avg - x_q3_semicounter)**2
    x_q3_semi = x_q3_semi + x_q3_semidiffsq
x_q3_semivar = x_q3_semi/3
x_q3_semivar
print("The sample semi-variance of the x - values is:",x_q3_semivar)

# Calculating sample semi-variance for the original y values 
y_below_avg_data = []
for y_avg_check in y_q3:
    if (y_avg_check < y_avg) :
        y_below_avg_data.append(y_avg_check)
y_below_avg_data
y_q3_semi = 0
y_q3_semidiffsq = 0
for y_q3_semicounter in y_below_avg_data:
    y_q3_semidiffsq = (y_avg - y_q3_semicounter)**2
    y_q3_semi = y_q3_semi + y_q3_semidiffsq
y_q3_semivar = y_q3_semi/3
y_q3_semivar
print("The sample semi-variance of the y - values is:",y_q3_semivar)

""" Final Ans: Semi-variance for x values is 13.04 and y values is 93.74"""

"""____________________ Question 4.) __________________"""

# Initialising a list to check for max values of Likelihood function
from scipy.optimize import minimize
from scipy.optimize import Bounds
checker = []
X = np.array([2.29, 19.98, 0.06, 12.01, 7.04, 2.44])
# Running loop over guesses and appending the Likelihood function to the checking list
for i in np.arange(0.34, 0.36, 0.0001):
    checker.append(np.prod(i * np.exp(-i * X)))
    
# Checking which value is maximum.
np.arange(0.34, 0.36, 0.0001)[checker.index(max(checker))]

# MLE approach using Scipy
# This uses the Likelihood function directly and maximises it.
def objective(x):
    return -1 * np.prod(x * np.exp(-x * X))
bounds = Bounds(0.34, 0.36)
minimize(objective, np.array([0.30]), bounds=bounds)