# -*- coding: utf-8 -*-

import numpy as np 
from sklearn.linear_model import LinearRegression
from scipy.optimize import minimize, Bounds

#1)
# Setting the transition matrix
P = np.array([
     [0.75, 0.15, 0.1], 
     [0.45, 0.45, 0.1], 
     [0.3, 0.1, 0.6]
     ])
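# A quick added check (a sanity-check sketch of my own): a valid transition
# matrix is row-stochastic, i.e. every row must sum to 1.
assert np.allclose(P.sum(axis=1), 1)
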
# The stationary distribution satisfies pi @ P = pi, i.e. pi is a left
# eigenvector of P, so we take the right eigenvectors of the transpose P.T
eigvals, eigvecs = np.linalg.eig(P.T)
# Picking the eigenvector whose eigenvalue is (numerically) equal to 1,
# since that one satisfies pi @ P = pi
pi = eigvecs[:, np.isclose(eigvals, 1)][:, 0].real
# Normalising so the entries sum to 1, giving the stationary distribution
stationary = pi / pi.sum()
print(f"Stationary Distribution is: {stationary}")

# Alternative: raising P to increasing powers until P^i stops changing
i = 1
while i <= 1000:
    this = np.linalg.matrix_power(P, i)
    prev = np.linalg.matrix_power(P, i - 1)
    # Exact float equality (np.array_equal) is fragile; np.allclose is robust
    if np.allclose(prev, this):
        # Every row of the converged power equals the stationary distribution,
        # so we report the first row
        print(f"Stationary Distribution is: {this[0]}\nFound on iteration {i}")
        print(this)
        break
    i += 1
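
# A lighter-weight alternative (a sketch of my own, not part of the original
# solution): iterate the distribution vector directly, pi_next = pi_k @ P,
# instead of raising the whole matrix to a power.
pi_k = np.array([1.0, 0.0, 0.0])  # arbitrary starting distribution
for _ in range(1000):
    pi_next = pi_k @ P
    if np.allclose(pi_next, pi_k):
        break
    pi_k = pi_next
print(f"Vector-iteration estimate: {pi_k}")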

#2)
# Defining the data
x = np.array([3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.20])
y = np.array([19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4])

# Getting the corrected sums of squares and cross-products:
# s_xx = sum(x_i^2) - n*xbar^2, s_yy = sum(y_i^2) - n*ybar^2,
# s_xy = sum(x_i*y_i) - n*xbar*ybar
n = len(x)
s_xx = np.sum(x**2) - n * np.mean(x)**2
s_yy = np.sum(y**2) - n * np.mean(y)**2
s_xy = np.sum(x * y) - n * np.mean(x) * np.mean(y)

# Calculation of B0 and B1 values
B1 = s_xy / s_xx
B0 = np.mean(y) - (B1 * np.mean(x))

# Checking the answer against scikit-learn
reg = LinearRegression()
reg.fit(x.reshape((-1, 1)), y)
print(round(reg.coef_[0], 2) == round(B1, 2))
print(round(reg.intercept_, 2) == round(B0, 2))

# Both sanity checks print True.
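
# As an added aside (my own sketch): the otherwise unused s_yy gives the
# coefficient of determination directly, R^2 = s_xy**2 / (s_xx * s_yy).
r_squared = s_xy**2 / (s_xx * s_yy)
print(f"R^2: {r_squared}")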

#3)
# Defining the variables
x3 = np.array([2.76, 1.80, 2.50, 10.64, 12.01])
y3 = np.array([7.40, 4.81, 6.69, 28.52, 32.18])

# Baseline standard deviations
print(f"sd of x3: {np.std(x3)}")
print(f"sd of y3: {np.std(y3)}")

# Checking the effect scaling has on the standard deviation
for c in [2, 5, 7.5, 10, 50, 100]:
    print(f"Scaling factor: {c}")
    print(f"x: {np.std(x3 * c)}; Ratio: {np.std(x3 * c) / np.std(x3)}")
    print(f"y: {np.std(y3 * c)}; Ratio: {np.std(y3 * c) / np.std(y3)}")
    print("\n")
# The standard deviation scales linearly with the scaling factor c:
# new_sd = |c| * old_sd (all factors above are positive, so the ratio is c).
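
# A minimal added check of the |c| claim, using a negative factor (my own
# sketch, not part of the original exercise):
c = -3.0
assert np.allclose(np.std(c * x3), abs(c) * np.std(x3))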

# Calculation of the semi-variance (here taken as the variance of the
# observations at or below the mean)
semivar_x = np.var(np.array([v for v in x3 if v <= np.mean(x3)]))
semivar_y = np.var(np.array([v for v in y3 if v <= np.mean(y3)]))
print(f"Semi-variance of x3: {semivar_x}")
print(f"Semi-variance of y3: {semivar_y}")

# We see that the gap between the semi-variance and the variance of y is
# greater than the corresponding gap for x.
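
# To make the comparison above concrete (an added sketch using the values
# just computed):
print(f"x: var - semivar = {np.var(x3) - semivar_x}")
print(f"y: var - semivar = {np.var(y3) - semivar_y}")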

#4)
# Initialising a list to collect likelihood values over a grid of guesses
checker = []
X = np.array([2.29, 19.98, 0.06, 12.01, 7.04, 2.44])
# The exponential likelihood L(lam) = prod(lam * exp(-lam * X_i)) peaks at
# lam = n / sum(X) (roughly 0.137 for this sample), so the search grid has
# to bracket that value.
grid = np.arange(0.01, 0.5, 0.0001)
# Running the loop over the guesses and appending the likelihood to the list
for lam in grid:
    checker.append(np.prod(lam * np.exp(-lam * X)))

# Checking which grid value maximises the likelihood
print(f"Grid-search MLE: {grid[np.argmax(checker)]}")
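
# For reference (a standard closed-form result, added as a check): the
# exponential MLE is lam_hat = n / sum(X) = 1 / mean(X).
lam_hat = len(X) / X.sum()
print(f"Closed-form MLE: {lam_hat}")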

# MLE approach using SciPy
# This minimises the negative likelihood, which is the same as maximising
# the likelihood itself.
def objective(lam):
    return -np.prod(lam * np.exp(-lam * X))

bounds = Bounds(0.01, 0.5)
res = minimize(objective, x0=np.array([0.3]), bounds=bounds)
print(f"SciPy MLE: {res.x[0]}")
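
# For larger samples the raw product of densities underflows, so a common
# alternative (sketched here, not part of the original solution) is to
# minimise the negative log-likelihood instead; the maximiser is unchanged.
def neg_log_likelihood(params):
    lam = params[0]
    return -(len(X) * np.log(lam) - lam * X.sum())

res_log = minimize(neg_log_likelihood, x0=np.array([0.3]), bounds=bounds)
print(f"Log-likelihood MLE: {res_log.x[0]}")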
