## Roll No. 405 AOI assignment
#Q1
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib as plt
# Load the job-postings table for problem statement 1.
data = pd.read_csv(r"C:\Users\hp\Documents\Problem statement 1.csv")

# Quick look at the table: summary statistics, then the first rows.
data.describe()
data.head()
#Student 1
#Q1A
# Cast Location to text once, then keep the rows that mention "Mumbai".
location_text = data["Location"].astype("str")
is_mumbai = location_text.str.contains("Mumbai")
mumbai = data.loc[is_mumbai]
# Non-null entries per column among the Mumbai postings.
mumbai.count()
#Only 3731 of the almost 20K jobs are available in Mumbai

#Q1B
# Count the job postings whose Location mentions each of these cities.
# NOTE(review): only the spelling "Bengaluru" is matched; postings written as
# "Bangalore" would not be counted -- confirm against the data.
cities = ["Bengaluru", "Kolkata", "Delhi", "Chennai"]

# Cast to str once so .str.contains also works on non-string entries (e.g. NaN).
locations = data["Location"].astype("str")

# regex=False: the city names are literal text, not patterns.
# Summing the boolean mask gives the number of matching rows.
city_jobs = {
    city: int(locations.str.contains(city, regex=False).sum())
    for city in cities
}

# Keep the per-city variables, now holding the real posting counts.
Bang_jobs = city_jobs["Bengaluru"]
kol = city_jobs["Kolkata"]
delhi = city_jobs["Delhi"]
chennai = city_jobs["Chennai"]

# The counts are plain integers, so display them rather than calling them.
print(city_jobs)

#Student 2
#a
sb.displot(data["Experience"],kind = "hist")
data["Experience"].head()

# Searching for a digit as a substring makes the groups overlap ("0" also
# matches "10", "1" also matches "10".."19", and so on). Compare the first
# whole number of each entry instead, so every row lands in at most one group.
# NOTE(review): assumes the first number in an entry is the minimum experience
# required (e.g. "0 - 1 yrs") -- confirm against the data.
min_experience = pd.to_numeric(
    data["Experience"].astype("str").str.extract(r"(\d+)", expand=False),
    errors="coerce",  # entries without any digits become NaN and match no group
)

fresfers1 = data[min_experience == 0]
fresfers1.count()
fresfers2 = data[min_experience == 1]
fresfers2.count()
fresfers3 = data[min_experience == 2]
fresfers3.count()
fresfers4 = data[min_experience == 3]
fresfers4.count()

#Q2
# Fit one simple linear regression per (Xk, Yk) column pair of the
# problem-statement-2 table, for k = 1..4.
data2 = pd.read_csv(r"C:\Users\hp\Documents\Problem statement 2.csv")
from sklearn.linear_model import LinearRegression

# A boolean column mask keeps each selection a DataFrame (2-D) rather than a
# Series, so it can be passed straight to fit().
X1, Y1 = data2.loc[:, data2.columns == "X1"], data2.loc[:, data2.columns == "Y1"]
# fit() returns the estimator itself, so each Mk is its own fitted model.
M1 = LinearRegression().fit(X1, Y1)

X2, Y2 = data2.loc[:, data2.columns == "X2"], data2.loc[:, data2.columns == "Y2"]
M2 = LinearRegression().fit(X2, Y2)

X3, Y3 = data2.loc[:, data2.columns == "X3"], data2.loc[:, data2.columns == "Y3"]
M3 = LinearRegression().fit(X3, Y3)

X4, Y4 = data2.loc[:, data2.columns == "X4"], data2.loc[:, data2.columns == "Y4"]
M4 = LinearRegression().fit(X4, Y4)

# `lm` ends up referring to the most recently fitted model.
lm = M4


