
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib as plt


#Q1)
#STUDENT 1
# a) Keep only the postings whose "Location" text mentions Mumbai.
# The column is cast to str first, so non-string entries are matched against
# their string form instead of raising.
in_mumbai = Problem["Location"].astype("str").str.contains("Mumbai")
mumbai_jobs = Problem.loc[in_mumbai]
# Recorded result: there are 3731 jobs available in Mumbai.


# b) Metro cities assumed for this question: Mumbai, Bengaluru, Kolkata,
# Delhi, Chennai, Gurgaon and Pune.
# NOTE(review): Mumbai is not in the list below, so the total computed here
# leaves it out — confirm that this is intended.
metros = ["Delhi", "Kolkata", "Bengaluru", "Chennai", "Gurgaon", "Pune"]

# Add up, city by city, the number of rows whose "Location" text contains the
# city name. A row that names several of these cities is counted once per
# matching city.
metro_jobs = sum(
    Problem[Problem["Location"].astype("str").str.contains(city)].shape[0]
    for city in metros
)
metro_jobs

# Per-city posting counts for the metro cities listed below.
# Each count is the number of rows whose "Location" text contains the city name.
# The city list has a descriptive name so the builtin `list` is no longer
# shadowed for the rest of the script.
metro_city_names = ["Delhi", "Kolkata", "Bengaluru", "Chennai", "Gurgaon", "Pune"]

# The string form of the column is the same for every city, so build it once.
location_text = Problem["Location"].astype("str")
jobs_by_city = {
    city: Problem[location_text.str.contains(city)].shape[0]
    for city in metro_city_names
}

# Keep the individual names so the rest of the script can still refer to them.
delhi_jobs = jobs_by_city["Delhi"]
kolkata_jobs = jobs_by_city["Kolkata"]
bengaluru_jobs = jobs_by_city["Bengaluru"]
chennai_jobs = jobs_by_city["Chennai"]
gurgaon_jobs = jobs_by_city["Gurgaon"]
pune_jobs = jobs_by_city["Pune"]


delhi_jobs
# Jobs available in Delhi are 1834

kolkata_jobs
# Jobs available in Kolkata are 373

bengaluru_jobs
# Jobs available in Bengaluru are 5529

chennai_jobs
# Jobs available in Chennai are 1518

gurgaon_jobs
# Jobs available in Gurgaon are 2770

pune_jobs
# Jobs available in Pune are 1781

metro_jobs
# Jobs available in metro cities are 13805

 
# STUDENT 2
# a) Plot the distribution of the "Experience" column twice: first with
# displot as a histogram, then with histplot with a KDE curve requested.
experience = Problem["Experience"]
sb.displot(experience, kind="hist")
sb.histplot(experience, kde=True)


# Count how many postings ask for each "Experience" value, most frequent first.
# Series.value_counts() is used because the top-level pd.value_counts()
# function is deprecated; the resulting counts are the same.
openings = Problem["Experience"].value_counts().reset_index()
openings.columns = ["Experience", "Frequency"]
top5 = openings.head(5)

# pyplot is imported here because the import at the top of the file binds
# `plt` to the bare matplotlib package, which has no bar() function.
import matplotlib.pyplot as plt
plt.bar(top5["Experience"], top5["Frequency"], color="red")
# The bar chart shows the years of experience that the majority of openings require.

# Recorded output for top5:
#   Experience  Frequency
# 0   5-10 yrs       1274
# 1    2-5 yrs       1188
# 2    3-8 yrs        922
# 3    2-7 yrs        832
# 4    4-9 yrs        678

# Sort by the "Experience" text in ascending order so that ranges starting at
# 0 years (e.g. "0-1 yrs") come first, then keep the first six rows as the
# fresher openings.
# `openings` itself is ordered by frequency, so slicing it without this sort
# would return the six most common experience ranges instead.
fresher_jobs = openings.sort_values("Experience").iloc[0:6, :]

# Recorded output for fresher_jobs:
#    Experience  Frequency
# 38    0-0 yrs        151
# 26    0-1 yrs        296
# 21    0-2 yrs        386
# 28    0-3 yrs        269
# 39    0-4 yrs        130
# NOTE(review): the listing above shows five rows although six are selected;
# confirm that the sixth row belongs in the fresher total.

total_fresher_jobs = fresher_jobs["Frequency"].sum()

# Recorded result: the total number of fresher jobs is 1653
