import numpy as np
## Question 1
import pandas as pd
import seaborn as sb

# Load the Problem 1 workbook into a DataFrame.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this file exists at exactly this location.
problem_1_path = r"C:\Users\nikhi\Downloads\Problem 1.xlsx"
problem_1 = pd.read_excel(problem_1_path)

## Student 1:
## a)
# Rows whose Location text contains "Mumbai". Location is cast to str first
# so that the substring test runs on every cell.
location_text = problem_1["Location"].astype("str")
mentions_mumbai = location_text.str.contains("Mumbai")
mumbai_jobs = problem_1[mentions_mumbai]

## b)
# Metro cities whose postings are tallied below.
metro_cities = ["Bengaluru", "Kolkata", "Delhi", "Chennai", "Gurgaon", "Pune"]

# Sum of per-city match counts: a row is counted once for every metro city
# named in its Location text, so a row naming two of them contributes 2.
location_text = problem_1["Location"].astype("str")
matches_per_city = [
    int(location_text.str.contains(city).sum()) for city in metro_cities
]
metro_jobs = sum(matches_per_city)
metro_jobs

# Number of postings per metro city.
#
# The original code stored the city names in a variable called `list`, which
# shadowed the built-in `list` type for the rest of the script, and then
# looped over the names only to dispatch on each one with an `if`. Each count
# is independent, so they are computed directly here.
city_locations = problem_1["Location"].astype("str")


def _count_city_jobs(city):
    """Return how many rows have a Location text containing *city*.

    The match is a literal, case-sensitive substring test.
    """
    return int(city_locations.str.contains(city, regex=False).sum())


bengaluru_jobs = _count_city_jobs("Bengaluru")
kolkata_jobs = _count_city_jobs("Kolkata")
delhi_jobs = _count_city_jobs("Delhi")
chennai_jobs = _count_city_jobs("Chennai")
gurgaon_jobs = _count_city_jobs("Gurgaon")
pune_jobs = _count_city_jobs("Pune")
        
## Student 2
## a)
# Distribution of the Experience column, drawn two ways.
import matplotlib.pyplot as plt

experience = problem_1["Experience"]

# Figure-level histogram; displot creates its own figure.
sb.displot(experience, kind="hist")

# histplot is axes-level and, without `ax`, draws on the current axes, which
# would be the displot figure above. Give it a fresh figure so the two plots
# do not overlay each other.
_, experience_kde_ax = plt.subplots()
sb.histplot(experience, kde=True, ax=experience_kde_ax)

# Frequency table of Experience values, most common first.
# Series.value_counts() replaces the deprecated top-level pd.value_counts();
# both sort by descending count and skip missing values by default.
openings = problem_1["Experience"].value_counts().reset_index()
openings.columns = ["Experience", "Frequency"]

# The five most frequent Experience values.
top5 = openings.head(5)

import matplotlib.pyplot as plt

# Bar chart of the five most frequent Experience values. It is drawn on its
# own figure/axes: a bare plt.bar() targets whatever axes are current, which
# would place the bars on top of the histogram plotted earlier.
_, top5_ax = plt.subplots()
top5_ax.bar(top5.Experience, top5.Frequency, color="yellow")

# First six rows of the Experience frequency table and their combined count.
# NOTE(review): `openings` appears to be ordered by frequency, so these are
# presumably the six most common Experience values. Confirm that they are
# the fresher-level ones.
fresher_jobs = openings.head(6)

total_fresher_jobs = sum(fresher_jobs["Frequency"])
