# -*- coding: utf-8 -*-
"""
Created on Mon Jan 31 20:21:55 2022

@author: TANISH
"""


""" Data Cleaning """

import pandas as pd
import numpy as np
from numpy import random
random.seed(11)  # fixed seed so the generated marks are reproducible

# Five random integer marks in [0, 100) for each assignment.
py_asgmt = random.randint(0, 100, 5)
rm_asgmt = random.randint(0, 100, 5)
fe_asgmt = random.randint(0, 100, 5)


student_roll = ["A051", "A052", "A053",
                "A054", "A055"]

# Build the frame from a column mapping instead of a transposed
# np.array([...]): stacking roll numbers (str) and marks (int) in a single
# array coerces every element to a string, so the marks columns would no
# longer be numeric.
Report_1 = pd.DataFrame({"Roll_No": student_roll,
                         "Python": py_asgmt,
                         "RM": rm_asgmt})


# Second roster: "A056" takes the place of "A053", so the two reports
# do not cover exactly the same students.
student_roll = ["A051", "A052", "A056",
                "A054", "A055"]

Report_2 = pd.DataFrame({"Roll_No": student_roll,
                         "FE": fe_asgmt})


# A left merge keeps every row of Report_1. "A053" has no match in
# Report_2, so its FE value is NaN (and "A056" is not carried over).
Report = Report_1.merge(Report_2, how="left",
                        on="Roll_No")


# Proportion of missing values.
# isnull() marks each cell True when it holds NaN; summing those flags
# along an axis counts the missing cells. The results below are not
# stored; Report itself is left untouched.

# Per row: missing cells divided by the number of columns
Report.isnull().sum(axis="columns") / Report.shape[1]

# Per column: missing cells divided by the number of rows
Report.isnull().sum(axis="index") / Report.shape[0]


# dropna()
# Returns a copy with the rows / columns that contain NaN removed;
# the results below are not stored, so Report stays as it is.
#   axis = "index"   (0): drop rows
#   axis = "columns" (1): drop columns
Report.dropna(axis="index")
Report.dropna(axis="columns")


# thresh = <int>
# Keep only the rows that have at least that many NON-NaN values.
Report.dropna(axis="index", thresh=4)

# fillna()
# Substitute NaN with some specified value.
# Returns a new DataFrame; the result is not stored here, so Report
# itself is not modified.

Report.fillna(value=0)


# Directional fills. fillna(method=...) is deprecated in pandas, so the
# dedicated methods are used instead:
#   bfill(): fill a gap with the next valid value in the column
#   ffill(): fill a gap with the previous valid value in the column
Report.bfill()
Report.ffill()
