# -*- coding: utf-8 -*-
"""
Created on Thu Feb  3 19:27:49 2022

@author: TANISH
"""

import numpy as np
import pandas as pd

""" Feature Engineering """

# A process of generating new features / information
# given the current data set


""" Working With Factors """

# One-Hot Encoding
# Use when the feature / column is of the nominal type,
# i.e. its categories have no inherent order

# Using Pandas

credit_data = pd.read_csv(r"C:\Users\tanis\Desktop\Credit Data.csv")

# Build the indicator (dummy) columns for the Gender categories.
Gender = pd.get_dummies(credit_data["Gender"])

# Append the indicator columns on the right, then discard the original
# categorical column they were derived from.
credit_data = pd.concat([credit_data, Gender], axis=1).drop(columns="Gender")


# Alternative

# Reload the raw data, then let pandas expand the listed categorical columns
# into indicator columns (and drop the originals) in a single call.
credit_data = pd.read_csv(r"C:\Users\tanis\Desktop\Credit Data.csv")
nominal_columns = ["Gender", "Job_Type"]
credit_data = pd.get_dummies(credit_data, columns=nominal_columns)


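# Label Encoding
# Use when the feature / column is of the ordinal type,
# i.e. its categories have a meaningful order.
# NOTE: LabelEncoder assigns integer codes in sorted order of the labels,
# which may not match the true ranking of the categories - verify the
# resulting codes (or map the categories explicitly) before relying on them.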

from sklearn import preprocessing


# Reload the raw data so this example starts from the unmodified columns.
credit_data = pd.read_csv(r"C:\Users\tanis\Desktop\Credit Data.csv")

# Replace each Education category with the integer code the encoder assigns.
education = credit_data["Education"]
Label_Encoder = preprocessing.LabelEncoder()
credit_data["Education"] = Label_Encoder.fit_transform(education)


""" Scaling Of Features """

# Using Standardisation
# We standardise a feature (X) as
# (X - Mu) / Sigma
# where Mu and Sigma are the mean and standard deviation of that feature


from sklearn import preprocessing

# Fit the scaler once and keep the result. Fitting twice and discarding both
# outputs only shows anything in an interactive console.
standardize_columns = ["Age", "Net_Income"]
standardize = preprocessing.StandardScaler()

# fit_transform returns a bare array, so restore the column names and the
# original row index to keep the scaled values aligned with credit_data.
standardized_data = pd.DataFrame(
    standardize.fit_transform(credit_data[standardize_columns]),
    columns=standardize_columns,
    index=credit_data.index,
)

# Sanity check: each column's mean should be ~0. The reported std is close to
# but not exactly 1, because describe() uses the sample standard deviation
# (ddof=1) while StandardScaler divides by the population one (ddof=0).
print(standardized_data.describe())


# Using Normalization
# We normalize a feature (X) as
# (X - X_min) / (X_max - X_min)
# where X_min and X_max are the minimum and maximum of that feature

# Fit the scaler once and keep the result. Fitting twice and discarding both
# outputs only shows anything in an interactive console.
normalize_columns = ["Age", "Net_Income"]
normalize = preprocessing.MinMaxScaler()

# fit_transform returns a bare array, so restore the column names and the
# original row index to keep the scaled values aligned with credit_data.
normalized_data = pd.DataFrame(
    normalize.fit_transform(credit_data[normalize_columns]),
    columns=normalize_columns,
    index=credit_data.index,
)

# Sanity check: with the default feature range, each non-constant column
# should now have a minimum of 0 and a maximum of 1.
print(normalized_data.describe())