# -*- coding: utf-8 -*-
"""
Created on Thu Nov 25 20:00:01 2021

@author: TANISH
"""

""" Python is Case sensitive """

# For Python, small letters are not equal to capital letters
# For eg, "n" is NOT equal to "N"

# For Binomial Distribution, as X ~ Bin(n,p)
n = 5
p = 0.1

# Only "n" and "p" have been defined so far, NOT "N" and "P".
# Referring to "N" or "P" therefore raises a NameError. The error is
# caught and printed so that the rest of the script keeps running.
print("n = ", n)
try:
    print("N = ", N)
except NameError as error:
    print("NameError:", error)
print("p = ", p)
try:
    print("P = ", P)
except NameError as error:
    print("NameError:", error)

N = 10
P = 0.4

print("n = ", n)
print("N = ", N)
print("p = ", p)
print("P = ", P)
# We see that the output for "p", "n" is different from that of
# "P" and "N"


""" Creating Python Variables """

# All variables should start with _ or an alphabet and continue with
# alphabets, numbers or "_"
# No special characters allowed (except "_")
# No "." allowed


""" Python overwrites variable """

# If a variable with a certain name is already available
# in the environment of Python and the same name is allotted to
# a new expression / dataset then Python will discard the
# contents the variable had previously and will store
# the new information

# For Gamma Distribution
alpha = 2
lamda = 0.4

# For Exponential Distribution
lamda = 0.25

print(lamda)
# We see that, lamda which was having a floating value of 0.4
# now has a floating value of 0.25


""" Python follows BODMAS rule """

# That is, all expressions within () will be solved first, giving
# them the highest priority

# Calculate the Cumulative distribution function for,
# x = 0.5 where X ~ U(a = 0, b = 5)
x = 0.5
a = 0
b = 5

# Without parentheses the division a / b takes place first,
# so this is x - (a / b) - a and NOT the CDF
without_parentheses = x - a / b - a
print(without_parentheses)

# Here (x - a) and (b - a) are calculated first and then
# the division takes place
cdf = (x - a) / (b - a)
print(cdf)


""" Numeric Operations """

# Addition
# To add 2 or more numbers (floating, integer, ...)
# We use "+" to add numbers

# An obs which was noted as 56 kgs by the agent during a survey
# was 2 kg less than the true weight
weight_reported = 56
true_weight = weight_reported + 2

# Subtraction
# We use "-" to subtract numbers

# Find the deviation of the observation from its mean
obs = 56
mean = 46
deviation = obs - mean

# Multiplication
# We use "*" to multiply numbers

# Calculate Variance of X, X ~ Bin(n = 10, p = 0.44567)
n = 10
p = 0.44567
variance = n * p * (1 - p)
print(variance)

# Division & Power
# We use "/" and "**" to divide and to calculate
# the power respectively

# Calculate Variance of X, X ~ U(0,15)
a = 0
b = 15
variance = ((b - a) ** 2) / 12
print(variance)
# Again, we see that Python has overwritten the previous
# information available in the variable "variance"

# Modulo
# Modulo gives the remainder after performing the division
# between 2 numbers

# A Statistician wants to fit 2 MVN distributions,
# one using 13 of 21 variables in the dataset
# What no. of variables will be used to fit the 2nd MVN?
variables_left = 21 % 13
print(variables_left)


""" Working on Strings """

# In Python, we write all characters / strings in ""
print("Normal Distribution is Symmetric in nature")
print("SNV is a special case of Normal Distribution")

# Use "+" to concatenate 2 or more strings
print("MLE" + " is a method of estimation")

# print() displays its argument but returns None, so both names
# below hold None and not the text that was printed
string_1 = print("Normal Distribution is Symmetric in nature")
string_2 = print("SNV is a special case of Normal Distribution")

# The command below gives an error because "+" is used to
# concatenate 2 strings and NOT the results of 2 print() calls.
# The TypeError is caught and printed so the script keeps running.
try:
    string_1 + string_2
except TypeError as error:
    print("TypeError:", error)

# Alternative way to concatenate strings
string_1 = "Normal Distribution is Symmetric in nature"
string_2 = "SNV is a special case of Normal Distribution"
print(string_1, string_2, sep=" & ")
# The sep = " " argument can be used to separate two or more
# strings with any given delimiter
# For eg, "," "." "/" "|" etc...
# Print a string on 2 or more different lines
# We use "\n" to make the remainder of the string, after \n,
# print on a new line
print("Normal Distribution is \n Symmetric in nature")

# How to use String and a variable in print()
mu = 5
sigma = 0.9
print("Mean = ", mu, "sigma = ", sigma)
print("mu = ", mu, "\t sigma = ", sigma)
# "\t" is used as a tab separator

# At times, we want to import data by providing the Path,
# However, for eg, in "C:\tanish" the "\t" will act as a
# tab and not as a part of the Location
# Use "r" before the start of the string to make the string a
# raw string

# Normal string: the leading "\t" is turned into a tab character.
# The remaining backslashes are doubled so that only "\t" is treated
# as an escape ("\D" and "\C" are not valid escape sequences).
path_to_import_data = "\tanish\\Desktop\\Climate.csv"
print(path_to_import_data)

# Raw string: every backslash is kept exactly as typed
path_to_import_data = r"\tanish\Desktop\Climate.csv"
print(path_to_import_data)

# Extracting a particular part of String
# In Python, indexing starts with 0
Name = "FGM Distribution"
# Therefore,
# F  G  M     D  i  s  t  r  i  b  u  t  i  o  n
# 0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15
#                                  ...  -3 -2 -1
# Each space is also allotted 1 index value

print(Name[0:6])
# Interpretation for above command
# Python considers 0:6 as 0 to 5, i.e [0,6) i.e [0,5]
# That is, start:stop covers start, start + 1, ..., stop - 1
# Hence, 0:6 is equal to 0 1 2 3 4 5

# We can use a negative Index to extract elements
# from back to front
print(Name[-1])


""" Logical Operations """

# We see how to work with Boolean values
# Or Logical values like: True / False

# Eg,
# An Actuary is trying to Model the distribution of Claim amounts
# He assumes the distribution to be Exponential
# He used 2 methods to estimate the parameters
# 1.) Method of Moments
# 2.)
#     Maximum Likelihood Estimation
# He wants to know how both these estimates actually
# differ from each other
import random

lambda_mom = random.uniform(0.5, 1.5)
lambda_mle = random.uniform(0.5, 1.5)

# Equal To (==)
print(lambda_mle == lambda_mom)

# Not Equal To (!=)
print(lambda_mle != lambda_mom)

# Greater Than (>)
print(lambda_mle > lambda_mom)

# Greater Than Or Equal (>=)
print(lambda_mle >= lambda_mom)

# Less Than (<)
print(lambda_mle < lambda_mom)

# Less Than Or Equal (<=)
print(lambda_mle <= lambda_mom)

# Eg.
# The Actuary has given 3 papers in his previous attempts
# for which the results are out just now
# He has downloaded the results and will decide to party if,
result_1 = random.choice(["P", "F"])
result_2 = random.choice(["P", "F"])
result_3 = random.choice(["P", "F"])

# Use of AND operator (& / and)
# NO party if he does not pass in any 1 of them
print((result_1 == "P") and (result_2 == "P") and (result_3 == "P"))
print((result_1 == "P") & (result_2 == "P") & (result_3 == "P"))

# Use of OR operator (| / or)
# He will party if he passes in any one of them
print((result_1 == "P") | (result_2 == "P") | (result_3 == "P"))
print((result_1 == "P") or (result_2 == "P") or (result_3 == "P"))

# Use of NOT operator (not)
# NOT takes the value opposite of the true Boolean value
# Eg, NOT will make True equal to False and vice versa

# He will party irrespective of the results
# The above question is covered in Quiz at the end of the script


""" List """

# List is one of the Data Structures which is Mutable
# Mutable means that once created, alterations / modifications
# can be made

# A List can hold multiple data types and data structures together
# even in Nested forms
distributions_list = ["Gamma", "Normal", "Chi-sq", "Students-t", "F"]
# All strings

# Kept under its own name so that the working list above stays
# all-string and can still be sorted further below
mixed_list = ["Gamma", True, 0.125]
# Mix of, String, Boolean, Numeric

# Assignment / Changing elements
distributions_list[0] = "FGM"
# Recall: Indexing starts with 0

# Syntax to create a list
# name = [element_1, element_2, ...]
# use "," to separate elements

# Extracting elements from list using Indexes
print(distributions_list[0])

# Adding more elements in a list
# Using "+"
distributions_list = distributions_list + ["MVN"]

# Using the append() function
distributions_list.append("MVN")

# Removing an element from list
# pop() takes only indexes and removes elements based on indexes
distributions_list.pop(0)

# remove() takes only the value of an element
# "Gamma" was replaced by "FGM" above, so it is no longer in the
# list: remove() raises a ValueError for a missing value. The error
# is caught and printed so the script keeps running.
try:
    distributions_list.remove("Gamma")
except ValueError as error:
    print("ValueError:", error)

list_to_remove = ["Gamma", "MVN"]
# We cannot directly use multiple elements in pop() / remove()
# to remove multiple items from a list

# Sorting a list
# Ascending Order
distributions_list.sort()
print(distributions_list)

# Descending Order
distributions_list.sort(reverse=True)
print(distributions_list)


""" Tuple """

# Tuples are Immutable, i.e once created they cannot
# be altered or modified

# Syntax for creating a Tuple
# name = (element_1, element_2, ...)
# Use "," to separate elements
distributions_tuple = ("Gamma", "Normal", "Chi-sq", "Students-t", "F")

# Extracting elements from Tuple using indexing
print(distributions_tuple[0])

# The three checks below fail on purpose; each error is caught and
# printed so the script keeps running.

# Check-1: Tuples are Immutable (there is no append() method)
try:
    distributions_tuple.append("MVN")
except AttributeError as error:
    print("AttributeError:", error)

# Check-2: Tuples are Immutable (items cannot be reassigned)
try:
    distributions_tuple[0] = "FGM"
except TypeError as error:
    print("TypeError:", error)

# Sorting a Tuple without changing it
print(sorted(distributions_tuple, reverse=False))
# Gives an output (a new list) of sorted elements

# Check-3: Tuples are Immutable (there is no sort() method)
try:
    distributions_tuple.sort()
except AttributeError as error:
    print("AttributeError:", error)

# Eg.
# Creation of a List within a Tuple and other Data Types
dummy = (1, True, "str", [1, 2])
# Numeric, Boolean, String, List


""" Dictionary """

# Another data structure, having the Key-Value pair format
# All keys have to be unique

# Syntax to create a Dictionary
# name = {key_1 : value_1, key_2 : value_2, ...}

# Eg.
# We have a set of 100 emails, and we count how many times each
# unique word has repeated (frequency) in those 100 emails and below
# are the top 5 words
bag_of_words = {"good": 11,
                "meeting": 13,
                "a": 14,
                "hi": 44,
                "Dear": 20}

# Check: All keys have to be unique
# Assigning to an existing key overwrites its value instead of
# creating a second "good" entry
bag_of_words["good"] = 44

# Adding elements to a Dictionary
bag_of_words["schedule"] = 12

# Deleting a particular Key-Value pair
del bag_of_words["good"]


""" Set """

# Another Data Structure in Python, similar to the "set" in Maths
# Set: a collection of items / elements
# A Set can only contain unique values at all times

# Syntax to create a set
# name = {element_1, element_2, ...}
# name = set(iterable)

# Creating a Set
li = [0, 2]
even_no = set(li)

even_no = {0, 2, 4, 6, 8, 10}
perfect_squares = {1, 4, 9, 16, 25, 36, 49, 64, 81, 100}

# Add an element into a Set
even_no.add(12)

# Discard / Remove an element from a Set
even_no.discard(12)
# Based on values and not Index

# Union
# Gets all unique values among the 2 or more Sets
print(even_no.union(perfect_squares))

# Intersection
# Gets only the values which are in common among the 2 or more Sets
print(even_no.intersection(perfect_squares))

# Difference
# Removing from the first Set the elements that are present in
# the second Set
print(even_no - perfect_squares)
print(perfect_squares - even_no)

# Subset
# To check if some element(s) is/are a part of a Set
print({4}.issubset(even_no))

# Superset
# Set which has another Set included in it
print(even_no.issuperset({4}))
print({4}.issuperset(even_no))

# Sets are not Subscriptable
# That is, indexing does not work with Sets
# Check: the TypeError is caught and printed so the script keeps
# running
try:
    even_no[0:2]
except TypeError as error:
    print("TypeError:", error)


""" Class Exercise """

# Classify the following variables as Valid / Invalid
# The invalid names are kept as comments: writing them as real
# assignments is a SyntaxError that stops the whole file from running
_Var1 = ""       # Valid
# Var_2. = ""    # Invalid: "."
Var3_ = ""       # Valid
# Var4& = ""     # Invalid: "&"
# 5Var = ""      # Invalid: "5" at start
V6 = ""          # Valid

# The same classification, checked by Python itself
for candidate in ("_Var1", "Var_2.", "Var3_", "Var4&", "5Var", "V6"):
    print(candidate, "->", "Valid" if candidate.isidentifier() else "Invalid")

# Identify the type of variables below
distribution = "Weibull"  # String
no_of_parameters = "2"    # String
alpha = 2                 # Int
lamda = 0.4567            # Float

# Alternative Check
print(type(distribution))
print(type(no_of_parameters))
print(type(alpha))
print(type(lamda))

# Write down the output of the below expressions,
print("Error..." * 3)
print("Random Forest can be used for Classification"[:-1])
print("Random Forest can be used for Classification"[-1:])
print("Random Forest can be used for Classification"[4:-1])
alpha = beta = theta = 0.145


""" Quiz """

# Q1.) Calculate AND print the squared deviations of the
#      following obs: 23, 32, 26

# Answer:
obs = [23, 32, 26]
mean = sum(obs) / len(obs)
squared_deviations = [(value - mean) ** 2 for value in obs]
print(squared_deviations)

# Q2.) Print Symmetric Distribution from,
#      "Normal Distribution is Symmetric in nature"

# Answer:
string = "Normal Distribution is Symmetric in nature"
print(string[23:32], string[7:19], sep=" ")
# OR (this slice also picks up the space before "Symmetric")
print(string[22:32], string[7:19], sep=" ")

# Q3.) An Actuary has 984 chocolates which he wants to distribute
#      among his 456 friends on his Birthday
#      He wonders, how many minimum chocolates will be left if
#      all his friends are given equal no. of chocolates

# Answer:
leftover = 984 % 456
print("The Actuary has", leftover, " no. of leftover chocolates")

# Q4.) Later, he found that his bag can only accommodate
#      (((59**3 - 40) // 66) // 110) chocolates.
#      Comment on the same.

# Answer:
bag_capacity = (((59**3 - 40) // 66) // 110)
decision = leftover <= bag_capacity
print("Can the Actuary accomodate the leftover chocolates? :", decision)

# Q5.) The Actuary has given 3 papers in his previous attempts
#      for which the results are out just now,
#      He decides that he will party irrespective of the results
#      (You are supposed to use the results of all 3 papers)

# Answer:
import random

result_1 = random.choice(["P", "F"])
result_2 = random.choice(["P", "F"])
result_3 = random.choice(["P", "F"])

# He cannot pass all 3 papers AND fail all 3 papers at the same
# time, so the inner expression is always False and its negation
# is always True: he parties whatever the results are
all_passed = (result_1 == "P") & (result_2 == "P") & (result_3 == "P")
all_failed = (result_1 == "F") & (result_2 == "F") & (result_3 == "F")
party = not (all_passed & all_failed)
print(party)