#Q1
#A

#Null hypothesis: The true population mean is equal to 125 gms.
#Alternative hypothesis: The true population mean is not equal to 125 gms.
#significance level: 0.05.

# Sample data (30 observations)
sample_data <- c(129, 132, 127, 134, 131, 128, 130, 133, 126, 128,
                 127, 129, 133, 132, 130, 129, 135, 128, 131, 129,
                 130, 131, 132, 128, 126, 129, 130, 133, 130, 131)

# Sample mean
sample_mean <- mean(sample_data)

# Hypothesised population mean under the null hypothesis
pop_mean <- 125

# Stated standard deviation. Stored as `pop_sd` (not `sd`) so the base
# function sd() is not masked by a numeric value.
pop_sd <- 20

# Sample size
n <- length(sample_data)

# One-sample t-test. t.test() always estimates the standard deviation from
# the sample; it has no `sd` argument, and any extra argument is silently
# ignored, so none is passed here.
t.test(x = sample_data, mu = pop_mean, alternative = "two.sided")

# z-test that actually uses the stated standard deviation.
# NOTE(review): use this result when `pop_sd` is the known population
# standard deviation; use the t-test above when it is not known.
std_error <- pop_sd / sqrt(n)
z_stat <- (sample_mean - pop_mean) / std_error
p_value <- 2 * pnorm(-abs(z_stat))
c(z = z_stat, p.value = p_value)

#B.
# Probability that the mean of `n` observations falls strictly between
# `lower` and `upper` when the population has mean `mu` and standard
# deviation `sigma`. The sample mean is treated as normal with standard
# error sigma / sqrt(n).
prob_mean_between <- function(lower, upper, mu, sigma, n) {
  std_error <- sigma / sqrt(n)
  pnorm(upper, mean = mu, sd = std_error) -
    pnorm(lower, mean = mu, sd = std_error)
}

pop_mu <- 98.6
pop_sigma <- 17.2

# For n = 25
prob1 <- prob_mean_between(92, 102, mu = pop_mu, sigma = pop_sigma, n = 25)
prob1

# For n = 36
# The population standard deviation does not depend on the sample size;
# only the standard error (sigma / sqrt(n)) changes, so sigma is NOT rescaled.
prob2 <- prob_mean_between(92, 102, mu = pop_mu, sigma = pop_sigma, n = 36)
prob2

# So P(92 < X-bar < 102) is approximately 0.81 for a sample of 25
# observations and approximately 0.87 for a sample of 36 observations.

#C.

# 2 x 2 table of the survey data. prop.test() reads a two-column matrix as
# (successes, failures) with one row per group, so each row must hold one
# owner group: 152 of 280 cat owners and 81 of 190 dog owners buy premium.
survey_data <- matrix(
  c(152, 280 - 152,
    81, 190 - 81),
  nrow = 2, byrow = TRUE,
  dimnames = list(
    c("Cat owners", "Dog owners"),
    c("Premium food", "Non-premium food")
  )
)

# One-sided two-proportion test (H1: cat-owner proportion is greater),
# without continuity correction, at the 0.02 significance level.
prop.test(survey_data, correct = FALSE, alternative = "greater",
          conf.level = 0.98)

# The p-value is approximately 0.0066, which is less than the significance
# level of 0.02. Therefore, we reject the null hypothesis and conclude that
# there is evidence to suggest that cat owners are more likely than dog
# owners to feed their pets premium food.

#Q2 
#A # create a data frame with the sales data
# Create a matrix of sales data (one row per promotion, one column per store)
sales <- matrix(c(78, 87, 81, 89, 85,
                  94, 91, 87, 90, 88,
                  73, 78, 69, 83, 76,
                  79, 83, 78, 69, 81),
                nrow = 4, byrow = TRUE,
                dimnames = list(c("Free Sample", "One Gift Pack", "Cents Off", "Refund by Email"),
                                c("Store 1", "Store 2", "Store 3", "Store 4", "Store 5")))

# Print the sales matrix
sales

# The cells are sales measurements, not counts in a contingency table, so a
# chi-squared test of independence does not answer whether mean sales differ
# between promotions. Reshape to long format (as.vector() walks the matrix
# column by column, so the promotion labels repeat once per store) and
# compare the promotion means with a one-way ANOVA.
promo_sales <- data.frame(
  promotion = factor(rep(rownames(sales), times = ncol(sales)),
                     levels = rownames(sales)),
  units = as.vector(sales)
)

# NOTE(review): if the same five stores were used under every promotion,
# add the store as a blocking factor (units ~ promotion + store) - confirm
# against the study design.
promo_anova <- aov(units ~ promotion, data = promo_sales)
summary(promo_anova)

# The F statistic is about 9.69 on (3, 16) degrees of freedom and the p-value
# is below 0.01, which is less than 0.05. We therefore reject the null
# hypothesis that all promotions produce the same mean sales and conclude
# that the promotions differ in effectiveness.

#B
# Two categorical variables, eight paired observations each
Smoking <- c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "No")
Gender <- c(
  "Male", "Female", "Male", "Female",
  "Male", "Male", "Female", "Female"
)

# Cross-tabulate Gender (rows) against Smoking (columns)
table(Gender = Gender, Smoking = Smoking)


#C
# Two numeric samples of equal length (ten paired values)
x <- c(3, 8, 7, 6, 10,
       5, 2, 9, 4, 1)
y <- c(5, 12, 11, 9, 15,
       8, 3, 14, 6, 2)

# Test whether the Pearson correlation between x and y differs from zero
cor.test(x, y, method = "pearson")

#Q3 A

#i. 
# Paired observations: housing (predictor) and sales (response)
housing_obs <- c(2, 2.5, 3.2, 3.6, 3.3, 4, 4.2, 4.6, 4.8, 5)
sales_obs <- c(5, 5.5, 6, 7, 7.2, 7.7, 8.4, 9, 9.7, 10)
data <- data.frame(housing = housing_obs, sales = sales_obs)

# Simple linear regression of sales on housing
model <- lm(sales ~ housing, data = data)

# Coefficient table: estimate, standard error, t value and p-value
coef(summary(model))

#The equation for the relationship between appliance sales and housing is:

#sales = 1.168 + 1.716 * housing (coefficients rounded to three decimals)

#ii) The slope of the regression line is approximately 1.716

#iii) 
# Standard error of estimate: the residual standard error ("sigma")
# reported in the summary of the fitted model
model_summary <- summary(model)
se <- model_summary[["sigma"]]

# Show the standard error of estimate
se

#iv)
# Predictor value at which a new prediction is wanted
newdata <- data.frame(housing = 8)

# Point prediction with its 90% prediction interval (columns fit, lwr, upr)
pred <- predict(
  model,
  newdata,
  interval = "prediction",
  level = 0.9
)

# Show the fitted value and the interval bounds
pred

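#The 90% prediction interval of appliance sales for a new prediction of housing of 8 million units is approximately (13.65, 16.13) thousand units, centred on a fitted value of about 14.89. This means that we are 90% confident that the actual appliance sales will be within this interval for a new prediction of housing of 8 million units. Note that housing = 8 lies outside the observed range (2 to 5), so this is an extrapolation and should be interpreted with caution.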


#Q3 B 

# Load the built-in mtcars data set into the workspace
data(mtcars)

# Binary response: 1 when mpg is at least 20, otherwise 0
mtcars[["high_mpg"]] <- as.numeric(mtcars[["mpg"]] >= 20)

# Logistic regression of high_mpg on wt, hp, drat and qsec
model <- glm(
  high_mpg ~ wt + hp + drat + qsec,
  data = mtcars,
  family = "binomial"
)

# Coefficient estimates and fit summary
summary(model)

# Predicted probability of high_mpg = 1 for a single new observation
new_data <- data.frame(wt = 3.5, hp = 120, drat = 3.9, qsec = 17)
prob <- predict(model, newdata = new_data, type = "response")
prob
