#Q1
#A

#Null hypothesis: The true population mean is equal to 125 gms.
#Alternative hypothesis: The true population mean is not equal to 125 gms.
#significance level: 0.05.

# Sample data
sample_data <- c(129, 132, 127, 134, 131, 128, 130, 133, 126, 128,
                 127, 129, 133, 132, 130, 129, 135, 128, 131, 129,
                 130, 131, 132, 128, 126, 129, 130, 133, 130, 131)

# Hypothesised population mean under the null hypothesis
pop_mean <- 125

# Population standard deviation, treated as known.
# Named pop_sd (not sd) so that the stats::sd() function is not masked.
# NOTE(review): 20 is taken as given by the exercise -- confirm.
pop_sd <- 20

# Sample size and sample mean
n <- length(sample_data)
sample_mean <- mean(sample_data)

# Two-sided z-test using the known population standard deviation.
# t.test() has no `sd` argument: anything passed under that name is silently
# swallowed by `...`, so the known sigma has to be applied by hand.
z_stat <- (sample_mean - pop_mean) / (pop_sd / sqrt(n))
p_value <- 2 * pnorm(-abs(z_stat))
z_stat
p_value

# One-sample t-test for comparison. It estimates the standard deviation from
# the sample itself, so its result can differ from the z-test above.
t.test(x = sample_data, mu = pop_mean, alternative = "two.sided")

#B. 
# Confidence interval for a population variance based on the chi-square
# distribution with n - 1 degrees of freedom.
n <- 9      # sample size
s2 <- 127   # treated as the sample variance by the formula below
a <- 0.05   # significance level, i.e. a (1 - a) confidence interval

# The upper-tail quantile yields the lower limit and the lower-tail quantile
# yields the upper limit, because the quantile sits in the denominator.
chi_sq_quantiles <- qchisq(c(1 - a / 2, a / 2), n - 1)
ci_limits <- (n - 1) * s2 / chi_sq_quantiles
lower <- ci_limits[1]
upper <- ci_limits[2]
lower
upper

#C.

# Survey counts as a 2 x 2 table: one row per owner group, with the
# premium-food count in the first column and the non-premium count in the
# second.
survey_data <- matrix(
  c(298, 192,
    196, 111),
  nrow = 2, ncol = 2, byrow = TRUE,
  dimnames = list(
    c("Cat owners", "Dog owners"),
    c("Premium food", "Non-premium food")
  )
)

# One-sided two-sample test of proportions without continuity correction.
# For a two-column matrix prop.test() reads column 1 as successes and
# column 2 as failures, so "greater" tests whether the first row's
# proportion exceeds the second row's.
prop.test(
  x = survey_data,
  alternative = "greater",
  conf.level = 0.98,
  correct = FALSE
)

#The p-value is about 0.8, which is greater than the significance level of 0.02. Therefore, we fail to reject the null hypothesis: there is not sufficient evidence to conclude that cat owners are more likely than dog owners to feed their pets premium food.


#Q2
#A
# set up data
data <- c(76, 89, 78, 85, 90, 94, 51, 89)

# sample size
n <- length(data)

# sample mean
x_bar <- mean(data)
x_bar

# estimate of population standard deviation
# sd() already divides by n - 1, so no further sqrt(n / (n - 1)) correction
# is applied; doing so would inflate the estimate.
sd_hat <- sd(data)
sd_hat

# standard error of the mean
se <- sd_hat / sqrt(n)

# margin of error: t quantile with n - 1 degrees of freedom for a two-sided
# 95% interval
me <- qt(0.975, df = n - 1) * se

# 95% confidence interval for the population mean
x_bar + c(-me, me)

#B. 

# create sugarlevels matrix: one row per medicine, one column per store
sugarlevels <- matrix(c(178, 187, 181, 189, 185,
                        194, 191, 187, 190, 188,
                        173, 178, 169, 183, 176,
                        179, 183, 178, 169, 181),
                      nrow = 4, byrow = TRUE,
                      dimnames = list(c("Med1", "Med2", "Med3", "Med4"),
                                      c("Store 1", "Store 2", "Store 3", "Store 4", "Store 5")))

# print sugarlevels matrix
sugarlevels

# Reshape to long format. as.vector() flattens a matrix column by column, so
# the values come out as Med1, Med2, Med3, Med4, Med1, ... The medicine labels
# must therefore cycle through the row names once per column (times =), not
# repeat each name five times in a row, or the groups are mislabelled.
sugar_long <- data.frame(
  level = as.vector(sugarlevels),
  medicine = factor(rep(rownames(sugarlevels), times = ncol(sugarlevels)))
)

# perform one-way ANOVA of sugar level by medicine
anova <- aov(level ~ medicine, data = sugar_long)
summary(anova)

#C. 
# Paired measurements to be tested for linear association
turbo_1 <- c(13, 15, 16, 27, 28, 10)
turbo_12 <- c(42, 26, 17, 18, 29, 12)

# Two-sided Pearson correlation test (the cor.test() defaults, spelled out)
cor.test(
  x = turbo_1,
  y = turbo_12,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)

#Q3 A

# Observed prices (p) and quantities sold (q), one row per observation
data <- data.frame(
  p = c(20, 17.5, 16, 14, 12.5, 10, 8, 6.5),
  q = c(125, 156, 183, 190, 212, 238, 250, 276)
)

# i. scatter plot of quantity sold against price
with(
  data,
  plot(p, q,
       main = "Scatter Plot of P vs Q",
       xlab = "Price (P)", ylab = "Quantity Sold (Q)")
)

# ii. least squares regression of quantity on price
model <- lm(q ~ p, data = data)
summary(model)

# iii. fitted equation: intercept and slope of the regression line
coef(model)
# Q = 342.15 + (-10.59) * P

# overlay the fitted line on the scatter plot drawn in step i
abline(model, col = "red")

# iv. predicted quantity sold when the price is 15
new_price <- data.frame(p = 15)
predict(model, newdata = new_price)

#Q3 B

#i. 
# Create a data frame with the given data
data <- data.frame(housing = c(2, 2.5, 3.2, 3.6, 3.3, 4, 4.2, 4.6, 4.8, 5),
                   sales = c(5, 5.5, 6, 7, 7.2, 7.7, 8.4, 9, 9.7, 10))

# Fit a linear regression model of sales on housing
model <- lm(sales ~ housing, data = data)

# Print the coefficients
summary(model)$coefficients

#The equation for the relationship between appliance sales and housing is
#(values rounded from the coefficients printed above):

#sales = 1.168 + 1.716 * housing

#ii) The slope of the regression line is approximately 1.716

#iii)
# Compute the standard error of estimate (residual standard error)
se <- summary(model)$sigma

# Print the standard error of estimate (approximately 0.374)
se

#vi) plot the data and add the regression line
# abline() only draws on an existing plot, so the scatter plot of this data
# set has to be created first; otherwise the line lands on whatever plot was
# drawn last (or fails if no plot is open).
plot(data$housing, data$sales, main = "Scatter Plot of Sales vs Housing",
     xlab = "Housing", ylab = "Appliance Sales")
abline(model, col = "red")

#iv)
# Create a new data frame with the new prediction of housing.
# Note: housing = 8 lies outside the observed range (2 to 5), so this is an
# extrapolation of the fitted line.
newdata <- data.frame(housing = 8)

# Compute the predicted appliance sales and the 90% prediction interval
pred <- predict(model, newdata, interval = "prediction", level = 0.9)

# Print the prediction interval (columns: fit, lwr, upr)
pred

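#The approximate 90% prediction interval of appliance sales for a new prediction of housing of 8 million units is about (13.65, 16.13) thousand units, centred on a predicted value of about 14.89. This means that we are 90% confident that the actual appliance sales will be within this interval for a new prediction of housing of 8 million units. Note that 8 lies well outside the observed housing values (2 to 5), so this interval relies on extrapolating the fitted line.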

